diff options
author | Martin Liska <mliska@suse.cz> | 2022-08-16 10:06:14 +0200 |
---|---|---|
committer | Martin Liska <mliska@suse.cz> | 2022-08-16 10:06:14 +0200 |
commit | 091222fb0aaa09dcf90f2bc747f1d8a6a8ef1575 (patch) | |
tree | 07de02401c3374395a453724c4163d769c02e644 /gcc/config | |
parent | b629a7958faf817ef658e3ce59183bfb9ccefe96 (diff) | |
parent | 1c596391e150a6b0c55960c1c1cf1da76ea78230 (diff) | |
download | gcc-091222fb0aaa09dcf90f2bc747f1d8a6a8ef1575.zip gcc-091222fb0aaa09dcf90f2bc747f1d8a6a8ef1575.tar.gz gcc-091222fb0aaa09dcf90f2bc747f1d8a6a8ef1575.tar.bz2 |
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/aarch64/aarch64-sve.md | 4 | ||||
-rw-r--r-- | gcc/config/aarch64/aarch64.md | 3 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtin-types.def | 1 | ||||
-rw-r--r-- | gcc/config/i386/i386-builtins.cc | 21 | ||||
-rw-r--r-- | gcc/config/i386/i386-features.cc | 167 | ||||
-rw-r--r-- | gcc/config/i386/i386-modes.def | 2 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 80 | ||||
-rw-r--r-- | gcc/config/i386/i386.h | 4 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 45 | ||||
-rw-r--r-- | gcc/config/i386/predicates.md | 8 | ||||
-rw-r--r-- | gcc/config/i386/sse.md | 95 | ||||
-rw-r--r-- | gcc/config/rs6000/mma.md | 39 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-builtin.cc | 65 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-internal.h | 1 |
14 files changed, 430 insertions, 105 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md index bd60e65..e08bee1 100644 --- a/gcc/config/aarch64/aarch64-sve.md +++ b/gcc/config/aarch64/aarch64-sve.md @@ -8231,7 +8231,7 @@ [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) (match_operand:SVE_FULL_F 2 "register_operand" "w") - (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")] + (match_operand:SVE_FULL_F 3 "register_operand" "w")] UNSPEC_COND_FCMUO)) (match_operand:<VPRED> 4 "register_operand" "Upa")) (match_dup:<VPRED> 1))) @@ -8267,7 +8267,7 @@ [(match_operand:<VPRED> 1) (const_int SVE_KNOWN_PTRUE) (match_operand:SVE_FULL_F 2 "register_operand" "w") - (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")] + (match_operand:SVE_FULL_F 3 "register_operand" "w")] UNSPEC_COND_FCMUO)) (not:<VPRED> (match_operand:<VPRED> 4 "register_operand" "Upa"))) diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md index ef0aed2..3ea16db 100644 --- a/gcc/config/aarch64/aarch64.md +++ b/gcc/config/aarch64/aarch64.md @@ -347,9 +347,6 @@ ; must not operate on inactive inputs if doing so could induce a fault. (SVE_STRICT_GP 1)]) -;; If further include files are added the defintion of MD_INCLUDES -;; must be updated. 
- (include "constraints.md") (include "predicates.md") (include "iterators.md") diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def index 7a2da1d..63a360b 100644 --- a/gcc/config/i386/i386-builtin-types.def +++ b/gcc/config/i386/i386-builtin-types.def @@ -69,6 +69,7 @@ DEF_PRIMITIVE_TYPE (UINT16, short_unsigned_type_node) DEF_PRIMITIVE_TYPE (INT64, long_long_integer_type_node) DEF_PRIMITIVE_TYPE (UINT64, long_long_unsigned_type_node) DEF_PRIMITIVE_TYPE (FLOAT16, ix86_float16_type_node) +DEF_PRIMITIVE_TYPE (BFLOAT16, ix86_bf16_type_node) DEF_PRIMITIVE_TYPE (FLOAT, float_type_node) DEF_PRIMITIVE_TYPE (DOUBLE, double_type_node) DEF_PRIMITIVE_TYPE (FLOAT80, float80_type_node) diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc index fe7243c..6a04fb5 100644 --- a/gcc/config/i386/i386-builtins.cc +++ b/gcc/config/i386/i386-builtins.cc @@ -126,6 +126,9 @@ BDESC_VERIFYS (IX86_BUILTIN_MAX, static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1]; tree ix86_float16_type_node = NULL_TREE; +tree ix86_bf16_type_node = NULL_TREE; +tree ix86_bf16_ptr_type_node = NULL_TREE; + /* Retrieve an element from the above table, building some of the types lazily. 
*/ @@ -1367,6 +1370,22 @@ ix86_register_float16_builtin_type (void) } static void +ix86_register_bf16_builtin_type (void) +{ + ix86_bf16_type_node = make_node (REAL_TYPE); + TYPE_PRECISION (ix86_bf16_type_node) = 16; + SET_TYPE_MODE (ix86_bf16_type_node, BFmode); + layout_type (ix86_bf16_type_node); + + if (!maybe_get_identifier ("__bf16") && TARGET_SSE2) + { + lang_hooks.types.register_builtin_type (ix86_bf16_type_node, + "__bf16"); + ix86_bf16_ptr_type_node = build_pointer_type (ix86_bf16_type_node); + } +} + +static void ix86_init_builtin_types (void) { tree float80_type_node, const_string_type_node; @@ -1396,6 +1415,8 @@ ix86_init_builtin_types (void) ix86_register_float16_builtin_type (); + ix86_register_bf16_builtin_type (); + const_string_type_node = build_pointer_type (build_qualified_type (char_type_node, TYPE_QUAL_CONST)); diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc index effc2f2..821d8c7 100644 --- a/gcc/config/i386/i386-features.cc +++ b/gcc/config/i386/i386-features.cc @@ -1209,6 +1209,8 @@ timode_scalar_chain::compute_convert_gain () rtx def_set = single_set (insn); rtx src = SET_SRC (def_set); rtx dst = SET_DEST (def_set); + HOST_WIDE_INT op1val; + int scost, vcost; int igain = 0; switch (GET_CODE (src)) @@ -1245,9 +1247,157 @@ timode_scalar_chain::compute_convert_gain () case ASHIFT: case LSHIFTRT: - /* For logical shifts by constant multiples of 8. */ - igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (4) - : COSTS_N_INSNS (1); + /* See ix86_expand_v1ti_shift. */ + op1val = XINT (src, 1); + if (optimize_insn_for_size_p ()) + { + if (op1val == 64 || op1val == 65) + scost = COSTS_N_BYTES (5); + else if (op1val >= 66) + scost = COSTS_N_BYTES (6); + else if (op1val == 1) + scost = COSTS_N_BYTES (8); + else + scost = COSTS_N_BYTES (9); + + if ((op1val & 7) == 0) + vcost = COSTS_N_BYTES (5); + else if (op1val > 64) + vcost = COSTS_N_BYTES (10); + else + vcost = TARGET_AVX ? 
COSTS_N_BYTES (19) : COSTS_N_BYTES (23); + } + else + { + scost = COSTS_N_INSNS (2); + if ((op1val & 7) == 0) + vcost = COSTS_N_INSNS (1); + else if (op1val > 64) + vcost = COSTS_N_INSNS (2); + else + vcost = TARGET_AVX ? COSTS_N_INSNS (4) : COSTS_N_INSNS (5); + } + igain = scost - vcost; + break; + + case ASHIFTRT: + /* See ix86_expand_v1ti_ashiftrt. */ + op1val = XINT (src, 1); + if (optimize_insn_for_size_p ()) + { + if (op1val == 64 || op1val == 127) + scost = COSTS_N_BYTES (7); + else if (op1val == 1) + scost = COSTS_N_BYTES (8); + else if (op1val == 65) + scost = COSTS_N_BYTES (10); + else if (op1val >= 66) + scost = COSTS_N_BYTES (11); + else + scost = COSTS_N_BYTES (9); + + if (op1val == 127) + vcost = COSTS_N_BYTES (10); + else if (op1val == 64) + vcost = COSTS_N_BYTES (14); + else if (op1val == 96) + vcost = COSTS_N_BYTES (18); + else if (op1val >= 111) + vcost = COSTS_N_BYTES (15); + else if (TARGET_AVX2 && op1val == 32) + vcost = COSTS_N_BYTES (16); + else if (TARGET_SSE4_1 && op1val == 32) + vcost = COSTS_N_BYTES (20); + else if (op1val >= 96) + vcost = COSTS_N_BYTES (23); + else if ((op1val & 7) == 0) + vcost = COSTS_N_BYTES (28); + else if (TARGET_AVX2 && op1val < 32) + vcost = COSTS_N_BYTES (30); + else if (op1val == 1 || op1val >= 64) + vcost = COSTS_N_BYTES (42); + else + vcost = COSTS_N_BYTES (47); + } + else + { + if (op1val >= 65 && op1val <= 126) + scost = COSTS_N_INSNS (3); + else + scost = COSTS_N_INSNS (2); + + if (op1val == 127) + vcost = COSTS_N_INSNS (2); + else if (op1val == 64) + vcost = COSTS_N_INSNS (3); + else if (op1val == 96) + vcost = COSTS_N_INSNS (4); + else if (op1val >= 111) + vcost = COSTS_N_INSNS (3); + else if (TARGET_AVX2 && op1val == 32) + vcost = COSTS_N_INSNS (3); + else if (TARGET_SSE4_1 && op1val == 32) + vcost = COSTS_N_INSNS (4); + else if (op1val >= 96) + vcost = COSTS_N_INSNS (5); + else if ((op1val & 7) == 0) + vcost = COSTS_N_INSNS (6); + else if (TARGET_AVX2 && op1val < 32) + vcost = COSTS_N_INSNS (6); + else 
if (op1val == 1 || op1val >= 64) + vcost = COSTS_N_INSNS (9); + else + vcost = COSTS_N_INSNS (10); + } + igain = scost - vcost; + break; + + case ROTATE: + case ROTATERT: + /* See ix86_expand_v1ti_rotate. */ + op1val = XINT (src, 1); + if (optimize_insn_for_size_p ()) + { + scost = COSTS_N_BYTES (13); + if ((op1val & 31) == 0) + vcost = COSTS_N_BYTES (5); + else if ((op1val & 7) == 0) + vcost = TARGET_AVX ? COSTS_N_BYTES (13) : COSTS_N_BYTES (18); + else if (op1val > 32 && op1val < 96) + vcost = COSTS_N_BYTES (24); + else + vcost = COSTS_N_BYTES (19); + } + else + { + scost = COSTS_N_INSNS (3); + if ((op1val & 31) == 0) + vcost = COSTS_N_INSNS (1); + else if ((op1val & 7) == 0) + vcost = TARGET_AVX ? COSTS_N_INSNS (3) : COSTS_N_INSNS (4); + else if (op1val > 32 && op1val < 96) + vcost = COSTS_N_INSNS (5); + else + vcost = COSTS_N_INSNS (1); + } + igain = scost - vcost; + break; + + case COMPARE: + if (XEXP (src, 1) == const0_rtx) + { + if (GET_CODE (XEXP (src, 0)) == AND) + /* and;and;or (9 bytes) vs. ptest (5 bytes). */ + igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (4) + : COSTS_N_INSNS (2); + /* or (3 bytes) vs. ptest (5 bytes). */ + else if (optimize_insn_for_size_p ()) + igain = -COSTS_N_BYTES (2); + } + else if (XEXP (src, 1) == const1_rtx) + /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */ + igain = optimize_insn_for_size_p() ? -COSTS_N_BYTES (6) + : -COSTS_N_INSNS (1); break; default: @@ -1503,6 +1653,9 @@ timode_scalar_chain::convert_insn (rtx_insn *insn) case ASHIFT: case LSHIFTRT: + case ASHIFTRT: + case ROTATERT: + case ROTATE: convert_op (&XEXP (src, 0), insn); PUT_MODE (src, V1TImode); break; @@ -1861,11 +2014,13 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn) case ASHIFT: case LSHIFTRT: - /* Handle logical shifts by integer constants between 0 and 120 - that are multiples of 8. */ + case ASHIFTRT: + case ROTATERT: + case ROTATE: + /* Handle shifts/rotates by integer constants between 0 and 127. 
*/ return REG_P (XEXP (src, 0)) && CONST_INT_P (XEXP (src, 1)) - && (INTVAL (XEXP (src, 1)) & ~0x78) == 0; + && (INTVAL (XEXP (src, 1)) & ~0x7f) == 0; default: return false; diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def index e2e1e18..b49daae 100644 --- a/gcc/config/i386/i386-modes.def +++ b/gcc/config/i386/i386-modes.def @@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_intel_96_format); FLOAT_MODE (TF, 16, ieee_quad_format); FLOAT_MODE (HF, 2, ieee_half_format); +FLOAT_MODE (BF, 2, 0); +ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format); /* In ILP32 mode, XFmode has size 12 and alignment 4. In LP64 mode, XFmode has size and alignment 16. */ diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 5be76e1..fa3722a 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -2399,6 +2399,7 @@ classify_argument (machine_mode mode, const_tree type, case E_CTImode: return 0; case E_HFmode: + case E_BFmode: if (!(bit_offset % 64)) classes[0] = X86_64_SSEHF_CLASS; else @@ -2792,9 +2793,10 @@ construct_container (machine_mode mode, machine_mode orig_mode, intreg++; break; case X86_64_SSEHF_CLASS: + tmpmode = (mode == BFmode ? BFmode : HFmode); exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, - gen_rtx_REG (HFmode, + gen_rtx_REG (tmpmode, GET_SSE_REGNO (sse_regno)), GEN_INT (i*8)); sse_regno++; @@ -4001,8 +4003,8 @@ function_value_32 (machine_mode orig_mode, machine_mode mode, /* Most things go in %eax. */ regno = AX_REG; - /* Return _Float16/_Complex _Foat16 by sse register. */ - if (mode == HFmode) + /* Return __bf16/_Float16/_Complex _Float16 by sse register. 
*/ + if (mode == HFmode || mode == BFmode) regno = FIRST_SSE_REG; if (mode == HCmode) { @@ -4050,6 +4052,7 @@ function_value_64 (machine_mode orig_mode, machine_mode mode, switch (mode) { + case E_BFmode: case E_HFmode: case E_HCmode: case E_SFmode: @@ -5631,6 +5634,7 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands) return "%vmovss\t{%1, %0|%0, %1}"; case MODE_HF: + case MODE_BF: if (REG_P (operands[0]) && REG_P (operands[1])) return "vmovsh\t{%d1, %0|%0, %d1}"; else @@ -10648,6 +10652,11 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x) case CONST_VECTOR: if (!standard_sse_constant_p (x, mode)) return false; + break; + + case CONST_DOUBLE: + if (mode == E_BFmode) + return false; default: break; @@ -19415,7 +19424,8 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass, } /* Require movement to gpr, and then store to memory. */ - if ((mode == HFmode || mode == HImode || mode == V2QImode) + if ((mode == HFmode || mode == HImode || mode == V2QImode + || mode == BFmode) && !TARGET_SSE4_1 && SSE_CLASS_P (rclass) && !in_p && MEM_P (x)) @@ -22358,7 +22368,7 @@ ix86_scalar_mode_supported_p (scalar_mode mode) return default_decimal_float_supported_p (); else if (mode == TFmode) return true; - else if (mode == HFmode && TARGET_SSE2) + else if ((mode == HFmode || mode == BFmode) && TARGET_SSE2) return true; else return default_scalar_mode_supported_p (mode); @@ -22673,6 +22683,8 @@ ix86_mangle_type (const_tree type) switch (TYPE_MODE (type)) { + case E_BFmode: + return "u6__bf16"; case E_HFmode: /* _Float16 is "DF16_". Align with clang's decision in https://reviews.llvm.org/D33719. */ @@ -22688,6 +22700,55 @@ ix86_mangle_type (const_tree type) } } +/* Return the diagnostic message string if conversion from FROMTYPE to + TOTYPE is not allowed, NULL otherwise. 
*/ + +static const char * +ix86_invalid_conversion (const_tree fromtype, const_tree totype) +{ + if (element_mode (fromtype) != element_mode (totype)) + { + /* Do not allow conversions to/from BFmode scalar types. */ + if (TYPE_MODE (fromtype) == BFmode) + return N_("invalid conversion from type %<__bf16%>"); + if (TYPE_MODE (totype) == BFmode) + return N_("invalid conversion to type %<__bf16%>"); + } + + /* Conversion allowed. */ + return NULL; +} + +/* Return the diagnostic message string if the unary operation OP is + not permitted on TYPE, NULL otherwise. */ + +static const char * +ix86_invalid_unary_op (int op, const_tree type) +{ + /* Reject all single-operand operations on BFmode except for &. */ + if (element_mode (type) == BFmode && op != ADDR_EXPR) + return N_("operation not permitted on type %<__bf16%>"); + + /* Operation allowed. */ + return NULL; +} + +/* Return the diagnostic message string if the binary operation OP is + not permitted on TYPE1 and TYPE2, NULL otherwise. */ + +static const char * +ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, + const_tree type2) +{ + /* Reject all 2-operand operations on BFmode. */ + if (element_mode (type1) == BFmode + || element_mode (type2) == BFmode) + return N_("operation not permitted on type %<__bf16%>"); + + /* Operation allowed. 
*/ + return NULL; +} + static GTY(()) tree ix86_tls_stack_chk_guard_decl; static tree @@ -24745,6 +24806,15 @@ ix86_libgcc_floating_mode_supported_p #undef TARGET_MANGLE_TYPE #define TARGET_MANGLE_TYPE ix86_mangle_type +#undef TARGET_INVALID_CONVERSION +#define TARGET_INVALID_CONVERSION ix86_invalid_conversion + +#undef TARGET_INVALID_UNARY_OP +#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op + +#undef TARGET_INVALID_BINARY_OP +#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op + #undef TARGET_STACK_PROTECT_GUARD #define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h index f16df63..0da3dce 100644 --- a/gcc/config/i386/i386.h +++ b/gcc/config/i386/i386.h @@ -1046,7 +1046,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode \ || (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \ || (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode \ - || (MODE) == HFmode) + || (MODE) == HFmode || (MODE) == BFmode) #define VALID_SSE_REG_MODE(MODE) \ ((MODE) == V1TImode || (MODE) == TImode \ @@ -1077,7 +1077,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv); || (MODE) == CQImode || (MODE) == CHImode \ || (MODE) == CSImode || (MODE) == CDImode \ || (MODE) == SDmode || (MODE) == DDmode \ - || (MODE) == HFmode || (MODE) == HCmode \ + || (MODE) == HFmode || (MODE) == HCmode || (MODE) == BFmode \ || (MODE) == V2HImode || (MODE) == V2HFmode \ || (MODE) == V1SImode || (MODE) == V4QImode || (MODE) == V2QImode \ || (TARGET_64BIT \ diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index 2fde8cd..5f7e245 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -499,7 +499,7 @@ ;; Main data type used by the insn (define_attr "mode" - "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,SF,DF,XF,TF,V32HF,V16HF,V8HF, + 
"unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF, V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V2HF" (const_string "unknown")) @@ -1104,7 +1104,7 @@ ;; GET_MODE_SIZE (<MODE>mode). For XFmode which depends on ;; command line options just use GET_MODE_SIZE macro. (define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8") - (TI "16") (HF "2") (SF "4") (DF "8") + (TI "16") (HF "2") (BF "2") (SF "4") (DF "8") (XF "GET_MODE_SIZE (XFmode)") (V16QI "16") (V32QI "32") (V64QI "64") (V8HI "16") (V16HI "32") (V32HI "64") @@ -1248,7 +1248,7 @@ (define_mode_iterator X87MODEF [SF DF XF]) ;; All x87 floating point modes plus HFmode -(define_mode_iterator X87MODEFH [HF SF DF XF]) +(define_mode_iterator X87MODEFH [HF SF DF XF BF]) ;; All SSE floating point modes (define_mode_iterator SSEMODEF [HF SF DF TF]) @@ -3439,9 +3439,11 @@ operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx); }) -(define_insn "*pushhf_rex64" - [(set (match_operand:HF 0 "push_operand" "=X,X") - (match_operand:HF 1 "nonmemory_no_elim_operand" "r,x"))] +(define_mode_iterator HFBF [HF BF]) + +(define_insn "*push<mode>_rex64" + [(set (match_operand:HFBF 0 "push_operand" "=X,X") + (match_operand:HFBF 1 "nonmemory_no_elim_operand" "r,x"))] "TARGET_64BIT" { /* Anything else should be already split before reg-stack. */ @@ -3452,9 +3454,9 @@ (set_attr "type" "push,multi") (set_attr "mode" "DI,TI")]) -(define_insn "*pushhf" - [(set (match_operand:HF 0 "push_operand" "=X,X") - (match_operand:HF 1 "general_no_elim_operand" "rmF,x"))] +(define_insn "*push<mode>" + [(set (match_operand:HFBF 0 "push_operand" "=X,X") + (match_operand:HFBF 1 "general_no_elim_operand" "rmF,x"))] "!TARGET_64BIT" { /* Anything else should be already split before reg-stack. */ @@ -3493,7 +3495,7 @@ (set_attr "unit" "i387,*,*") (set_attr "mode" "SF,SI,SF")]) -(define_mode_iterator MODESH [SF HF]) +(define_mode_iterator MODESH [SF HF BF]) ;; %%% Kill this when call knows how to work this out. 
(define_split [(set (match_operand:MODESH 0 "push_operand") @@ -3981,18 +3983,21 @@ ] (const_string "*")))]) -(define_insn "*movhf_internal" - [(set (match_operand:HF 0 "nonimmediate_operand" +(define_mode_attr hfbfconstf + [(HF "F") (BF "")]) + +(define_insn "*mov<mode>_internal" + [(set (match_operand:HFBF 0 "nonimmediate_operand" "=?r,?r,?r,?m,v,v,?r,m,?v,v") - (match_operand:HF 1 "general_operand" - "r ,F ,m ,rF,C,v, v,v,r ,m"))] + (match_operand:HFBF 1 "general_operand" + "r ,F ,m ,r<hfbfconstf>,C,v, v,v,r ,m"))] "!(MEM_P (operands[0]) && MEM_P (operands[1])) && (lra_in_progress || reload_completed || !CONST_DOUBLE_P (operands[1]) || (TARGET_SSE2 - && standard_sse_constant_p (operands[1], HFmode) == 1) - || memory_operand (operands[0], HFmode))" + && standard_sse_constant_p (operands[1], <MODE>mode) == 1) + || memory_operand (operands[0], <MODE>mode))" { switch (get_attr_type (insn)) { @@ -4087,7 +4092,13 @@ (not (match_test "TARGET_HIMODE_MATH")))) (const_string "SI") ] - (const_string "HI")))]) + (const_string "HI"))) + (set (attr "enabled") + (cond [(and (match_test "<MODE>mode == BFmode") + (eq_attr "alternative" "1")) + (symbol_ref "false") + ] + (const_string "*")))]) (define_split [(set (match_operand 0 "any_fp_register_operand") diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md index 064596d..4f16bb7 100644 --- a/gcc/config/i386/predicates.md +++ b/gcc/config/i386/predicates.md @@ -931,6 +931,14 @@ return val <= 255*8 && val % 8 == 0; }) +;; Match 1 to 255 except multiples of 8 +(define_predicate "const_0_to_255_not_mul_8_operand" + (match_code "const_int") +{ + unsigned HOST_WIDE_INT val = INTVAL (op); + return val <= 255 && val % 8 != 0; +}) + ;; Return true if OP is CONST_INT >= 1 and <= 31 (a valid operand ;; for shift & compare patterns, as shifting by 0 does not change flags). 
(define_predicate "const_1_to_31_operand" diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md index ccd9d00..b23f07e 100644 --- a/gcc/config/i386/sse.md +++ b/gcc/config/i386/sse.md @@ -15995,10 +15995,28 @@ (define_expand "ashlv1ti3" [(set (match_operand:V1TI 0 "register_operand") + (ashift:V1TI + (match_operand:V1TI 1 "register_operand") + (match_operand:QI 2 "general_operand")))] + "TARGET_SSE2 && TARGET_64BIT" +{ + if (!CONST_INT_P (operands[2])) + { + ix86_expand_v1ti_shift (ASHIFT, operands); + DONE; + } +}) + +(define_insn_and_split "*ashlv1ti3_internal" + [(set (match_operand:V1TI 0 "register_operand") (ashift:V1TI (match_operand:V1TI 1 "register_operand") - (match_operand:QI 2 "general_operand")))] - "TARGET_SSE2 && TARGET_64BIT" + (match_operand:SI 2 "const_0_to_255_not_mul_8_operand")))] + "TARGET_SSE2 && TARGET_64BIT + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] { ix86_expand_v1ti_shift (ASHIFT, operands); DONE; @@ -16011,6 +16029,24 @@ (match_operand:QI 2 "general_operand")))] "TARGET_SSE2 && TARGET_64BIT" { + if (!CONST_INT_P (operands[2])) + { + ix86_expand_v1ti_shift (LSHIFTRT, operands); + DONE; + } +}) + +(define_insn_and_split "*lshrv1ti3_internal" + [(set (match_operand:V1TI 0 "register_operand") + (lshiftrt:V1TI + (match_operand:V1TI 1 "register_operand") + (match_operand:SI 2 "const_0_to_255_not_mul_8_operand")))] + "TARGET_SSE2 && TARGET_64BIT + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ ix86_expand_v1ti_shift (LSHIFTRT, operands); DONE; }) @@ -16022,6 +16058,25 @@ (match_operand:QI 2 "general_operand")))] "TARGET_SSE2 && TARGET_64BIT" { + if (!CONST_INT_P (operands[2])) + { + ix86_expand_v1ti_ashiftrt (operands); + DONE; + } +}) + + +(define_insn_and_split "*ashrv1ti3_internal" + [(set (match_operand:V1TI 0 "register_operand") + (ashiftrt:V1TI + (match_operand:V1TI 1 "register_operand") + (match_operand:SI 2 "const_0_to_255_operand")))] + "TARGET_SSE2 && TARGET_64BIT + && ix86_pre_reload_split 
()" + "#" + "&& 1" + [(const_int 0)] +{ ix86_expand_v1ti_ashiftrt (operands); DONE; }) @@ -16033,6 +16088,24 @@ (match_operand:QI 2 "general_operand")))] "TARGET_SSE2 && TARGET_64BIT" { + if (!CONST_INT_P (operands[2])) + { + ix86_expand_v1ti_rotate (ROTATE, operands); + DONE; + } +}) + +(define_insn_and_split "*rotlv1ti3_internal" + [(set (match_operand:V1TI 0 "register_operand") + (rotate:V1TI + (match_operand:V1TI 1 "register_operand") + (match_operand:SI 2 "const_0_to_255_operand")))] + "TARGET_SSE2 && TARGET_64BIT + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ ix86_expand_v1ti_rotate (ROTATE, operands); DONE; }) @@ -16044,6 +16117,24 @@ (match_operand:QI 2 "general_operand")))] "TARGET_SSE2 && TARGET_64BIT" { + if (!CONST_INT_P (operands[2])) + { + ix86_expand_v1ti_rotate (ROTATERT, operands); + DONE; + } +}) + +(define_insn_and_split "*rotrv1ti3_internal" + [(set (match_operand:V1TI 0 "register_operand") + (rotatert:V1TI + (match_operand:V1TI 1 "register_operand") + (match_operand:SI 2 "const_0_to_255_operand")))] + "TARGET_SSE2 && TARGET_64BIT + && ix86_pre_reload_split ()" + "#" + "&& 1" + [(const_int 0)] +{ ix86_expand_v1ti_rotate (ROTATERT, operands); DONE; }) diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md index a183b6a..032f426 100644 --- a/gcc/config/rs6000/mma.md +++ b/gcc/config/rs6000/mma.md @@ -268,10 +268,25 @@ (define_expand "movoo" [(set (match_operand:OO 0 "nonimmediate_operand") (match_operand:OO 1 "input_operand"))] - "TARGET_MMA" + "" { - rs6000_emit_move (operands[0], operands[1], OOmode); - DONE; + if (TARGET_MMA) + { + rs6000_emit_move (operands[0], operands[1], OOmode); + DONE; + } + else if (currently_expanding_to_rtl && seen_error ()) + { + /* PR103353 shows we may want to continue to expand the __builtin_vsx_lxvp + built-in function, even if we have already emitted error messages about + some missing required conditions. 
As shown in that PR, without one + explicit mov optab on OOmode provided, it would call emit_move_insn + recursively. So we allow this pattern to be generated when we are + expanding to RTL and have seen errors. It would not cause further ICEs + as the compilation would stop soon after expanding. */ + } + else + gcc_unreachable (); }) (define_insn_and_split "*movoo" @@ -300,10 +315,22 @@ (define_expand "movxo" [(set (match_operand:XO 0 "nonimmediate_operand") (match_operand:XO 1 "input_operand"))] - "TARGET_MMA" + "" { - rs6000_emit_move (operands[0], operands[1], XOmode); - DONE; + if (TARGET_MMA) + { + rs6000_emit_move (operands[0], operands[1], XOmode); + DONE; + } + else if (currently_expanding_to_rtl && seen_error ()) + { + /* PR103353 shows we may want to continue to expand the __builtin_vsx_lxvp + built-in function, even if we have already emitted error messages about + some missing required conditions. So do the same handlings for XOmode + as OOmode here. */ + } + else + gcc_unreachable (); }) (define_insn_and_split "*movxo" diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc index 2819773..12afa86 100644 --- a/gcc/config/rs6000/rs6000-builtin.cc +++ b/gcc/config/rs6000/rs6000-builtin.cc @@ -830,44 +830,8 @@ rs6000_init_builtins (void) fprintf (stderr, "\nAutogenerated built-in functions:\n\n"); for (int i = 1; i < (int) RS6000_BIF_MAX; i++) { - bif_enable e = rs6000_builtin_info[i].enable; - if (e == ENB_P5 && !TARGET_POPCNTB) - continue; - if (e == ENB_P6 && !TARGET_CMPB) - continue; - if (e == ENB_P6_64 && !(TARGET_CMPB && TARGET_POWERPC64)) - continue; - if (e == ENB_ALTIVEC && !TARGET_ALTIVEC) - continue; - if (e == ENB_VSX && !TARGET_VSX) - continue; - if (e == ENB_P7 && !TARGET_POPCNTD) - continue; - if (e == ENB_P7_64 && !(TARGET_POPCNTD && TARGET_POWERPC64)) - continue; - if (e == ENB_P8 && !TARGET_DIRECT_MOVE) - continue; - if (e == ENB_P8V && !TARGET_P8_VECTOR) - continue; - if (e == ENB_P9 && !TARGET_MODULO) - 
continue; - if (e == ENB_P9_64 && !(TARGET_MODULO && TARGET_POWERPC64)) - continue; - if (e == ENB_P9V && !TARGET_P9_VECTOR) - continue; - if (e == ENB_IEEE128_HW && !TARGET_FLOAT128_HW) - continue; - if (e == ENB_DFP && !TARGET_DFP) - continue; - if (e == ENB_CRYPTO && !TARGET_CRYPTO) - continue; - if (e == ENB_HTM && !TARGET_HTM) - continue; - if (e == ENB_P10 && !TARGET_POWER10) - continue; - if (e == ENB_P10_64 && !(TARGET_POWER10 && TARGET_POWERPC64)) - continue; - if (e == ENB_MMA && !TARGET_MMA) + enum rs6000_gen_builtins fn_code = (enum rs6000_gen_builtins) i; + if (!rs6000_builtin_is_supported (fn_code)) continue; tree fntype = rs6000_builtin_info[i].fntype; tree t = TREE_TYPE (fntype); @@ -3370,29 +3334,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */, but check for actual availability now, during expand time. For invalid builtins, generate a normal call. */ bifdata *bifaddr = &rs6000_builtin_info[uns_fcode]; - bif_enable e = bifaddr->enable; - - if (!(e == ENB_ALWAYS - || (e == ENB_P5 && TARGET_POPCNTB) - || (e == ENB_P6 && TARGET_CMPB) - || (e == ENB_P6_64 && TARGET_CMPB && TARGET_POWERPC64) - || (e == ENB_ALTIVEC && TARGET_ALTIVEC) - || (e == ENB_CELL && TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL) - || (e == ENB_VSX && TARGET_VSX) - || (e == ENB_P7 && TARGET_POPCNTD) - || (e == ENB_P7_64 && TARGET_POPCNTD && TARGET_POWERPC64) - || (e == ENB_P8 && TARGET_DIRECT_MOVE) - || (e == ENB_P8V && TARGET_P8_VECTOR) - || (e == ENB_P9 && TARGET_MODULO) - || (e == ENB_P9_64 && TARGET_MODULO && TARGET_POWERPC64) - || (e == ENB_P9V && TARGET_P9_VECTOR) - || (e == ENB_IEEE128_HW && TARGET_FLOAT128_HW) - || (e == ENB_DFP && TARGET_DFP) - || (e == ENB_CRYPTO && TARGET_CRYPTO) - || (e == ENB_HTM && TARGET_HTM) - || (e == ENB_P10 && TARGET_POWER10) - || (e == ENB_P10_64 && TARGET_POWER10 && TARGET_POWERPC64) - || (e == ENB_MMA && TARGET_MMA))) + + if (!rs6000_builtin_is_supported (fcode)) { rs6000_invalid_builtin (fcode); return expand_call (exp, 
target, ignore); diff --git a/gcc/config/rs6000/rs6000-internal.h b/gcc/config/rs6000/rs6000-internal.h index 8ee8c98..b9e82c0 100644 --- a/gcc/config/rs6000/rs6000-internal.h +++ b/gcc/config/rs6000/rs6000-internal.h @@ -82,7 +82,6 @@ extern const char *rs6000_machine; from rs6000-logue.cc */ extern int uses_TOC (void); -extern bool rs6000_global_entry_point_needed_p (void); extern void rs6000_output_function_prologue (FILE *file); extern void rs6000_output_function_epilogue (FILE *file); extern bool rs6000_function_ok_for_sibcall (tree decl, tree exp); |