aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorMartin Liska <mliska@suse.cz>2022-08-16 10:06:14 +0200
committerMartin Liska <mliska@suse.cz>2022-08-16 10:06:14 +0200
commit091222fb0aaa09dcf90f2bc747f1d8a6a8ef1575 (patch)
tree07de02401c3374395a453724c4163d769c02e644 /gcc/config
parentb629a7958faf817ef658e3ce59183bfb9ccefe96 (diff)
parent1c596391e150a6b0c55960c1c1cf1da76ea78230 (diff)
downloadgcc-091222fb0aaa09dcf90f2bc747f1d8a6a8ef1575.zip
gcc-091222fb0aaa09dcf90f2bc747f1d8a6a8ef1575.tar.gz
gcc-091222fb0aaa09dcf90f2bc747f1d8a6a8ef1575.tar.bz2
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/aarch64/aarch64-sve.md4
-rw-r--r--gcc/config/aarch64/aarch64.md3
-rw-r--r--gcc/config/i386/i386-builtin-types.def1
-rw-r--r--gcc/config/i386/i386-builtins.cc21
-rw-r--r--gcc/config/i386/i386-features.cc167
-rw-r--r--gcc/config/i386/i386-modes.def2
-rw-r--r--gcc/config/i386/i386.cc80
-rw-r--r--gcc/config/i386/i386.h4
-rw-r--r--gcc/config/i386/i386.md45
-rw-r--r--gcc/config/i386/predicates.md8
-rw-r--r--gcc/config/i386/sse.md95
-rw-r--r--gcc/config/rs6000/mma.md39
-rw-r--r--gcc/config/rs6000/rs6000-builtin.cc65
-rw-r--r--gcc/config/rs6000/rs6000-internal.h1
14 files changed, 430 insertions, 105 deletions
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index bd60e65..e08bee1 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -8231,7 +8231,7 @@
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
(match_operand:SVE_FULL_F 2 "register_operand" "w")
- (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+ (match_operand:SVE_FULL_F 3 "register_operand" "w")]
UNSPEC_COND_FCMUO))
(match_operand:<VPRED> 4 "register_operand" "Upa"))
(match_dup:<VPRED> 1)))
@@ -8267,7 +8267,7 @@
[(match_operand:<VPRED> 1)
(const_int SVE_KNOWN_PTRUE)
(match_operand:SVE_FULL_F 2 "register_operand" "w")
- (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
+ (match_operand:SVE_FULL_F 3 "register_operand" "w")]
UNSPEC_COND_FCMUO))
(not:<VPRED>
(match_operand:<VPRED> 4 "register_operand" "Upa")))
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index ef0aed2..3ea16db 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -347,9 +347,6 @@
; must not operate on inactive inputs if doing so could induce a fault.
(SVE_STRICT_GP 1)])
-;; If further include files are added the defintion of MD_INCLUDES
-;; must be updated.
-
(include "constraints.md")
(include "predicates.md")
(include "iterators.md")
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 7a2da1d..63a360b 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -69,6 +69,7 @@ DEF_PRIMITIVE_TYPE (UINT16, short_unsigned_type_node)
DEF_PRIMITIVE_TYPE (INT64, long_long_integer_type_node)
DEF_PRIMITIVE_TYPE (UINT64, long_long_unsigned_type_node)
DEF_PRIMITIVE_TYPE (FLOAT16, ix86_float16_type_node)
+DEF_PRIMITIVE_TYPE (BFLOAT16, ix86_bf16_type_node)
DEF_PRIMITIVE_TYPE (FLOAT, float_type_node)
DEF_PRIMITIVE_TYPE (DOUBLE, double_type_node)
DEF_PRIMITIVE_TYPE (FLOAT80, float80_type_node)
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index fe7243c..6a04fb5 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -126,6 +126,9 @@ BDESC_VERIFYS (IX86_BUILTIN_MAX,
static GTY(()) tree ix86_builtin_type_tab[(int) IX86_BT_LAST_CPTR + 1];
tree ix86_float16_type_node = NULL_TREE;
+tree ix86_bf16_type_node = NULL_TREE;
+tree ix86_bf16_ptr_type_node = NULL_TREE;
+
/* Retrieve an element from the above table, building some of
the types lazily. */
@@ -1367,6 +1370,22 @@ ix86_register_float16_builtin_type (void)
}
static void
+ix86_register_bf16_builtin_type (void)
+{
+ ix86_bf16_type_node = make_node (REAL_TYPE);
+ TYPE_PRECISION (ix86_bf16_type_node) = 16;
+ SET_TYPE_MODE (ix86_bf16_type_node, BFmode);
+ layout_type (ix86_bf16_type_node);
+
+ if (!maybe_get_identifier ("__bf16") && TARGET_SSE2)
+ {
+ lang_hooks.types.register_builtin_type (ix86_bf16_type_node,
+ "__bf16");
+ ix86_bf16_ptr_type_node = build_pointer_type (ix86_bf16_type_node);
+ }
+}
+
+static void
ix86_init_builtin_types (void)
{
tree float80_type_node, const_string_type_node;
@@ -1396,6 +1415,8 @@ ix86_init_builtin_types (void)
ix86_register_float16_builtin_type ();
+ ix86_register_bf16_builtin_type ();
+
const_string_type_node
= build_pointer_type (build_qualified_type
(char_type_node, TYPE_QUAL_CONST));
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index effc2f2..821d8c7 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -1209,6 +1209,8 @@ timode_scalar_chain::compute_convert_gain ()
rtx def_set = single_set (insn);
rtx src = SET_SRC (def_set);
rtx dst = SET_DEST (def_set);
+ HOST_WIDE_INT op1val;
+ int scost, vcost;
int igain = 0;
switch (GET_CODE (src))
@@ -1245,9 +1247,157 @@ timode_scalar_chain::compute_convert_gain ()
case ASHIFT:
case LSHIFTRT:
- /* For logical shifts by constant multiples of 8. */
- igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (4)
- : COSTS_N_INSNS (1);
+ /* See ix86_expand_v1ti_shift. The shift count is the CONST_INT in operand 1 of SRC. */
+ op1val = INTVAL (XEXP (src, 1));
+ if (optimize_insn_for_size_p ())
+ {
+ if (op1val == 64 || op1val == 65)
+ scost = COSTS_N_BYTES (5);
+ else if (op1val >= 66)
+ scost = COSTS_N_BYTES (6);
+ else if (op1val == 1)
+ scost = COSTS_N_BYTES (8);
+ else
+ scost = COSTS_N_BYTES (9);
+
+ if ((op1val & 7) == 0)
+ vcost = COSTS_N_BYTES (5);
+ else if (op1val > 64)
+ vcost = COSTS_N_BYTES (10);
+ else
+ vcost = TARGET_AVX ? COSTS_N_BYTES (19) : COSTS_N_BYTES (23);
+ }
+ else
+ {
+ scost = COSTS_N_INSNS (2);
+ if ((op1val & 7) == 0)
+ vcost = COSTS_N_INSNS (1);
+ else if (op1val > 64)
+ vcost = COSTS_N_INSNS (2);
+ else
+ vcost = TARGET_AVX ? COSTS_N_INSNS (4) : COSTS_N_INSNS (5);
+ }
+ igain = scost - vcost;
+ break;
+
+ case ASHIFTRT:
+ /* See ix86_expand_v1ti_ashiftrt. The shift count is the CONST_INT in operand 1 of SRC. */
+ op1val = INTVAL (XEXP (src, 1));
+ if (optimize_insn_for_size_p ())
+ {
+ if (op1val == 64 || op1val == 127)
+ scost = COSTS_N_BYTES (7);
+ else if (op1val == 1)
+ scost = COSTS_N_BYTES (8);
+ else if (op1val == 65)
+ scost = COSTS_N_BYTES (10);
+ else if (op1val >= 66)
+ scost = COSTS_N_BYTES (11);
+ else
+ scost = COSTS_N_BYTES (9);
+
+ if (op1val == 127)
+ vcost = COSTS_N_BYTES (10);
+ else if (op1val == 64)
+ vcost = COSTS_N_BYTES (14);
+ else if (op1val == 96)
+ vcost = COSTS_N_BYTES (18);
+ else if (op1val >= 111)
+ vcost = COSTS_N_BYTES (15);
+ else if (TARGET_AVX2 && op1val == 32)
+ vcost = COSTS_N_BYTES (16);
+ else if (TARGET_SSE4_1 && op1val == 32)
+ vcost = COSTS_N_BYTES (20);
+ else if (op1val >= 96)
+ vcost = COSTS_N_BYTES (23);
+ else if ((op1val & 7) == 0)
+ vcost = COSTS_N_BYTES (28);
+ else if (TARGET_AVX2 && op1val < 32)
+ vcost = COSTS_N_BYTES (30);
+ else if (op1val == 1 || op1val >= 64)
+ vcost = COSTS_N_BYTES (42);
+ else
+ vcost = COSTS_N_BYTES (47);
+ }
+ else
+ {
+ if (op1val >= 65 && op1val <= 126)
+ scost = COSTS_N_INSNS (3);
+ else
+ scost = COSTS_N_INSNS (2);
+
+ if (op1val == 127)
+ vcost = COSTS_N_INSNS (2);
+ else if (op1val == 64)
+ vcost = COSTS_N_INSNS (3);
+ else if (op1val == 96)
+ vcost = COSTS_N_INSNS (4);
+ else if (op1val >= 111)
+ vcost = COSTS_N_INSNS (3);
+ else if (TARGET_AVX2 && op1val == 32)
+ vcost = COSTS_N_INSNS (3);
+ else if (TARGET_SSE4_1 && op1val == 32)
+ vcost = COSTS_N_INSNS (4);
+ else if (op1val >= 96)
+ vcost = COSTS_N_INSNS (5);
+ else if ((op1val & 7) == 0)
+ vcost = COSTS_N_INSNS (6);
+ else if (TARGET_AVX2 && op1val < 32)
+ vcost = COSTS_N_INSNS (6);
+ else if (op1val == 1 || op1val >= 64)
+ vcost = COSTS_N_INSNS (9);
+ else
+ vcost = COSTS_N_INSNS (10);
+ }
+ igain = scost - vcost;
+ break;
+
+ case ROTATE:
+ case ROTATERT:
+ /* See ix86_expand_v1ti_rotate. The rotate count is the CONST_INT in operand 1 of SRC. */
+ op1val = INTVAL (XEXP (src, 1));
+ if (optimize_insn_for_size_p ())
+ {
+ scost = COSTS_N_BYTES (13);
+ if ((op1val & 31) == 0)
+ vcost = COSTS_N_BYTES (5);
+ else if ((op1val & 7) == 0)
+ vcost = TARGET_AVX ? COSTS_N_BYTES (13) : COSTS_N_BYTES (18);
+ else if (op1val > 32 && op1val < 96)
+ vcost = COSTS_N_BYTES (24);
+ else
+ vcost = COSTS_N_BYTES (19);
+ }
+ else
+ {
+ scost = COSTS_N_INSNS (3);
+ if ((op1val & 31) == 0)
+ vcost = COSTS_N_INSNS (1);
+ else if ((op1val & 7) == 0)
+ vcost = TARGET_AVX ? COSTS_N_INSNS (3) : COSTS_N_INSNS (4);
+ else if (op1val > 32 && op1val < 96)
+ vcost = COSTS_N_INSNS (5);
+ else
+ vcost = COSTS_N_INSNS (1);
+ }
+ igain = scost - vcost;
+ break;
+
+ case COMPARE:
+ if (XEXP (src, 1) == const0_rtx)
+ {
+ if (GET_CODE (XEXP (src, 0)) == AND)
+ /* and;and;or (9 bytes) vs. ptest (5 bytes). */
+ igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (4)
+ : COSTS_N_INSNS (2);
+ /* or (3 bytes) vs. ptest (5 bytes). */
+ else if (optimize_insn_for_size_p ())
+ igain = -COSTS_N_BYTES (2);
+ }
+ else if (XEXP (src, 1) == const1_rtx)
+ /* and;cmp -1 (7 bytes) vs. pcmpeqd;pxor;ptest (13 bytes). */
+ igain = optimize_insn_for_size_p() ? -COSTS_N_BYTES (6)
+ : -COSTS_N_INSNS (1);
break;
default:
@@ -1503,6 +1653,9 @@ timode_scalar_chain::convert_insn (rtx_insn *insn)
case ASHIFT:
case LSHIFTRT:
+ case ASHIFTRT:
+ case ROTATERT:
+ case ROTATE:
convert_op (&XEXP (src, 0), insn);
PUT_MODE (src, V1TImode);
break;
@@ -1861,11 +2014,13 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
case ASHIFT:
case LSHIFTRT:
- /* Handle logical shifts by integer constants between 0 and 120
- that are multiples of 8. */
+ case ASHIFTRT:
+ case ROTATERT:
+ case ROTATE:
+ /* Handle shifts/rotates by integer constants between 0 and 127. */
return REG_P (XEXP (src, 0))
&& CONST_INT_P (XEXP (src, 1))
- && (INTVAL (XEXP (src, 1)) & ~0x78) == 0;
+ && (INTVAL (XEXP (src, 1)) & ~0x7f) == 0;
default:
return false;
diff --git a/gcc/config/i386/i386-modes.def b/gcc/config/i386/i386-modes.def
index e2e1e18..b49daae 100644
--- a/gcc/config/i386/i386-modes.def
+++ b/gcc/config/i386/i386-modes.def
@@ -24,6 +24,8 @@ along with GCC; see the file COPYING3. If not see
FRACTIONAL_FLOAT_MODE (XF, 80, 12, ieee_extended_intel_96_format);
FLOAT_MODE (TF, 16, ieee_quad_format);
FLOAT_MODE (HF, 2, ieee_half_format);
+FLOAT_MODE (BF, 2, 0);
+ADJUST_FLOAT_FORMAT (BF, &arm_bfloat_half_format);
/* In ILP32 mode, XFmode has size 12 and alignment 4.
In LP64 mode, XFmode has size and alignment 16. */
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 5be76e1..fa3722a 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -2399,6 +2399,7 @@ classify_argument (machine_mode mode, const_tree type,
case E_CTImode:
return 0;
case E_HFmode:
+ case E_BFmode:
if (!(bit_offset % 64))
classes[0] = X86_64_SSEHF_CLASS;
else
@@ -2792,9 +2793,10 @@ construct_container (machine_mode mode, machine_mode orig_mode,
intreg++;
break;
case X86_64_SSEHF_CLASS:
+ tmpmode = (mode == BFmode ? BFmode : HFmode);
exp [nexps++]
= gen_rtx_EXPR_LIST (VOIDmode,
- gen_rtx_REG (HFmode,
+ gen_rtx_REG (tmpmode,
GET_SSE_REGNO (sse_regno)),
GEN_INT (i*8));
sse_regno++;
@@ -4001,8 +4003,8 @@ function_value_32 (machine_mode orig_mode, machine_mode mode,
/* Most things go in %eax. */
regno = AX_REG;
- /* Return _Float16/_Complex _Foat16 by sse register. */
- if (mode == HFmode)
+ /* Return __bf16/_Float16/_Complex _Float16 by SSE register. */
+ if (mode == HFmode || mode == BFmode)
regno = FIRST_SSE_REG;
if (mode == HCmode)
{
@@ -4050,6 +4052,7 @@ function_value_64 (machine_mode orig_mode, machine_mode mode,
switch (mode)
{
+ case E_BFmode:
case E_HFmode:
case E_HCmode:
case E_SFmode:
@@ -5631,6 +5634,7 @@ ix86_output_ssemov (rtx_insn *insn, rtx *operands)
return "%vmovss\t{%1, %0|%0, %1}";
case MODE_HF:
+ case MODE_BF:
if (REG_P (operands[0]) && REG_P (operands[1]))
return "vmovsh\t{%d1, %0|%0, %d1}";
else
@@ -10648,6 +10652,11 @@ ix86_legitimate_constant_p (machine_mode mode, rtx x)
case CONST_VECTOR:
if (!standard_sse_constant_p (x, mode))
return false;
+ break;
+
+ case CONST_DOUBLE:
+ if (mode == E_BFmode)
+ return false;
default:
break;
@@ -19415,7 +19424,8 @@ ix86_secondary_reload (bool in_p, rtx x, reg_class_t rclass,
}
/* Require movement to gpr, and then store to memory. */
- if ((mode == HFmode || mode == HImode || mode == V2QImode)
+ if ((mode == HFmode || mode == HImode || mode == V2QImode
+ || mode == BFmode)
&& !TARGET_SSE4_1
&& SSE_CLASS_P (rclass)
&& !in_p && MEM_P (x))
@@ -22358,7 +22368,7 @@ ix86_scalar_mode_supported_p (scalar_mode mode)
return default_decimal_float_supported_p ();
else if (mode == TFmode)
return true;
- else if (mode == HFmode && TARGET_SSE2)
+ else if ((mode == HFmode || mode == BFmode) && TARGET_SSE2)
return true;
else
return default_scalar_mode_supported_p (mode);
@@ -22673,6 +22683,8 @@ ix86_mangle_type (const_tree type)
switch (TYPE_MODE (type))
{
+ case E_BFmode:
+ return "u6__bf16";
case E_HFmode:
/* _Float16 is "DF16_".
Align with clang's decision in https://reviews.llvm.org/D33719. */
@@ -22688,6 +22700,55 @@ ix86_mangle_type (const_tree type)
}
}
+/* Return the diagnostic message string if conversion from FROMTYPE to
+ TOTYPE is not allowed, NULL otherwise. */
+
+static const char *
+ix86_invalid_conversion (const_tree fromtype, const_tree totype)
+{
+ if (element_mode (fromtype) != element_mode (totype))
+ {
+ /* Do not allow conversions to/from BFmode scalar types. */
+ if (TYPE_MODE (fromtype) == BFmode)
+ return N_("invalid conversion from type %<__bf16%>");
+ if (TYPE_MODE (totype) == BFmode)
+ return N_("invalid conversion to type %<__bf16%>");
+ }
+
+ /* Conversion allowed. */
+ return NULL;
+}
+
+/* Return the diagnostic message string if the unary operation OP is
+ not permitted on TYPE, NULL otherwise. */
+
+static const char *
+ix86_invalid_unary_op (int op, const_tree type)
+{
+ /* Reject all single-operand operations on BFmode except for &. */
+ if (element_mode (type) == BFmode && op != ADDR_EXPR)
+ return N_("operation not permitted on type %<__bf16%>");
+
+ /* Operation allowed. */
+ return NULL;
+}
+
+/* Return the diagnostic message string if the binary operation OP is
+ not permitted on TYPE1 and TYPE2, NULL otherwise. */
+
+static const char *
+ix86_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
+ const_tree type2)
+{
+ /* Reject all 2-operand operations on BFmode. */
+ if (element_mode (type1) == BFmode
+ || element_mode (type2) == BFmode)
+ return N_("operation not permitted on type %<__bf16%>");
+
+ /* Operation allowed. */
+ return NULL;
+}
+
static GTY(()) tree ix86_tls_stack_chk_guard_decl;
static tree
@@ -24745,6 +24806,15 @@ ix86_libgcc_floating_mode_supported_p
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE ix86_mangle_type
+#undef TARGET_INVALID_CONVERSION
+#define TARGET_INVALID_CONVERSION ix86_invalid_conversion
+
+#undef TARGET_INVALID_UNARY_OP
+#define TARGET_INVALID_UNARY_OP ix86_invalid_unary_op
+
+#undef TARGET_INVALID_BINARY_OP
+#define TARGET_INVALID_BINARY_OP ix86_invalid_binary_op
+
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index f16df63..0da3dce 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1046,7 +1046,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|| (MODE) == V8HFmode || (MODE) == V4HFmode || (MODE) == V2HFmode \
|| (MODE) == V4QImode || (MODE) == V2HImode || (MODE) == V1SImode \
|| (MODE) == V2DImode || (MODE) == V2QImode || (MODE) == DFmode \
- || (MODE) == HFmode)
+ || (MODE) == HFmode || (MODE) == BFmode)
#define VALID_SSE_REG_MODE(MODE) \
((MODE) == V1TImode || (MODE) == TImode \
@@ -1077,7 +1077,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
|| (MODE) == CQImode || (MODE) == CHImode \
|| (MODE) == CSImode || (MODE) == CDImode \
|| (MODE) == SDmode || (MODE) == DDmode \
- || (MODE) == HFmode || (MODE) == HCmode \
+ || (MODE) == HFmode || (MODE) == HCmode || (MODE) == BFmode \
|| (MODE) == V2HImode || (MODE) == V2HFmode \
|| (MODE) == V1SImode || (MODE) == V4QImode || (MODE) == V2QImode \
|| (TARGET_64BIT \
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 2fde8cd..5f7e245 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -499,7 +499,7 @@
;; Main data type used by the insn
(define_attr "mode"
- "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
+ "unknown,none,QI,HI,SI,DI,TI,OI,XI,HF,BF,SF,DF,XF,TF,V32HF,V16HF,V8HF,
V16SF,V8SF,V4DF,V4SF,V2DF,V2SF,V1DF,V8DF,V4HF,V2HF"
(const_string "unknown"))
@@ -1104,7 +1104,7 @@
;; GET_MODE_SIZE (<MODE>mode). For XFmode which depends on
;; command line options just use GET_MODE_SIZE macro.
(define_mode_attr MODE_SIZE [(QI "1") (HI "2") (SI "4") (DI "8")
- (TI "16") (HF "2") (SF "4") (DF "8")
+ (TI "16") (HF "2") (BF "2") (SF "4") (DF "8")
(XF "GET_MODE_SIZE (XFmode)")
(V16QI "16") (V32QI "32") (V64QI "64")
(V8HI "16") (V16HI "32") (V32HI "64")
@@ -1248,7 +1248,7 @@
(define_mode_iterator X87MODEF [SF DF XF])
;; All x87 floating point modes plus HFmode
-(define_mode_iterator X87MODEFH [HF SF DF XF])
+(define_mode_iterator X87MODEFH [HF SF DF XF BF])
;; All SSE floating point modes
(define_mode_iterator SSEMODEF [HF SF DF TF])
@@ -3439,9 +3439,11 @@
operands[0] = replace_equiv_address (operands[0], stack_pointer_rtx);
})
-(define_insn "*pushhf_rex64"
- [(set (match_operand:HF 0 "push_operand" "=X,X")
- (match_operand:HF 1 "nonmemory_no_elim_operand" "r,x"))]
+(define_mode_iterator HFBF [HF BF])
+
+(define_insn "*push<mode>_rex64"
+ [(set (match_operand:HFBF 0 "push_operand" "=X,X")
+ (match_operand:HFBF 1 "nonmemory_no_elim_operand" "r,x"))]
"TARGET_64BIT"
{
/* Anything else should be already split before reg-stack. */
@@ -3452,9 +3454,9 @@
(set_attr "type" "push,multi")
(set_attr "mode" "DI,TI")])
-(define_insn "*pushhf"
- [(set (match_operand:HF 0 "push_operand" "=X,X")
- (match_operand:HF 1 "general_no_elim_operand" "rmF,x"))]
+(define_insn "*push<mode>"
+ [(set (match_operand:HFBF 0 "push_operand" "=X,X")
+ (match_operand:HFBF 1 "general_no_elim_operand" "rmF,x"))]
"!TARGET_64BIT"
{
/* Anything else should be already split before reg-stack. */
@@ -3493,7 +3495,7 @@
(set_attr "unit" "i387,*,*")
(set_attr "mode" "SF,SI,SF")])
-(define_mode_iterator MODESH [SF HF])
+(define_mode_iterator MODESH [SF HF BF])
;; %%% Kill this when call knows how to work this out.
(define_split
[(set (match_operand:MODESH 0 "push_operand")
@@ -3981,18 +3983,21 @@
]
(const_string "*")))])
-(define_insn "*movhf_internal"
- [(set (match_operand:HF 0 "nonimmediate_operand"
+(define_mode_attr hfbfconstf
+ [(HF "F") (BF "")])
+
+(define_insn "*mov<mode>_internal"
+ [(set (match_operand:HFBF 0 "nonimmediate_operand"
"=?r,?r,?r,?m,v,v,?r,m,?v,v")
- (match_operand:HF 1 "general_operand"
- "r ,F ,m ,rF,C,v, v,v,r ,m"))]
+ (match_operand:HFBF 1 "general_operand"
+ "r ,F ,m ,r<hfbfconstf>,C,v, v,v,r ,m"))]
"!(MEM_P (operands[0]) && MEM_P (operands[1]))
&& (lra_in_progress
|| reload_completed
|| !CONST_DOUBLE_P (operands[1])
|| (TARGET_SSE2
- && standard_sse_constant_p (operands[1], HFmode) == 1)
- || memory_operand (operands[0], HFmode))"
+ && standard_sse_constant_p (operands[1], <MODE>mode) == 1)
+ || memory_operand (operands[0], <MODE>mode))"
{
switch (get_attr_type (insn))
{
@@ -4087,7 +4092,13 @@
(not (match_test "TARGET_HIMODE_MATH"))))
(const_string "SI")
]
- (const_string "HI")))])
+ (const_string "HI")))
+ (set (attr "enabled")
+ (cond [(and (match_test "<MODE>mode == BFmode")
+ (eq_attr "alternative" "1"))
+ (symbol_ref "false")
+ ]
+ (const_string "*")))])
(define_split
[(set (match_operand 0 "any_fp_register_operand")
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 064596d..4f16bb7 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -931,6 +931,14 @@
return val <= 255*8 && val % 8 == 0;
})
+;; Match 1 to 255 except multiples of 8
+(define_predicate "const_0_to_255_not_mul_8_operand"
+ (match_code "const_int")
+{
+ unsigned HOST_WIDE_INT val = INTVAL (op);
+ return val <= 255 && val % 8 != 0;
+})
+
;; Return true if OP is CONST_INT >= 1 and <= 31 (a valid operand
;; for shift & compare patterns, as shifting by 0 does not change flags).
(define_predicate "const_1_to_31_operand"
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index ccd9d00..b23f07e 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -15995,10 +15995,28 @@
(define_expand "ashlv1ti3"
[(set (match_operand:V1TI 0 "register_operand")
+ (ashift:V1TI
+ (match_operand:V1TI 1 "register_operand")
+ (match_operand:QI 2 "general_operand")))]
+ "TARGET_SSE2 && TARGET_64BIT"
+{
+ if (!CONST_INT_P (operands[2]))
+ {
+ ix86_expand_v1ti_shift (ASHIFT, operands);
+ DONE;
+ }
+})
+
+(define_insn_and_split "*ashlv1ti3_internal"
+ [(set (match_operand:V1TI 0 "register_operand")
(ashift:V1TI
(match_operand:V1TI 1 "register_operand")
- (match_operand:QI 2 "general_operand")))]
- "TARGET_SSE2 && TARGET_64BIT"
+ (match_operand:SI 2 "const_0_to_255_not_mul_8_operand")))]
+ "TARGET_SSE2 && TARGET_64BIT
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
{
ix86_expand_v1ti_shift (ASHIFT, operands);
DONE;
@@ -16011,6 +16029,24 @@
(match_operand:QI 2 "general_operand")))]
"TARGET_SSE2 && TARGET_64BIT"
{
+ if (!CONST_INT_P (operands[2]))
+ {
+ ix86_expand_v1ti_shift (LSHIFTRT, operands);
+ DONE;
+ }
+})
+
+(define_insn_and_split "*lshrv1ti3_internal"
+ [(set (match_operand:V1TI 0 "register_operand")
+ (lshiftrt:V1TI
+ (match_operand:V1TI 1 "register_operand")
+ (match_operand:SI 2 "const_0_to_255_not_mul_8_operand")))]
+ "TARGET_SSE2 && TARGET_64BIT
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
ix86_expand_v1ti_shift (LSHIFTRT, operands);
DONE;
})
@@ -16022,6 +16058,25 @@
(match_operand:QI 2 "general_operand")))]
"TARGET_SSE2 && TARGET_64BIT"
{
+ if (!CONST_INT_P (operands[2]))
+ {
+ ix86_expand_v1ti_ashiftrt (operands);
+ DONE;
+ }
+})
+
+
+(define_insn_and_split "*ashrv1ti3_internal"
+ [(set (match_operand:V1TI 0 "register_operand")
+ (ashiftrt:V1TI
+ (match_operand:V1TI 1 "register_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")))]
+ "TARGET_SSE2 && TARGET_64BIT
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
ix86_expand_v1ti_ashiftrt (operands);
DONE;
})
@@ -16033,6 +16088,24 @@
(match_operand:QI 2 "general_operand")))]
"TARGET_SSE2 && TARGET_64BIT"
{
+ if (!CONST_INT_P (operands[2]))
+ {
+ ix86_expand_v1ti_rotate (ROTATE, operands);
+ DONE;
+ }
+})
+
+(define_insn_and_split "*rotlv1ti3_internal"
+ [(set (match_operand:V1TI 0 "register_operand")
+ (rotate:V1TI
+ (match_operand:V1TI 1 "register_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")))]
+ "TARGET_SSE2 && TARGET_64BIT
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
ix86_expand_v1ti_rotate (ROTATE, operands);
DONE;
})
@@ -16044,6 +16117,24 @@
(match_operand:QI 2 "general_operand")))]
"TARGET_SSE2 && TARGET_64BIT"
{
+ if (!CONST_INT_P (operands[2]))
+ {
+ ix86_expand_v1ti_rotate (ROTATERT, operands);
+ DONE;
+ }
+})
+
+(define_insn_and_split "*rotrv1ti3_internal"
+ [(set (match_operand:V1TI 0 "register_operand")
+ (rotatert:V1TI
+ (match_operand:V1TI 1 "register_operand")
+ (match_operand:SI 2 "const_0_to_255_operand")))]
+ "TARGET_SSE2 && TARGET_64BIT
+ && ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
ix86_expand_v1ti_rotate (ROTATERT, operands);
DONE;
})
diff --git a/gcc/config/rs6000/mma.md b/gcc/config/rs6000/mma.md
index a183b6a..032f426 100644
--- a/gcc/config/rs6000/mma.md
+++ b/gcc/config/rs6000/mma.md
@@ -268,10 +268,25 @@
(define_expand "movoo"
[(set (match_operand:OO 0 "nonimmediate_operand")
(match_operand:OO 1 "input_operand"))]
- "TARGET_MMA"
+ ""
{
- rs6000_emit_move (operands[0], operands[1], OOmode);
- DONE;
+ if (TARGET_MMA)
+ {
+ rs6000_emit_move (operands[0], operands[1], OOmode);
+ DONE;
+ }
+ else if (currently_expanding_to_rtl && seen_error ())
+ {
+ /* PR103353 shows we may want to continue to expand the __builtin_vsx_lxvp
+ built-in function, even if we have already emitted error messages about
+ some missing required conditions. As shown in that PR, without one
+ explicit mov optab on OOmode provided, it would call emit_move_insn
+ recursively. So we allow this pattern to be generated when we are
+ expanding to RTL and have seen errors. It would not cause further ICEs
+ as the compilation would stop soon after expanding. */
+ }
+ else
+ gcc_unreachable ();
})
(define_insn_and_split "*movoo"
@@ -300,10 +315,22 @@
(define_expand "movxo"
[(set (match_operand:XO 0 "nonimmediate_operand")
(match_operand:XO 1 "input_operand"))]
- "TARGET_MMA"
+ ""
{
- rs6000_emit_move (operands[0], operands[1], XOmode);
- DONE;
+ if (TARGET_MMA)
+ {
+ rs6000_emit_move (operands[0], operands[1], XOmode);
+ DONE;
+ }
+ else if (currently_expanding_to_rtl && seen_error ())
+ {
+ /* PR103353 shows we may want to continue to expand the __builtin_vsx_lxvp
+ built-in function, even if we have already emitted error messages about
+ some missing required conditions. So handle XOmode the same way as
+ OOmode here. */
+ }
+ else
+ gcc_unreachable ();
})
(define_insn_and_split "*movxo"
diff --git a/gcc/config/rs6000/rs6000-builtin.cc b/gcc/config/rs6000/rs6000-builtin.cc
index 2819773..12afa86 100644
--- a/gcc/config/rs6000/rs6000-builtin.cc
+++ b/gcc/config/rs6000/rs6000-builtin.cc
@@ -830,44 +830,8 @@ rs6000_init_builtins (void)
fprintf (stderr, "\nAutogenerated built-in functions:\n\n");
for (int i = 1; i < (int) RS6000_BIF_MAX; i++)
{
- bif_enable e = rs6000_builtin_info[i].enable;
- if (e == ENB_P5 && !TARGET_POPCNTB)
- continue;
- if (e == ENB_P6 && !TARGET_CMPB)
- continue;
- if (e == ENB_P6_64 && !(TARGET_CMPB && TARGET_POWERPC64))
- continue;
- if (e == ENB_ALTIVEC && !TARGET_ALTIVEC)
- continue;
- if (e == ENB_VSX && !TARGET_VSX)
- continue;
- if (e == ENB_P7 && !TARGET_POPCNTD)
- continue;
- if (e == ENB_P7_64 && !(TARGET_POPCNTD && TARGET_POWERPC64))
- continue;
- if (e == ENB_P8 && !TARGET_DIRECT_MOVE)
- continue;
- if (e == ENB_P8V && !TARGET_P8_VECTOR)
- continue;
- if (e == ENB_P9 && !TARGET_MODULO)
- continue;
- if (e == ENB_P9_64 && !(TARGET_MODULO && TARGET_POWERPC64))
- continue;
- if (e == ENB_P9V && !TARGET_P9_VECTOR)
- continue;
- if (e == ENB_IEEE128_HW && !TARGET_FLOAT128_HW)
- continue;
- if (e == ENB_DFP && !TARGET_DFP)
- continue;
- if (e == ENB_CRYPTO && !TARGET_CRYPTO)
- continue;
- if (e == ENB_HTM && !TARGET_HTM)
- continue;
- if (e == ENB_P10 && !TARGET_POWER10)
- continue;
- if (e == ENB_P10_64 && !(TARGET_POWER10 && TARGET_POWERPC64))
- continue;
- if (e == ENB_MMA && !TARGET_MMA)
+ enum rs6000_gen_builtins fn_code = (enum rs6000_gen_builtins) i;
+ if (!rs6000_builtin_is_supported (fn_code))
continue;
tree fntype = rs6000_builtin_info[i].fntype;
tree t = TREE_TYPE (fntype);
@@ -3370,29 +3334,8 @@ rs6000_expand_builtin (tree exp, rtx target, rtx /* subtarget */,
but check for actual availability now, during expand time. For
invalid builtins, generate a normal call. */
bifdata *bifaddr = &rs6000_builtin_info[uns_fcode];
- bif_enable e = bifaddr->enable;
-
- if (!(e == ENB_ALWAYS
- || (e == ENB_P5 && TARGET_POPCNTB)
- || (e == ENB_P6 && TARGET_CMPB)
- || (e == ENB_P6_64 && TARGET_CMPB && TARGET_POWERPC64)
- || (e == ENB_ALTIVEC && TARGET_ALTIVEC)
- || (e == ENB_CELL && TARGET_ALTIVEC && rs6000_cpu == PROCESSOR_CELL)
- || (e == ENB_VSX && TARGET_VSX)
- || (e == ENB_P7 && TARGET_POPCNTD)
- || (e == ENB_P7_64 && TARGET_POPCNTD && TARGET_POWERPC64)
- || (e == ENB_P8 && TARGET_DIRECT_MOVE)
- || (e == ENB_P8V && TARGET_P8_VECTOR)
- || (e == ENB_P9 && TARGET_MODULO)
- || (e == ENB_P9_64 && TARGET_MODULO && TARGET_POWERPC64)
- || (e == ENB_P9V && TARGET_P9_VECTOR)
- || (e == ENB_IEEE128_HW && TARGET_FLOAT128_HW)
- || (e == ENB_DFP && TARGET_DFP)
- || (e == ENB_CRYPTO && TARGET_CRYPTO)
- || (e == ENB_HTM && TARGET_HTM)
- || (e == ENB_P10 && TARGET_POWER10)
- || (e == ENB_P10_64 && TARGET_POWER10 && TARGET_POWERPC64)
- || (e == ENB_MMA && TARGET_MMA)))
+
+ if (!rs6000_builtin_is_supported (fcode))
{
rs6000_invalid_builtin (fcode);
return expand_call (exp, target, ignore);
diff --git a/gcc/config/rs6000/rs6000-internal.h b/gcc/config/rs6000/rs6000-internal.h
index 8ee8c98..b9e82c0 100644
--- a/gcc/config/rs6000/rs6000-internal.h
+++ b/gcc/config/rs6000/rs6000-internal.h
@@ -82,7 +82,6 @@ extern const char *rs6000_machine;
from rs6000-logue.cc */
extern int uses_TOC (void);
-extern bool rs6000_global_entry_point_needed_p (void);
extern void rs6000_output_function_prologue (FILE *file);
extern void rs6000_output_function_epilogue (FILE *file);
extern bool rs6000_function_ok_for_sibcall (tree decl, tree exp);