aboutsummaryrefslogtreecommitdiff
path: root/gcc/config
diff options
context:
space:
mode:
authorMartin Liska <mliska@suse.cz>2022-03-15 10:13:03 +0100
committerMartin Liska <mliska@suse.cz>2022-03-15 10:13:03 +0100
commitcad2e08f6c249937e10ad5ae0d4a117923979efb (patch)
tree4c111a54b7fa0e5fa26391d417da4ec113007f3e /gcc/config
parent604bf5da51533a218c0393cb5115bae7c8d95282 (diff)
parent49fb0af9bf8f16907980d383c2bbc85e185ec2e0 (diff)
downloadgcc-cad2e08f6c249937e10ad5ae0d4a117923979efb.zip
gcc-cad2e08f6c249937e10ad5ae0d4a117923979efb.tar.gz
gcc-cad2e08f6c249937e10ad5ae0d4a117923979efb.tar.bz2
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'gcc/config')
-rw-r--r--gcc/config/bfin/bfin.cc3
-rw-r--r--gcc/config/i386/emmintrin.h5
-rw-r--r--gcc/config/i386/i386-expand.cc97
-rw-r--r--gcc/config/i386/i386-protos.h5
-rw-r--r--gcc/config/i386/i386.cc21
-rw-r--r--gcc/config/i386/i386.md56
-rw-r--r--gcc/config/mips/mips.cc9
-rw-r--r--gcc/config/mips/mips.h7
-rw-r--r--gcc/config/nvptx/nvptx.cc60
-rw-r--r--gcc/config/nvptx/nvptx.h1
-rw-r--r--gcc/config/nvptx/nvptx.md79
-rw-r--r--gcc/config/nvptx/nvptx.opt2
-rw-r--r--gcc/config/nvptx/t-nvptx4
-rw-r--r--gcc/config/rs6000/rs6000.cc12
-rw-r--r--gcc/config/rs6000/vsx.md2
15 files changed, 261 insertions, 102 deletions
diff --git a/gcc/config/bfin/bfin.cc b/gcc/config/bfin/bfin.cc
index c749b48..b2a9142 100644
--- a/gcc/config/bfin/bfin.cc
+++ b/gcc/config/bfin/bfin.cc
@@ -4763,7 +4763,8 @@ bfin_handle_longcall_attribute (tree *node, tree name,
&& lookup_attribute ("longcall", TYPE_ATTRIBUTES (*node))))
{
warning (OPT_Wattributes,
- "cannot apply both longcall and shortcall attributes to the same function");
+ "cannot apply both %<longcall%> and %<shortcall%> attributes "
+ "to the same function");
*no_add_attrs = true;
}
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index a81deb6..654a8e8 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -718,14 +718,13 @@ _mm_loadu_si64 (void const *__P)
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_si32 (void const *__P)
{
- return _mm_set_epi32 (*(int *)__P, (int)0, (int)0, (int)0);
+ return _mm_set_epi32 (0, 0, 0, (*(__m32_u *)__P)[0]);
}
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_loadu_si16 (void const *__P)
{
- return _mm_set_epi16 (*(short *)__P, (short)0, (short)0, (short)0,
- (short)0, (short)0, (short)0, (short)0);
+ return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, (*(__m16_u *)__P)[0]);
}
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index 530f83f..e85641d 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -12232,46 +12232,14 @@ ix86_expand_vec_set_builtin (tree exp)
return target;
}
-/* Expand an expression EXP that calls a built-in function,
- with result going to TARGET if that's convenient
- (and in mode MODE if that's convenient).
- SUBTARGET may be used as the target for computing one of EXP's operands.
- IGNORE is nonzero if the value is to be ignored. */
-
-rtx
-ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
- machine_mode mode, int ignore)
+/* Return true if the necessary isa options for this builtin exist,
+ else false.
+ fcode = DECL_MD_FUNCTION_CODE (fndecl); */
+bool
+ix86_check_builtin_isa_match (unsigned int fcode,
+ HOST_WIDE_INT* pbisa,
+ HOST_WIDE_INT* pbisa2)
{
- size_t i;
- enum insn_code icode, icode2;
- tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
- tree arg0, arg1, arg2, arg3, arg4;
- rtx op0, op1, op2, op3, op4, pat, pat2, insn;
- machine_mode mode0, mode1, mode2, mode3, mode4;
- unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
-
- /* For CPU builtins that can be folded, fold first and expand the fold. */
- switch (fcode)
- {
- case IX86_BUILTIN_CPU_INIT:
- {
- /* Make it call __cpu_indicator_init in libgcc. */
- tree call_expr, fndecl, type;
- type = build_function_type_list (integer_type_node, NULL_TREE);
- fndecl = build_fn_decl ("__cpu_indicator_init", type);
- call_expr = build_call_expr (fndecl, 0);
- return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
- }
- case IX86_BUILTIN_CPU_IS:
- case IX86_BUILTIN_CPU_SUPPORTS:
- {
- tree arg0 = CALL_EXPR_ARG (exp, 0);
- tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
- gcc_assert (fold_expr != NULL_TREE);
- return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
- }
- }
-
HOST_WIDE_INT isa = ix86_isa_flags;
HOST_WIDE_INT isa2 = ix86_isa_flags2;
HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa;
@@ -12321,7 +12289,56 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
bisa |= OPTION_MASK_ISA_SSE2;
}
- if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2)
+ if (pbisa)
+ *pbisa = bisa;
+ if (pbisa2)
+ *pbisa2 = bisa2;
+
+ return (bisa & isa) == bisa && (bisa2 & isa2) == bisa2;
+}
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+rtx
+ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
+ machine_mode mode, int ignore)
+{
+ size_t i;
+ enum insn_code icode, icode2;
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ tree arg0, arg1, arg2, arg3, arg4;
+ rtx op0, op1, op2, op3, op4, pat, pat2, insn;
+ machine_mode mode0, mode1, mode2, mode3, mode4;
+ unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
+ HOST_WIDE_INT bisa, bisa2;
+
+ /* For CPU builtins that can be folded, fold first and expand the fold. */
+ switch (fcode)
+ {
+ case IX86_BUILTIN_CPU_INIT:
+ {
+ /* Make it call __cpu_indicator_init in libgcc. */
+ tree call_expr, fndecl, type;
+ type = build_function_type_list (integer_type_node, NULL_TREE);
+ fndecl = build_fn_decl ("__cpu_indicator_init", type);
+ call_expr = build_call_expr (fndecl, 0);
+ return expand_expr (call_expr, target, mode, EXPAND_NORMAL);
+ }
+ case IX86_BUILTIN_CPU_IS:
+ case IX86_BUILTIN_CPU_SUPPORTS:
+ {
+ tree arg0 = CALL_EXPR_ARG (exp, 0);
+ tree fold_expr = fold_builtin_cpu (fndecl, &arg0);
+ gcc_assert (fold_expr != NULL_TREE);
+ return expand_expr (fold_expr, target, mode, EXPAND_NORMAL);
+ }
+ }
+
+ if (!ix86_check_builtin_isa_match (fcode, &bisa, &bisa2))
{
bool add_abi_p = bisa & OPTION_MASK_ISA_64BIT;
if (TARGET_ABI_X32)
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index d5e1125..3596ce8 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -53,6 +53,7 @@ extern bool ix86_using_red_zone (void);
extern rtx ix86_gen_scratch_sse_rtx (machine_mode);
extern unsigned int ix86_regmode_natural_size (machine_mode);
+extern bool ix86_check_builtin_isa_match (unsigned int fcode);
#ifdef RTX_CODE
extern int standard_80387_constant_p (rtx);
extern const char *standard_80387_constant_opcode (rtx);
@@ -405,3 +406,7 @@ extern rtl_opt_pass *make_pass_remove_partial_avx_dependency
(gcc::context *);
extern bool ix86_has_no_direct_extern_access;
+
+/* In i386-expand.cc. */
+bool ix86_check_builtin_isa_match (unsigned int, HOST_WIDE_INT*,
+ HOST_WIDE_INT*);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 4121f98..d77ad83 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -18286,6 +18286,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
bool is_vshift;
unsigned HOST_WIDE_INT elems;
+ /* Don't fold when there's isa mismatch. */
+ if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL))
+ return false;
+
switch (fn_code)
{
case IX86_BUILTIN_TZCNT32:
@@ -22597,16 +22601,21 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
case vec_construct:
{
- /* N element inserts into SSE vectors. */
- int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op;
+ int n = TYPE_VECTOR_SUBPARTS (vectype);
+ /* N - 1 element inserts into an SSE vector, the possible
+ GPR -> XMM move is accounted for in add_stmt_cost. */
+ if (GET_MODE_BITSIZE (mode) <= 128)
+ return (n - 1) * ix86_cost->sse_op;
/* One vinserti128 for combining two SSE vectors for AVX256. */
- if (GET_MODE_BITSIZE (mode) == 256)
- cost += ix86_vec_cost (mode, ix86_cost->addss);
+ else if (GET_MODE_BITSIZE (mode) == 256)
+ return ((n - 2) * ix86_cost->sse_op
+ + ix86_vec_cost (mode, ix86_cost->addss));
/* One vinserti64x4 and two vinserti128 for combining SSE
and AVX256 vectors to AVX512. */
else if (GET_MODE_BITSIZE (mode) == 512)
- cost += 3 * ix86_vec_cost (mode, ix86_cost->addss);
- return cost;
+ return ((n - 4) * ix86_cost->sse_op
+ + 3 * ix86_vec_cost (mode, ix86_cost->addss));
+ gcc_unreachable ();
}
default:
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index d15170e..46a2663 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3180,6 +3180,38 @@
(const_int 8))
(subreg:SWI248 (match_dup 1) 0))])
+;; Eliminate redundant insv, e.g. xorl %eax,%eax; movb $0, %ah
+(define_peephole2
+ [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (zero_extract:SWI248 (match_operand:SWI248 1 "general_reg_operand")
+ (const_int 8)
+ (const_int 8))
+ (const_int 0))]
+ "REGNO (operands[0]) == REGNO (operands[1])"
+ [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])])
+
+;; Combine movl followed by movb.
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_operand:SWI48 1 "const_int_operand"))
+ (set (zero_extract:SWI248 (match_operand:SWI248 2 "general_reg_operand")
+ (const_int 8)
+ (const_int 8))
+ (match_operand:SWI248 3 "const_int_operand"))]
+ "REGNO (operands[0]) == REGNO (operands[2])"
+ [(set (match_operand:SWI48 0 "general_reg_operand")
+ (match_dup 4))]
+{
+ HOST_WIDE_INT tmp = INTVAL (operands[1]) & ~(HOST_WIDE_INT)0xff00;
+ tmp |= (INTVAL (operands[3]) & 0xff) << 8;
+ operands[4] = gen_int_mode (tmp, <SWI48:MODE>mode);
+})
+
+
(define_code_iterator any_extract [sign_extract zero_extract])
(define_insn "*insvqi_2"
@@ -4276,6 +4308,30 @@
[(set_attr "isa" "*,avx512dq,avx512dq")
(set_attr "type" "imovx,mskmov,mskmov")
(set_attr "mode" "SI,QI,QI")])
+
+;; Transform xorl; mov[bw] (set strict_low_part) into movz[bw]l.
+(define_peephole2
+ [(parallel [(set (match_operand:SWI48 0 "general_reg_operand")
+ (const_int 0))
+ (clobber (reg:CC FLAGS_REG))])
+ (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand"))
+ (match_operand:SWI12 2 "nonimmediate_operand"))]
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && (<SWI48:MODE>mode != SImode
+ || !TARGET_ZERO_EXTEND_WITH_AND
+ || !optimize_function_for_speed_p (cfun))"
+ [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
+
+;; Likewise, but preserving FLAGS_REG.
+(define_peephole2
+ [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0))
+ (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand"))
+ (match_operand:SWI12 2 "nonimmediate_operand"))]
+ "REGNO (operands[0]) == REGNO (operands[1])
+ && (<SWI48:MODE>mode != SImode
+ || !TARGET_ZERO_EXTEND_WITH_AND
+ || !optimize_function_for_speed_p (cfun))"
+ [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))])
;; Sign extension instructions
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 4f9683e..a1c4b43 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -19974,6 +19974,13 @@ mips_option_override (void)
target_flags |= MASK_64BIT;
}
+ /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in
+ order for tracebacks to be complete but not if any
+ -fasynchronous-unwind-table were already specified. */
+ if (flag_sanitize & SANITIZE_USER_ADDRESS
+ && !global_options_set.x_flag_asynchronous_unwind_tables)
+ flag_asynchronous_unwind_tables = 1;
+
if ((target_flags_explicit & MASK_FLOAT64) != 0)
{
if (mips_isa_rev >= 6 && !TARGET_FLOAT64)
@@ -22591,7 +22598,7 @@ mips_constant_alignment (const_tree exp, HOST_WIDE_INT align)
static unsigned HOST_WIDE_INT
mips_asan_shadow_offset (void)
{
- return 0x0aaa0000;
+ return SUBTARGET_SHADOW_OFFSET;
}
/* Implement TARGET_STARTING_FRAME_OFFSET. See mips_compute_frame_info
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 0029864..858bbba 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -3463,3 +3463,10 @@ struct GTY(()) machine_function {
&& !TARGET_MICROMIPS && !TARGET_FIX_24K)
#define NEED_INDICATE_EXEC_STACK 0
+
+/* Define the shadow offset for asan. Other OS's can override in the
+ respective tm.h files. */
+#ifndef SUBTARGET_SHADOW_OFFSET
+#define SUBTARGET_SHADOW_OFFSET \
+ (POINTER_SIZE == 64 ? HOST_WIDE_INT_1 << 37 : HOST_WIDE_INT_C (0x0aaa0000))
+#endif
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 6ca99a6..3a7be63 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -1364,6 +1364,13 @@ nvptx_init_unisimt_predicate (FILE *file)
int master = REGNO (cfun->machine->unisimt_master);
int pred = REGNO (cfun->machine->unisimt_predicate);
fprintf (file, "\t\tld.shared.u32 %%r%d, [%%r%d];\n", master, loc);
+ if (cfun->machine->unisimt_outside_simt_predicate)
+ {
+ int pred_outside_simt
+ = REGNO (cfun->machine->unisimt_outside_simt_predicate);
+ fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, 0;\n",
+ pred_outside_simt, master);
+ }
fprintf (file, "\t\tmov.u32 %%ustmp0, %%laneid;\n");
/* Compute 'master lane index' as 'laneid & __nvptx_uni[tid.y]'. */
fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master);
@@ -1589,6 +1596,13 @@ nvptx_output_unisimt_switch (FILE *file, bool entering)
fprintf (file, "\t{\n");
fprintf (file, "\t\t.reg.u32 %%ustmp2;\n");
fprintf (file, "\t\tmov.u32 %%ustmp2, %d;\n", entering ? -1 : 0);
+ if (cfun->machine->unisimt_outside_simt_predicate)
+ {
+ int pred_outside_simt
+ = REGNO (cfun->machine->unisimt_outside_simt_predicate);
+ fprintf (file, "\t\tmov.pred %%r%d, %d;\n", pred_outside_simt,
+ entering ? 0 : 1);
+ }
if (!crtl->is_leaf)
{
int loc = REGNO (cfun->machine->unisimt_location);
@@ -2835,7 +2849,8 @@ nvptx_mem_maybe_shared_p (const_rtx x)
S -- print a shuffle kind specified by CONST_INT
t -- print a type opcode suffix, promoting QImode to 32 bits
T -- print a type size in bits
- u -- print a type opcode suffix without promotions. */
+ u -- print a type opcode suffix without promotions.
+ x -- print a destination operand that may also be a bit bucket. */
static void
nvptx_print_operand (FILE *file, rtx x, int code)
@@ -2863,6 +2878,14 @@ nvptx_print_operand (FILE *file, rtx x, int code)
switch (code)
{
+ case 'x':
+ if (current_output_insn != NULL
+ && find_reg_note (current_output_insn, REG_UNUSED, x) != NULL_RTX)
+ {
+ fputs ("_", file);
+ return;
+ }
+ goto common;
case 'B':
if (SYMBOL_REF_P (XEXP (x, 0)))
switch (SYMBOL_DATA_AREA (XEXP (x, 0)))
@@ -3233,6 +3256,13 @@ nvptx_get_unisimt_predicate ()
return pred ? pred : pred = gen_reg_rtx (BImode);
}
+static rtx
+nvptx_get_unisimt_outside_simt_predicate ()
+{
+ rtx &pred = cfun->machine->unisimt_outside_simt_predicate;
+ return pred ? pred : pred = gen_reg_rtx (BImode);
+}
+
/* Return true if given call insn references one of the functions provided by
the CUDA runtime: malloc, free, vprintf. */
@@ -3265,7 +3295,9 @@ static bool
nvptx_unisimt_handle_set (rtx set, rtx_insn *insn, rtx master)
{
rtx reg;
- if (GET_CODE (set) == SET && REG_P (reg = SET_DEST (set)))
+ if (GET_CODE (set) == SET
+ && REG_P (reg = SET_DEST (set))
+ && find_reg_note (insn, REG_UNUSED, reg) == NULL_RTX)
{
emit_insn_after (nvptx_gen_shuffle (reg, reg, master, SHUFFLE_IDX),
insn);
@@ -3275,6 +3307,16 @@ nvptx_unisimt_handle_set (rtx set, rtx_insn *insn, rtx master)
return false;
}
+static void
+predicate_insn (rtx_insn *insn, rtx pred)
+{
+ rtx pat = PATTERN (insn);
+ pred = gen_rtx_NE (BImode, pred, const0_rtx);
+ pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat);
+ bool changed_p = validate_change (insn, &PATTERN (insn), pat, false);
+ gcc_assert (changed_p);
+}
+
/* Adjust code for uniform-simt code generation variant by making atomics and
"syscalls" conditionally executed, and inserting shuffle-based propagation
for registers being set. */
@@ -3341,10 +3383,16 @@ nvptx_reorg_uniform_simt ()
}
rtx pred = nvptx_get_unisimt_predicate ();
- pred = gen_rtx_NE (BImode, pred, const0_rtx);
- pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat);
- bool changed_p = validate_change (insn, &PATTERN (insn), pat, false);
- gcc_assert (changed_p);
+ predicate_insn (insn, pred);
+
+ pred = NULL_RTX;
+ for (rtx_insn *post = NEXT_INSN (insn); post != next;
+ post = NEXT_INSN (post))
+ {
+ if (pred == NULL_RTX)
+ pred = nvptx_get_unisimt_outside_simt_predicate ();
+ predicate_insn (post, pred);
+ }
}
}
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index 3ca22a5..b55ade6 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -226,6 +226,7 @@ struct GTY(()) machine_function
rtx sync_bar; /* Synchronization barrier ID for vectors. */
rtx unisimt_master; /* 'Master lane index' for -muniform-simt. */
rtx unisimt_predicate; /* Predicate for -muniform-simt. */
+ rtx unisimt_outside_simt_predicate; /* Predicate for -muniform-simt. */
rtx unisimt_location; /* Mask location for -muniform-simt. */
/* The following two fields hold the maximum size resp. alignment required
for per-lane storage in OpenMP SIMD regions. */
diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md
index a453c1d..1dec7ca 100644
--- a/gcc/config/nvptx/nvptx.md
+++ b/gcc/config/nvptx/nvptx.md
@@ -172,8 +172,8 @@
return SYMBOL_REF_FUNCTION_P (op);
})
-(define_attr "predicable" "false,true"
- (const_string "true"))
+(define_attr "predicable" "no,yes"
+ (const_string "yes"))
(define_cond_exec
[(match_operator 0 "predicate_operator"
@@ -911,7 +911,7 @@
(pc)))]
""
"%j0\\tbra\\t%l1;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_insn "br_false"
[(set (pc)
@@ -921,7 +921,7 @@
(pc)))]
""
"%J0\\tbra\\t%l1;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
;; unified conditional branch
(define_insn "br_true_uni"
@@ -931,7 +931,7 @@
(label_ref (match_operand 1 "" "")) (pc)))]
""
"%j0\\tbra.uni\\t%l1;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_insn "br_false_uni"
[(set (pc) (if_then_else
@@ -940,7 +940,7 @@
(label_ref (match_operand 1 "" "")) (pc)))]
""
"%J0\\tbra.uni\\t%l1;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_expand "cbranch<mode>4"
[(set (pc)
@@ -1619,7 +1619,7 @@
{
return nvptx_output_return ();
}
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_expand "epilogue"
[(clobber (const_int 0))]
@@ -1712,7 +1712,7 @@
(const_int 0))]
""
"%j0 trap; %j0 exit;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_insn "trap_if_false"
[(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
@@ -1720,7 +1720,7 @@
(const_int 0))]
""
"%J0 trap; %J0 exit;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_expand "ctrap<mode>4"
[(trap_if (match_operator 0 "nvptx_comparison_operator"
@@ -1769,28 +1769,28 @@
UNSPECV_FORK)]
""
"// fork %0;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_insn "nvptx_forked"
[(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
UNSPECV_FORKED)]
""
"// forked %0;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_insn "nvptx_joining"
[(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
UNSPECV_JOINING)]
""
"// joining %0;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_insn "nvptx_join"
[(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
UNSPECV_JOIN)]
""
"// join %0;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_expand "oacc_fork"
[(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
@@ -2035,7 +2035,7 @@
output_asm_insn ("}", NULL);
return "";
}
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_insn "atomic_compare_and_swap<mode>_1"
[(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
@@ -2050,7 +2050,7 @@
""
{
const char *t
- = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;";
+ = "%.\\tatom%A1.cas.b%T0\\t%x0, %1, %2, %3;";
return nvptx_output_atomic_insn (t, operands, 1, 4);
}
[(set_attr "atomic" "true")])
@@ -2076,7 +2076,7 @@
return "";
}
const char *t
- = "%.\tatom%A1.exch.b%T0\t%0, %1, %2;";
+ = "%.\tatom%A1.exch.b%T0\t%x0, %1, %2;";
return nvptx_output_atomic_insn (t, operands, 1, 3);
}
[(set_attr "atomic" "true")])
@@ -2166,7 +2166,7 @@
return "";
}
const char *t
- = "%.\\tatom%A1.add%t0\\t%0, %1, %2;";
+ = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;";
return nvptx_output_atomic_insn (t, operands, 1, 3);
}
[(set_attr "atomic" "true")])
@@ -2196,7 +2196,7 @@
return "";
}
const char *t
- = "%.\\tatom%A1.add%t0\\t%0, %1, %2;";
+ = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;";
return nvptx_output_atomic_insn (t, operands, 1, 3);
}
[(set_attr "atomic" "true")])
@@ -2226,7 +2226,7 @@
return "";
}
const char *t
- = "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;";
+ = "%.\\tatom%A1.<logic>.b%T0\\t%x0, %1, %2;";
return nvptx_output_atomic_insn (t, operands, 1, 3);
}
@@ -2263,30 +2263,33 @@
? "\\tbarrier.sync\\t%0, %1;"
: "\\tbar.sync\\t%0, %1;");
}
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_insn "nvptx_warpsync"
[(unspec_volatile [(const_int 0)] UNSPECV_WARPSYNC)]
"TARGET_PTX_6_0"
- "\\tbar.warp.sync\\t0xffffffff;"
- [(set_attr "predicable" "false")])
+ "%.\\tbar.warp.sync\\t0xffffffff;")
(define_insn "nvptx_uniform_warp_check"
[(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)]
""
{
- output_asm_insn ("{", NULL);
- output_asm_insn ("\\t" ".reg.b32" "\\t" "act;", NULL);
- output_asm_insn ("\\t" "vote.ballot.b32" "\\t" "act,1;", NULL);
- output_asm_insn ("\\t" ".reg.pred" "\\t" "uni;", NULL);
- output_asm_insn ("\\t" "setp.eq.b32" "\\t" "uni,act,0xffffffff;",
- NULL);
- output_asm_insn ("@ !uni\\t" "trap;", NULL);
- output_asm_insn ("@ !uni\\t" "exit;", NULL);
- output_asm_insn ("}", NULL);
+ const char *insns[] = {
+ "{",
+ "\\t" ".reg.b32" "\\t" "act;",
+ "%.\\t" "vote.ballot.b32" "\\t" "act,1;",
+ "\\t" ".reg.pred" "\\t" "do_abort;",
+ "\\t" "mov.pred" "\\t" "do_abort,0;",
+ "%.\\t" "setp.ne.b32" "\\t" "do_abort,act,0xffffffff;",
+ "@ do_abort\\t" "trap;",
+ "@ do_abort\\t" "exit;",
+ "}",
+ NULL
+ };
+ for (const char **p = &insns[0]; *p != NULL; p++)
+ output_asm_insn (*p, NULL);
return "";
- }
- [(set_attr "predicable" "false")])
+ })
(define_expand "memory_barrier"
[(set (match_dup 0)
@@ -2307,7 +2310,7 @@
(unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
""
"\\tmembar.sys;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_expand "nvptx_membar_cta"
[(set (match_dup 0)
@@ -2323,7 +2326,7 @@
(unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
""
"\\tmembar.cta;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_expand "nvptx_membar_gl"
[(set (match_dup 0)
@@ -2339,13 +2342,13 @@
(unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))]
""
"\\tmembar.gl;"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_insn "nvptx_nounroll"
[(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
""
"\\t.pragma \\\"nounroll\\\";"
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
(define_insn "nvptx_red_partition"
[(set (match_operand:DI 0 "nonimmediate_operand" "=R")
@@ -2355,7 +2358,7 @@
{
return nvptx_output_red_partition (operands[0], operands[1]);
}
- [(set_attr "predicable" "false")])
+ [(set_attr "predicable" "no")])
;; Expand QI mode operations using SI mode instructions.
(define_code_iterator any_sbinary [plus minus smin smax])
diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt
index c83ceb3..fea99c5 100644
--- a/gcc/config/nvptx/nvptx.opt
+++ b/gcc/config/nvptx/nvptx.opt
@@ -53,7 +53,7 @@ Generate code for OpenMP offloading: enables -msoft-stack and -muniform-simt.
; Default needs to be in sync with default in ASM_SPEC in nvptx.h.
misa=
-Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) Init(PTX_ISA_SM35)
+Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) Init(PTX_ISA_SM30)
Specify the version of the ptx ISA to use.
Enum
diff --git a/gcc/config/nvptx/t-nvptx b/gcc/config/nvptx/t-nvptx
index 8f67264..b63c4a5 100644
--- a/gcc/config/nvptx/t-nvptx
+++ b/gcc/config/nvptx/t-nvptx
@@ -30,6 +30,4 @@ s-nvptx-gen-opt: $(srcdir)/config/nvptx/nvptx-sm.def
tmp-nvptx-gen.opt $(srcdir)/config/nvptx/nvptx-gen.opt
$(STAMP) s-nvptx-gen-opt
-MULTILIB_OPTIONS = mgomp
-
-MULTILIB_EXTRA_OPTS = misa=sm_30 mptx=3.1
+MULTILIB_OPTIONS = mgomp mptx=3.1
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 3afe78f..283e830 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -5804,20 +5804,28 @@ rs6000_machine_from_flags (void)
if (rs6000_cpu == PROCESSOR_MPCCORE)
return "\"821\"";
+#if 0
+ /* This (and ppc64 below) are disabled here (for now at least) because
+ PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON
+ are #define'd as some of these. Untangling that is a job for later. */
+
/* 600 series and 700 series, "classic" */
if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603
|| rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e
- || rs6000_cpu == PROCESSOR_PPC750 || rs6000_cpu == PROCESSOR_POWERPC)
+ || rs6000_cpu == PROCESSOR_PPC750)
return "ppc";
+#endif
/* Classic with AltiVec, "G4" */
if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450)
return "\"7450\"";
+#if 0
/* The older 64-bit CPUs */
if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630
- || rs6000_cpu == PROCESSOR_RS64A || rs6000_cpu == PROCESSOR_POWERPC64)
+ || rs6000_cpu == PROCESSOR_RS64A)
return "ppc64";
+#endif
HOST_WIDE_INT flags = rs6000_isa_flags;
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index d0fb92f..15bd86d 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -5033,7 +5033,7 @@
;; generate the vextsd2q instruction.
(define_insn_and_split "extendditi2"
[(set (match_operand:TI 0 "register_operand" "=r,r,v,v,v")
- (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,r,wa,Z")))
+ (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,b,wa,Z")))
(clobber (reg:DI CA_REGNO))]
"TARGET_POWERPC64 && TARGET_POWER10"
"#"