diff options
author | Martin Liska <mliska@suse.cz> | 2022-03-15 10:13:03 +0100 |
---|---|---|
committer | Martin Liska <mliska@suse.cz> | 2022-03-15 10:13:03 +0100 |
commit | cad2e08f6c249937e10ad5ae0d4a117923979efb (patch) | |
tree | 4c111a54b7fa0e5fa26391d417da4ec113007f3e /gcc/config | |
parent | 604bf5da51533a218c0393cb5115bae7c8d95282 (diff) | |
parent | 49fb0af9bf8f16907980d383c2bbc85e185ec2e0 (diff) | |
download | gcc-cad2e08f6c249937e10ad5ae0d4a117923979efb.zip gcc-cad2e08f6c249937e10ad5ae0d4a117923979efb.tar.gz gcc-cad2e08f6c249937e10ad5ae0d4a117923979efb.tar.bz2 |
Merge branch 'master' into devel/sphinx
Diffstat (limited to 'gcc/config')
-rw-r--r-- | gcc/config/bfin/bfin.cc | 3 | ||||
-rw-r--r-- | gcc/config/i386/emmintrin.h | 5 | ||||
-rw-r--r-- | gcc/config/i386/i386-expand.cc | 97 | ||||
-rw-r--r-- | gcc/config/i386/i386-protos.h | 5 | ||||
-rw-r--r-- | gcc/config/i386/i386.cc | 21 | ||||
-rw-r--r-- | gcc/config/i386/i386.md | 56 | ||||
-rw-r--r-- | gcc/config/mips/mips.cc | 9 | ||||
-rw-r--r-- | gcc/config/mips/mips.h | 7 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.cc | 60 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.h | 1 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.md | 79 | ||||
-rw-r--r-- | gcc/config/nvptx/nvptx.opt | 2 | ||||
-rw-r--r-- | gcc/config/nvptx/t-nvptx | 4 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.cc | 12 | ||||
-rw-r--r-- | gcc/config/rs6000/vsx.md | 2 |
15 files changed, 261 insertions, 102 deletions
diff --git a/gcc/config/bfin/bfin.cc b/gcc/config/bfin/bfin.cc index c749b48..b2a9142 100644 --- a/gcc/config/bfin/bfin.cc +++ b/gcc/config/bfin/bfin.cc @@ -4763,7 +4763,8 @@ bfin_handle_longcall_attribute (tree *node, tree name, && lookup_attribute ("longcall", TYPE_ATTRIBUTES (*node)))) { warning (OPT_Wattributes, - "cannot apply both longcall and shortcall attributes to the same function"); + "cannot apply both %<longcall%> and %<shortcall%> attributes " + "to the same function"); *no_add_attrs = true; } diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h index a81deb6..654a8e8 100644 --- a/gcc/config/i386/emmintrin.h +++ b/gcc/config/i386/emmintrin.h @@ -718,14 +718,13 @@ _mm_loadu_si64 (void const *__P) extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadu_si32 (void const *__P) { - return _mm_set_epi32 (*(int *)__P, (int)0, (int)0, (int)0); + return _mm_set_epi32 (0, 0, 0, (*(__m32_u *)__P)[0]); } extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__)) _mm_loadu_si16 (void const *__P) { - return _mm_set_epi16 (*(short *)__P, (short)0, (short)0, (short)0, - (short)0, (short)0, (short)0, (short)0); + return _mm_set_epi16 (0, 0, 0, 0, 0, 0, 0, (*(__m16_u *)__P)[0]); } extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__)) diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index 530f83f..e85641d 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -12232,46 +12232,14 @@ ix86_expand_vec_set_builtin (tree exp) return target; } -/* Expand an expression EXP that calls a built-in function, - with result going to TARGET if that's convenient - (and in mode MODE if that's convenient). - SUBTARGET may be used as the target for computing one of EXP's operands. - IGNORE is nonzero if the value is to be ignored. 
*/ - -rtx -ix86_expand_builtin (tree exp, rtx target, rtx subtarget, - machine_mode mode, int ignore) +/* Return true if the necessary isa options for this builtin exist, + else false. + fcode = DECL_MD_FUNCTION_CODE (fndecl); */ +bool +ix86_check_builtin_isa_match (unsigned int fcode, + HOST_WIDE_INT* pbisa, + HOST_WIDE_INT* pbisa2) { - size_t i; - enum insn_code icode, icode2; - tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); - tree arg0, arg1, arg2, arg3, arg4; - rtx op0, op1, op2, op3, op4, pat, pat2, insn; - machine_mode mode0, mode1, mode2, mode3, mode4; - unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); - - /* For CPU builtins that can be folded, fold first and expand the fold. */ - switch (fcode) - { - case IX86_BUILTIN_CPU_INIT: - { - /* Make it call __cpu_indicator_init in libgcc. */ - tree call_expr, fndecl, type; - type = build_function_type_list (integer_type_node, NULL_TREE); - fndecl = build_fn_decl ("__cpu_indicator_init", type); - call_expr = build_call_expr (fndecl, 0); - return expand_expr (call_expr, target, mode, EXPAND_NORMAL); - } - case IX86_BUILTIN_CPU_IS: - case IX86_BUILTIN_CPU_SUPPORTS: - { - tree arg0 = CALL_EXPR_ARG (exp, 0); - tree fold_expr = fold_builtin_cpu (fndecl, &arg0); - gcc_assert (fold_expr != NULL_TREE); - return expand_expr (fold_expr, target, mode, EXPAND_NORMAL); - } - } - HOST_WIDE_INT isa = ix86_isa_flags; HOST_WIDE_INT isa2 = ix86_isa_flags2; HOST_WIDE_INT bisa = ix86_builtins_isa[fcode].isa; @@ -12321,7 +12289,56 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget, bisa |= OPTION_MASK_ISA_SSE2; } - if ((bisa & isa) != bisa || (bisa2 & isa2) != bisa2) + if (pbisa) + *pbisa = bisa; + if (pbisa2) + *pbisa2 = bisa2; + + return (bisa & isa) == bisa && (bisa2 & isa2) == bisa2; +} + +/* Expand an expression EXP that calls a built-in function, + with result going to TARGET if that's convenient + (and in mode MODE if that's convenient). + SUBTARGET may be used as the target for computing one of EXP's operands. 
+ IGNORE is nonzero if the value is to be ignored. */ + +rtx +ix86_expand_builtin (tree exp, rtx target, rtx subtarget, + machine_mode mode, int ignore) +{ + size_t i; + enum insn_code icode, icode2; + tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); + tree arg0, arg1, arg2, arg3, arg4; + rtx op0, op1, op2, op3, op4, pat, pat2, insn; + machine_mode mode0, mode1, mode2, mode3, mode4; + unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl); + HOST_WIDE_INT bisa, bisa2; + + /* For CPU builtins that can be folded, fold first and expand the fold. */ + switch (fcode) + { + case IX86_BUILTIN_CPU_INIT: + { + /* Make it call __cpu_indicator_init in libgcc. */ + tree call_expr, fndecl, type; + type = build_function_type_list (integer_type_node, NULL_TREE); + fndecl = build_fn_decl ("__cpu_indicator_init", type); + call_expr = build_call_expr (fndecl, 0); + return expand_expr (call_expr, target, mode, EXPAND_NORMAL); + } + case IX86_BUILTIN_CPU_IS: + case IX86_BUILTIN_CPU_SUPPORTS: + { + tree arg0 = CALL_EXPR_ARG (exp, 0); + tree fold_expr = fold_builtin_cpu (fndecl, &arg0); + gcc_assert (fold_expr != NULL_TREE); + return expand_expr (fold_expr, target, mode, EXPAND_NORMAL); + } + } + + if (!ix86_check_builtin_isa_match (fcode, &bisa, &bisa2)) { bool add_abi_p = bisa & OPTION_MASK_ISA_64BIT; if (TARGET_ABI_X32) diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index d5e1125..3596ce8 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -53,6 +53,7 @@ extern bool ix86_using_red_zone (void); extern rtx ix86_gen_scratch_sse_rtx (machine_mode); extern unsigned int ix86_regmode_natural_size (machine_mode); +extern bool ix86_check_builtin_isa_match (unsigned int fcode); #ifdef RTX_CODE extern int standard_80387_constant_p (rtx); extern const char *standard_80387_constant_opcode (rtx); @@ -405,3 +406,7 @@ extern rtl_opt_pass *make_pass_remove_partial_avx_dependency (gcc::context *); extern bool ix86_has_no_direct_extern_access; + 
+/* In i386-expand.cc. */ +bool ix86_check_builtin_isa_match (unsigned int, HOST_WIDE_INT*, + HOST_WIDE_INT*); diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc index 4121f98..d77ad83 100644 --- a/gcc/config/i386/i386.cc +++ b/gcc/config/i386/i386.cc @@ -18286,6 +18286,10 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi) bool is_vshift; unsigned HOST_WIDE_INT elems; + /* Don't fold when there's isa mismatch. */ + if (!ix86_check_builtin_isa_match (fn_code, NULL, NULL)) + return false; + switch (fn_code) { case IX86_BUILTIN_TZCNT32: @@ -22597,16 +22601,21 @@ ix86_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, case vec_construct: { - /* N element inserts into SSE vectors. */ - int cost = TYPE_VECTOR_SUBPARTS (vectype) * ix86_cost->sse_op; + int n = TYPE_VECTOR_SUBPARTS (vectype); + /* N - 1 element inserts into an SSE vector, the possible + GPR -> XMM move is accounted for in add_stmt_cost. */ + if (GET_MODE_BITSIZE (mode) <= 128) + return (n - 1) * ix86_cost->sse_op; /* One vinserti128 for combining two SSE vectors for AVX256. */ - if (GET_MODE_BITSIZE (mode) == 256) - cost += ix86_vec_cost (mode, ix86_cost->addss); + else if (GET_MODE_BITSIZE (mode) == 256) + return ((n - 2) * ix86_cost->sse_op + + ix86_vec_cost (mode, ix86_cost->addss)); /* One vinserti64x4 and two vinserti128 for combining SSE and AVX256 vectors to AVX512. */ else if (GET_MODE_BITSIZE (mode) == 512) - cost += 3 * ix86_vec_cost (mode, ix86_cost->addss); - return cost; + return ((n - 4) * ix86_cost->sse_op + + 3 * ix86_vec_cost (mode, ix86_cost->addss)); + gcc_unreachable (); } default: diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md index d15170e..46a2663 100644 --- a/gcc/config/i386/i386.md +++ b/gcc/config/i386/i386.md @@ -3180,6 +3180,38 @@ (const_int 8)) (subreg:SWI248 (match_dup 1) 0))]) +;; Eliminate redundant insv, e.g. 
xorl %eax,%eax; movb $0, %ah +(define_peephole2 + [(parallel [(set (match_operand:SWI48 0 "general_reg_operand") + (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (set (zero_extract:SWI248 (match_operand:SWI248 1 "general_reg_operand") + (const_int 8) + (const_int 8)) + (const_int 0))] + "REGNO (operands[0]) == REGNO (operands[1])" + [(parallel [(set (match_operand:SWI48 0 "general_reg_operand") + (const_int 0)) + (clobber (reg:CC FLAGS_REG))])]) + +;; Combine movl followed by movb. +(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_operand:SWI48 1 "const_int_operand")) + (set (zero_extract:SWI248 (match_operand:SWI248 2 "general_reg_operand") + (const_int 8) + (const_int 8)) + (match_operand:SWI248 3 "const_int_operand"))] + "REGNO (operands[0]) == REGNO (operands[2])" + [(set (match_operand:SWI48 0 "general_reg_operand") + (match_dup 4))] +{ + HOST_WIDE_INT tmp = INTVAL (operands[1]) & ~(HOST_WIDE_INT)0xff00; + tmp |= (INTVAL (operands[3]) & 0xff) << 8; + operands[4] = gen_int_mode (tmp, <SWI48:MODE>mode); +}) + + (define_code_iterator any_extract [sign_extract zero_extract]) (define_insn "*insvqi_2" @@ -4276,6 +4308,30 @@ [(set_attr "isa" "*,avx512dq,avx512dq") (set_attr "type" "imovx,mskmov,mskmov") (set_attr "mode" "SI,QI,QI")]) + +;; Transform xorl; mov[bw] (set strict_low_part) into movz[bw]l. +(define_peephole2 + [(parallel [(set (match_operand:SWI48 0 "general_reg_operand") + (const_int 0)) + (clobber (reg:CC FLAGS_REG))]) + (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand")) + (match_operand:SWI12 2 "nonimmediate_operand"))] + "REGNO (operands[0]) == REGNO (operands[1]) + && (<SWI48:MODE>mode != SImode + || !TARGET_ZERO_EXTEND_WITH_AND + || !optimize_function_for_speed_p (cfun))" + [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))]) + +;; Likewise, but preserving FLAGS_REG. 
+(define_peephole2 + [(set (match_operand:SWI48 0 "general_reg_operand") (const_int 0)) + (set (strict_low_part (match_operand:SWI12 1 "general_reg_operand")) + (match_operand:SWI12 2 "nonimmediate_operand"))] + "REGNO (operands[0]) == REGNO (operands[1]) + && (<SWI48:MODE>mode != SImode + || !TARGET_ZERO_EXTEND_WITH_AND + || !optimize_function_for_speed_p (cfun))" + [(set (match_dup 0) (zero_extend:SWI48 (match_dup 2)))]) ;; Sign extension instructions diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc index 4f9683e..a1c4b43 100644 --- a/gcc/config/mips/mips.cc +++ b/gcc/config/mips/mips.cc @@ -19974,6 +19974,13 @@ mips_option_override (void) target_flags |= MASK_64BIT; } + /* -fsanitize=address needs to turn on -fasynchronous-unwind-tables in + order for tracebacks to be complete but not if any + -fasynchronous-unwind-table were already specified. */ + if (flag_sanitize & SANITIZE_USER_ADDRESS + && !global_options_set.x_flag_asynchronous_unwind_tables) + flag_asynchronous_unwind_tables = 1; + if ((target_flags_explicit & MASK_FLOAT64) != 0) { if (mips_isa_rev >= 6 && !TARGET_FLOAT64) @@ -22591,7 +22598,7 @@ mips_constant_alignment (const_tree exp, HOST_WIDE_INT align) static unsigned HOST_WIDE_INT mips_asan_shadow_offset (void) { - return 0x0aaa0000; + return SUBTARGET_SHADOW_OFFSET; } /* Implement TARGET_STARTING_FRAME_OFFSET. See mips_compute_frame_info diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h index 0029864..858bbba 100644 --- a/gcc/config/mips/mips.h +++ b/gcc/config/mips/mips.h @@ -3463,3 +3463,10 @@ struct GTY(()) machine_function { && !TARGET_MICROMIPS && !TARGET_FIX_24K) #define NEED_INDICATE_EXEC_STACK 0 + +/* Define the shadow offset for asan. Other OS's can override in the + respective tm.h files. */ +#ifndef SUBTARGET_SHADOW_OFFSET +#define SUBTARGET_SHADOW_OFFSET \ + (POINTER_SIZE == 64 ? 
HOST_WIDE_INT_1 << 37 : HOST_WIDE_INT_C (0x0aaa0000)) +#endif diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc index 6ca99a6..3a7be63 100644 --- a/gcc/config/nvptx/nvptx.cc +++ b/gcc/config/nvptx/nvptx.cc @@ -1364,6 +1364,13 @@ nvptx_init_unisimt_predicate (FILE *file) int master = REGNO (cfun->machine->unisimt_master); int pred = REGNO (cfun->machine->unisimt_predicate); fprintf (file, "\t\tld.shared.u32 %%r%d, [%%r%d];\n", master, loc); + if (cfun->machine->unisimt_outside_simt_predicate) + { + int pred_outside_simt + = REGNO (cfun->machine->unisimt_outside_simt_predicate); + fprintf (file, "\t\tsetp.eq.u32 %%r%d, %%r%d, 0;\n", + pred_outside_simt, master); + } fprintf (file, "\t\tmov.u32 %%ustmp0, %%laneid;\n"); /* Compute 'master lane index' as 'laneid & __nvptx_uni[tid.y]'. */ fprintf (file, "\t\tand.b32 %%r%d, %%r%d, %%ustmp0;\n", master, master); @@ -1589,6 +1596,13 @@ nvptx_output_unisimt_switch (FILE *file, bool entering) fprintf (file, "\t{\n"); fprintf (file, "\t\t.reg.u32 %%ustmp2;\n"); fprintf (file, "\t\tmov.u32 %%ustmp2, %d;\n", entering ? -1 : 0); + if (cfun->machine->unisimt_outside_simt_predicate) + { + int pred_outside_simt + = REGNO (cfun->machine->unisimt_outside_simt_predicate); + fprintf (file, "\t\tmov.pred %%r%d, %d;\n", pred_outside_simt, + entering ? 0 : 1); + } if (!crtl->is_leaf) { int loc = REGNO (cfun->machine->unisimt_location); @@ -2835,7 +2849,8 @@ nvptx_mem_maybe_shared_p (const_rtx x) S -- print a shuffle kind specified by CONST_INT t -- print a type opcode suffix, promoting QImode to 32 bits T -- print a type size in bits - u -- print a type opcode suffix without promotions. */ + u -- print a type opcode suffix without promotions. + x -- print a destination operand that may also be a bit bucket. 
*/ static void nvptx_print_operand (FILE *file, rtx x, int code) @@ -2863,6 +2878,14 @@ nvptx_print_operand (FILE *file, rtx x, int code) switch (code) { + case 'x': + if (current_output_insn != NULL + && find_reg_note (current_output_insn, REG_UNUSED, x) != NULL_RTX) + { + fputs ("_", file); + return; + } + goto common; case 'B': if (SYMBOL_REF_P (XEXP (x, 0))) switch (SYMBOL_DATA_AREA (XEXP (x, 0))) @@ -3233,6 +3256,13 @@ nvptx_get_unisimt_predicate () return pred ? pred : pred = gen_reg_rtx (BImode); } +static rtx +nvptx_get_unisimt_outside_simt_predicate () +{ + rtx &pred = cfun->machine->unisimt_outside_simt_predicate; + return pred ? pred : pred = gen_reg_rtx (BImode); +} + /* Return true if given call insn references one of the functions provided by the CUDA runtime: malloc, free, vprintf. */ @@ -3265,7 +3295,9 @@ static bool nvptx_unisimt_handle_set (rtx set, rtx_insn *insn, rtx master) { rtx reg; - if (GET_CODE (set) == SET && REG_P (reg = SET_DEST (set))) + if (GET_CODE (set) == SET + && REG_P (reg = SET_DEST (set)) + && find_reg_note (insn, REG_UNUSED, reg) == NULL_RTX) { emit_insn_after (nvptx_gen_shuffle (reg, reg, master, SHUFFLE_IDX), insn); @@ -3275,6 +3307,16 @@ nvptx_unisimt_handle_set (rtx set, rtx_insn *insn, rtx master) return false; } +static void +predicate_insn (rtx_insn *insn, rtx pred) +{ + rtx pat = PATTERN (insn); + pred = gen_rtx_NE (BImode, pred, const0_rtx); + pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat); + bool changed_p = validate_change (insn, &PATTERN (insn), pat, false); + gcc_assert (changed_p); +} + /* Adjust code for uniform-simt code generation variant by making atomics and "syscalls" conditionally executed, and inserting shuffle-based propagation for registers being set. 
*/ @@ -3341,10 +3383,16 @@ nvptx_reorg_uniform_simt () } rtx pred = nvptx_get_unisimt_predicate (); - pred = gen_rtx_NE (BImode, pred, const0_rtx); - pat = gen_rtx_COND_EXEC (VOIDmode, pred, pat); - bool changed_p = validate_change (insn, &PATTERN (insn), pat, false); - gcc_assert (changed_p); + predicate_insn (insn, pred); + + pred = NULL_RTX; + for (rtx_insn *post = NEXT_INSN (insn); post != next; + post = NEXT_INSN (post)) + { + if (pred == NULL_RTX) + pred = nvptx_get_unisimt_outside_simt_predicate (); + predicate_insn (post, pred); + } } } diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h index 3ca22a5..b55ade6 100644 --- a/gcc/config/nvptx/nvptx.h +++ b/gcc/config/nvptx/nvptx.h @@ -226,6 +226,7 @@ struct GTY(()) machine_function rtx sync_bar; /* Synchronization barrier ID for vectors. */ rtx unisimt_master; /* 'Master lane index' for -muniform-simt. */ rtx unisimt_predicate; /* Predicate for -muniform-simt. */ + rtx unisimt_outside_simt_predicate; /* Predicate for -muniform-simt. */ rtx unisimt_location; /* Mask location for -muniform-simt. */ /* The following two fields hold the maximum size resp. alignment required for per-lane storage in OpenMP SIMD regions. 
*/ diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index a453c1d..1dec7ca 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -172,8 +172,8 @@ return SYMBOL_REF_FUNCTION_P (op); }) -(define_attr "predicable" "false,true" - (const_string "true")) +(define_attr "predicable" "no,yes" + (const_string "yes")) (define_cond_exec [(match_operator 0 "predicate_operator" @@ -911,7 +911,7 @@ (pc)))] "" "%j0\\tbra\\t%l1;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_insn "br_false" [(set (pc) @@ -921,7 +921,7 @@ (pc)))] "" "%J0\\tbra\\t%l1;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) ;; unified conditional branch (define_insn "br_true_uni" @@ -931,7 +931,7 @@ (label_ref (match_operand 1 "" "")) (pc)))] "" "%j0\\tbra.uni\\t%l1;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_insn "br_false_uni" [(set (pc) (if_then_else @@ -940,7 +940,7 @@ (label_ref (match_operand 1 "" "")) (pc)))] "" "%J0\\tbra.uni\\t%l1;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_expand "cbranch<mode>4" [(set (pc) @@ -1619,7 +1619,7 @@ { return nvptx_output_return (); } - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_expand "epilogue" [(clobber (const_int 0))] @@ -1712,7 +1712,7 @@ (const_int 0))] "" "%j0 trap; %j0 exit;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_insn "trap_if_false" [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R") @@ -1720,7 +1720,7 @@ (const_int 0))] "" "%J0 trap; %J0 exit;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_expand "ctrap<mode>4" [(trap_if (match_operator 0 "nvptx_comparison_operator" @@ -1769,28 +1769,28 @@ UNSPECV_FORK)] "" "// fork %0;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_insn "nvptx_forked" [(unspec_volatile:SI [(match_operand:SI 0 
"const_int_operand" "")] UNSPECV_FORKED)] "" "// forked %0;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_insn "nvptx_joining" [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] UNSPECV_JOINING)] "" "// joining %0;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_insn "nvptx_join" [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")] UNSPECV_JOIN)] "" "// join %0;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_expand "oacc_fork" [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "") @@ -2035,7 +2035,7 @@ output_asm_insn ("}", NULL); return ""; } - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_insn "atomic_compare_and_swap<mode>_1" [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") @@ -2050,7 +2050,7 @@ "" { const char *t - = "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"; + = "%.\\tatom%A1.cas.b%T0\\t%x0, %1, %2, %3;"; return nvptx_output_atomic_insn (t, operands, 1, 4); } [(set_attr "atomic" "true")]) @@ -2076,7 +2076,7 @@ return ""; } const char *t - = "%.\tatom%A1.exch.b%T0\t%0, %1, %2;"; + = "%.\tatom%A1.exch.b%T0\t%x0, %1, %2;"; return nvptx_output_atomic_insn (t, operands, 1, 3); } [(set_attr "atomic" "true")]) @@ -2166,7 +2166,7 @@ return ""; } const char *t - = "%.\\tatom%A1.add%t0\\t%0, %1, %2;"; + = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;"; return nvptx_output_atomic_insn (t, operands, 1, 3); } [(set_attr "atomic" "true")]) @@ -2196,7 +2196,7 @@ return ""; } const char *t - = "%.\\tatom%A1.add%t0\\t%0, %1, %2;"; + = "%.\\tatom%A1.add%t0\\t%x0, %1, %2;"; return nvptx_output_atomic_insn (t, operands, 1, 3); } [(set_attr "atomic" "true")]) @@ -2226,7 +2226,7 @@ return ""; } const char *t - = "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"; + = "%.\\tatom%A1.<logic>.b%T0\\t%x0, %1, %2;"; return nvptx_output_atomic_insn (t, operands, 1, 3); } @@ -2263,30 +2263,33 @@ ? 
"\\tbarrier.sync\\t%0, %1;" : "\\tbar.sync\\t%0, %1;"); } - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_insn "nvptx_warpsync" [(unspec_volatile [(const_int 0)] UNSPECV_WARPSYNC)] "TARGET_PTX_6_0" - "\\tbar.warp.sync\\t0xffffffff;" - [(set_attr "predicable" "false")]) + "%.\\tbar.warp.sync\\t0xffffffff;") (define_insn "nvptx_uniform_warp_check" [(unspec_volatile [(const_int 0)] UNSPECV_UNIFORM_WARP_CHECK)] "" { - output_asm_insn ("{", NULL); - output_asm_insn ("\\t" ".reg.b32" "\\t" "act;", NULL); - output_asm_insn ("\\t" "vote.ballot.b32" "\\t" "act,1;", NULL); - output_asm_insn ("\\t" ".reg.pred" "\\t" "uni;", NULL); - output_asm_insn ("\\t" "setp.eq.b32" "\\t" "uni,act,0xffffffff;", - NULL); - output_asm_insn ("@ !uni\\t" "trap;", NULL); - output_asm_insn ("@ !uni\\t" "exit;", NULL); - output_asm_insn ("}", NULL); + const char *insns[] = { + "{", + "\\t" ".reg.b32" "\\t" "act;", + "%.\\t" "vote.ballot.b32" "\\t" "act,1;", + "\\t" ".reg.pred" "\\t" "do_abort;", + "\\t" "mov.pred" "\\t" "do_abort,0;", + "%.\\t" "setp.ne.b32" "\\t" "do_abort,act,0xffffffff;", + "@ do_abort\\t" "trap;", + "@ do_abort\\t" "exit;", + "}", + NULL + }; + for (const char **p = &insns[0]; *p != NULL; p++) + output_asm_insn (*p, NULL); return ""; - } - [(set_attr "predicable" "false")]) + }) (define_expand "memory_barrier" [(set (match_dup 0) @@ -2307,7 +2310,7 @@ (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))] "" "\\tmembar.sys;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_expand "nvptx_membar_cta" [(set (match_dup 0) @@ -2323,7 +2326,7 @@ (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))] "" "\\tmembar.cta;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_expand "nvptx_membar_gl" [(set (match_dup 0) @@ -2339,13 +2342,13 @@ (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_GL))] "" "\\tmembar.gl;" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) 
(define_insn "nvptx_nounroll" [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)] "" "\\t.pragma \\\"nounroll\\\";" - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) (define_insn "nvptx_red_partition" [(set (match_operand:DI 0 "nonimmediate_operand" "=R") @@ -2355,7 +2358,7 @@ { return nvptx_output_red_partition (operands[0], operands[1]); } - [(set_attr "predicable" "false")]) + [(set_attr "predicable" "no")]) ;; Expand QI mode operations using SI mode instructions. (define_code_iterator any_sbinary [plus minus smin smax]) diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt index c83ceb3..fea99c5 100644 --- a/gcc/config/nvptx/nvptx.opt +++ b/gcc/config/nvptx/nvptx.opt @@ -53,7 +53,7 @@ Generate code for OpenMP offloading: enables -msoft-stack and -muniform-simt. ; Default needs to be in sync with default in ASM_SPEC in nvptx.h. misa= -Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) Init(PTX_ISA_SM35) +Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) Init(PTX_ISA_SM30) Specify the version of the ptx ISA to use. Enum diff --git a/gcc/config/nvptx/t-nvptx b/gcc/config/nvptx/t-nvptx index 8f67264..b63c4a5 100644 --- a/gcc/config/nvptx/t-nvptx +++ b/gcc/config/nvptx/t-nvptx @@ -30,6 +30,4 @@ s-nvptx-gen-opt: $(srcdir)/config/nvptx/nvptx-sm.def tmp-nvptx-gen.opt $(srcdir)/config/nvptx/nvptx-gen.opt $(STAMP) s-nvptx-gen-opt -MULTILIB_OPTIONS = mgomp - -MULTILIB_EXTRA_OPTS = misa=sm_30 mptx=3.1 +MULTILIB_OPTIONS = mgomp mptx=3.1 diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc index 3afe78f..283e830 100644 --- a/gcc/config/rs6000/rs6000.cc +++ b/gcc/config/rs6000/rs6000.cc @@ -5804,20 +5804,28 @@ rs6000_machine_from_flags (void) if (rs6000_cpu == PROCESSOR_MPCCORE) return "\"821\""; +#if 0 + /* This (and ppc64 below) are disabled here (for now at least) because + PROCESSOR_POWERPC, PROCESSOR_POWERPC64, and PROCESSOR_COMMON + are #define'd as some of these. 
Untangling that is a job for later. */ + /* 600 series and 700 series, "classic" */ if (rs6000_cpu == PROCESSOR_PPC601 || rs6000_cpu == PROCESSOR_PPC603 || rs6000_cpu == PROCESSOR_PPC604 || rs6000_cpu == PROCESSOR_PPC604e - || rs6000_cpu == PROCESSOR_PPC750 || rs6000_cpu == PROCESSOR_POWERPC) + || rs6000_cpu == PROCESSOR_PPC750) return "ppc"; +#endif /* Classic with AltiVec, "G4" */ if (rs6000_cpu == PROCESSOR_PPC7400 || rs6000_cpu == PROCESSOR_PPC7450) return "\"7450\""; +#if 0 /* The older 64-bit CPUs */ if (rs6000_cpu == PROCESSOR_PPC620 || rs6000_cpu == PROCESSOR_PPC630 - || rs6000_cpu == PROCESSOR_RS64A || rs6000_cpu == PROCESSOR_POWERPC64) + || rs6000_cpu == PROCESSOR_RS64A) return "ppc64"; +#endif HOST_WIDE_INT flags = rs6000_isa_flags; diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index d0fb92f..15bd86d 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -5033,7 +5033,7 @@ ;; generate the vextsd2q instruction. (define_insn_and_split "extendditi2" [(set (match_operand:TI 0 "register_operand" "=r,r,v,v,v") - (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,r,wa,Z"))) + (sign_extend:TI (match_operand:DI 1 "input_operand" "r,m,b,wa,Z"))) (clobber (reg:DI CA_REGNO))] "TARGET_POWERPC64 && TARGET_POWER10" "#" |