Diffstat (limited to 'gcc/config/alpha/alpha.cc')
-rw-r--r-- | gcc/config/alpha/alpha.cc | 703
1 file changed, 661 insertions, 42 deletions
diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc index 6965ece..ba470d9 100644 --- a/gcc/config/alpha/alpha.cc +++ b/gcc/config/alpha/alpha.cc @@ -1661,8 +1661,10 @@ alpha_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i, if (!aligned_memory_operand (x, mode)) sri->icode = direct_optab_handler (reload_in_optab, mode); } - else + else if (aligned_memory_operand (x, mode) || !TARGET_SAFE_BWA) sri->icode = direct_optab_handler (reload_out_optab, mode); + else + sri->icode = code_for_reload_out_safe_bwa (mode); return NO_REGS; } } @@ -2391,6 +2393,70 @@ alpha_expand_mov_nobwx (machine_mode mode, rtx *operands) return false; } +/* Expand a multi-thread and async-signal safe QImode or HImode + move instruction; return true if all work is done. */ + +bool +alpha_expand_mov_safe_bwa (machine_mode mode, rtx *operands) +{ + /* If the output is not a register, the input must be. */ + if (MEM_P (operands[0])) + operands[1] = force_reg (mode, operands[1]); + + /* If it's a memory load, the sequence is the usual non-BWX one. */ + if (any_memory_operand (operands[1], mode)) + return alpha_expand_mov_nobwx (mode, operands); + + /* Handle memory store cases, unaligned and aligned. The only case + where we can be called during reload is for aligned loads; all + other cases require temporaries. */ + if (any_memory_operand (operands[0], mode)) + { + if (aligned_memory_operand (operands[0], mode)) + { + rtx label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + rtx aligned_mem, bitnum; + rtx status = gen_reg_rtx (SImode); + rtx temp = gen_reg_rtx (SImode); + get_aligned_mem (operands[0], &aligned_mem, &bitnum); + emit_insn (gen_aligned_store_safe_bwa (aligned_mem, operands[1], + bitnum, status, temp)); + + rtx cond = gen_rtx_EQ (DImode, + gen_rtx_SUBREG (DImode, status, 0), + const0_rtx); + alpha_emit_unlikely_jump (cond, label); + } + else + { + rtx addr = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (addr, get_unaligned_address (operands[0]))); + + rtx aligned_addr = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (aligned_addr, + gen_rtx_AND (DImode, addr, GEN_INT (-8)))); + + rtx label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + rtx status = gen_reg_rtx (DImode); + rtx temp = gen_reg_rtx (DImode); + rtx seq = gen_unaligned_store_safe_bwa (mode, addr, operands[1], + aligned_addr, status, temp); + alpha_set_memflags (seq, operands[0]); + emit_insn (seq); + + rtx cond = gen_rtx_EQ (DImode, status, const0_rtx); + alpha_emit_unlikely_jump (cond, label); + } + return true; + } + + return false; +} + /* Implement the movmisalign patterns. One of the operands is a memory that is not naturally aligned. Emit instructions to load it. */ @@ -2415,7 +2481,11 @@ alpha_expand_movmisalign (machine_mode mode, rtx *operands) { if (!reg_or_0_operand (operands[1], mode)) operands[1] = force_reg (mode, operands[1]); - alpha_expand_unaligned_store (operands[0], operands[1], 8, 0); + if (TARGET_SAFE_PARTIAL) + alpha_expand_unaligned_store_safe_partial (operands[0], operands[1], + 8, 0, BITS_PER_UNIT); + else + alpha_expand_unaligned_store (operands[0], operands[1], 8, 0); } else gcc_unreachable (); @@ -3607,6 +3677,310 @@ alpha_expand_unaligned_store (rtx dst, rtx src, emit_move_insn (meml, dstl); } +/* Store data SRC of size SIZE using unaligned methods to location + referred by base DST plus offset OFS and of alignment ALIGN. This is + a multi-thread and async-signal safe implementation for all sizes from + 8 down to 1. 
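   As an illustration of the hazard being avoided (a plain-C sketch,
   not part of the patch): the natural non-BWX sequence for a
   sub-quadword store is a read-modify-write of the whole containing
   quadword,

     // UNSAFE: a concurrent store to a neighbouring byte of *quad
     // between the load and the store below is silently lost.
     void unsafe_store_byte (uint64_t *quad, int idx, uint8_t val)
     {
       uint64_t mask = 0xffull << (idx * 8);
       uint64_t old = *quad;                                   // LDQ
       *quad = (old & ~mask) | ((uint64_t) val << (idx * 8));  // STQ
     }

   and the LL/SC loops used here close that window by retrying the
   store whenever the containing quadword has changed in between.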
+ + For BWX targets it is straightforward, we just write data piecemeal, + taking any advantage of the alignment known and observing that we + shouldn't have been called for alignments of 32 or above in the first + place (though adding support for that would be easy). + + For non-BWX targets we need to load data from memory, mask it such as + to keep any part outside the area written, insert data to be stored, + and write the result back atomically. For sizes that are not a power + of 2 there are no byte mask or insert machine instructions available + so the mask required has to be built by hand, however ZAP and ZAPNOT + instructions can then be used to apply the mask. Since LL/SC loops + are used, the high and low parts have to be disentangled from each + other and handled sequentially except for size 1 where there is only + the low part to be written. */ + +void +alpha_expand_unaligned_store_safe_partial (rtx dst, rtx src, + HOST_WIDE_INT size, + HOST_WIDE_INT ofs, + HOST_WIDE_INT align) +{ + if (TARGET_BWX) + { + machine_mode mode = align >= 2 * BITS_PER_UNIT ? HImode : QImode; + HOST_WIDE_INT step = mode == HImode ? 2 : 1; + + while (1) + { + rtx dstl = src == const0_rtx ? const0_rtx : gen_lowpart (mode, src); + rtx meml = adjust_address (dst, mode, ofs); + emit_move_insn (meml, dstl); + + ofs += step; + size -= step; + if (size == 0) + return; + + if (size < step) + { + mode = QImode; + step = 1; + } + + if (src != const0_rtx) + src = expand_simple_binop (DImode, LSHIFTRT, src, + GEN_INT (step * BITS_PER_UNIT), + NULL, 1, OPTAB_WIDEN); + } + } + + rtx dsta = XEXP (dst, 0); + if (GET_CODE (dsta) == LO_SUM) + dsta = force_reg (Pmode, dsta); + + rtx addr = copy_addr_to_reg (plus_constant (Pmode, dsta, ofs)); + + rtx byte_mask = NULL_RTX; + switch (size) + { + case 3: + case 5: + case 6: + case 7: + /* If size is not a power of 2 we need to build the byte mask from + size by hand. This is SIZE consecutive bits starting from bit 0. */ + byte_mask = force_reg (DImode, GEN_INT (~(HOST_WIDE_INT_M1U << size))); + + /* Unlike with machine INSxx and MSKxx operations there is no + implicit mask applied to addr with corresponding operations + made by hand, so extract the byte index now. */ + emit_insn (gen_rtx_SET (addr, + gen_rtx_AND (DImode, addr, GEN_INT (~-8)))); + } + + /* Must handle high before low for degenerate case of aligned. */ + if (size != 1) + { + rtx addrh = gen_reg_rtx (DImode); + rtx aligned_addrh = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (addrh, + plus_constant (DImode, dsta, ofs + size - 1))); + emit_insn (gen_rtx_SET (aligned_addrh, + gen_rtx_AND (DImode, addrh, GEN_INT (-8)))); + + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. */ + rtx memh = change_address (dst, DImode, aligned_addrh); + set_mem_alias_set (memh, 0); + + rtx insh = gen_reg_rtx (DImode); + rtx maskh = NULL_RTX; + switch (size) + { + case 1: + case 2: + case 4: + case 8: + if (src != CONST0_RTX (GET_MODE (src))) + emit_insn (gen_insxh (insh, gen_lowpart (DImode, src), + GEN_INT (size * 8), addr)); + break; + case 3: + case 5: + case 6: + case 7: + { + /* For the high part we shift the byte mask right by 8 minus + the byte index in addr, so we need an extra calculation. 
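   As a concrete sketch of that calculation (values illustrative, not
   from the patch), with idx standing for the byte index addr & 7:

     uint64_t byte_mask = ~(~0ull << size);              // SIZE low bits
     uint8_t maskl = (uint8_t) (byte_mask << idx);       // low quadword
     uint8_t maskh = (uint8_t) (byte_mask >> (8 - idx)); // high quadword

   For a 3-byte store at byte index 6, byte_mask is 0b00000111, so
   maskl selects bytes 6-7 of the low quadword (0b11000000) and maskh
   selects byte 0 of the high one (0b00000001).  When idx is 0 the
   right shift by 8 makes maskh zero, giving the degenerate aligned
   case noted above.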
*/ + rtx shamt = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (shamt, + gen_rtx_MINUS (DImode, + force_reg (DImode, + GEN_INT (8)), + addr))); + + maskh = gen_reg_rtx (DImode); + rtx shift = gen_rtx_LSHIFTRT (DImode, byte_mask, shamt); + emit_insn (gen_rtx_SET (maskh, shift)); + + /* Insert any bytes required by hand, by doing a byte-wise + shift on SRC right by the same number and then zap the + bytes outside the byte mask. */ + if (src != CONST0_RTX (GET_MODE (src))) + { + rtx byte_loc = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (byte_loc, + gen_rtx_ASHIFT (DImode, + shamt, GEN_INT (3)))); + rtx bytes = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (bytes, + gen_rtx_LSHIFTRT (DImode, + gen_lowpart (DImode, + src), + byte_loc))); + + rtx zapmask = gen_rtx_NOT (QImode, + gen_rtx_SUBREG (QImode, maskh, 0)); + rtx zap = gen_rtx_UNSPEC (DImode, gen_rtvec (1, zapmask), + UNSPEC_ZAP); + emit_insn (gen_rtx_SET (insh, + gen_rtx_AND (DImode, zap, bytes))); + } + } + break; + default: + gcc_unreachable (); + } + + rtx labelh = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (labelh, 0)); + + rtx dsth = gen_reg_rtx (DImode); + emit_insn (gen_load_locked (DImode, dsth, memh)); + + switch (size) + { + case 1: + case 2: + case 4: + case 8: + emit_insn (gen_mskxh (dsth, dsth, GEN_INT (size * 8), addr)); + break; + case 3: + case 5: + case 6: + case 7: + { + rtx zapmask = gen_rtx_SUBREG (QImode, maskh, 0); + rtx zap = gen_rtx_UNSPEC (DImode, gen_rtvec (1, zapmask), + UNSPEC_ZAP); + emit_insn (gen_rtx_SET (dsth, gen_rtx_AND (DImode, zap, dsth))); + } + break; + default: + gcc_unreachable (); + } + + if (src != CONST0_RTX (GET_MODE (src))) + dsth = expand_simple_binop (DImode, IOR, insh, dsth, dsth, 0, + OPTAB_WIDEN); + + emit_insn (gen_store_conditional (DImode, dsth, memh, dsth)); + + alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, dsth, const0_rtx), labelh); + } + + /* Now handle low. */ + rtx addrl = gen_reg_rtx (DImode); + rtx aligned_addrl = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (addrl, plus_constant (DImode, dsta, ofs))); + emit_insn (gen_rtx_SET (aligned_addrl, + gen_rtx_AND (DImode, addrl, GEN_INT (-8)))); + + /* AND addresses cannot be in any alias set, since they may implicitly + alias surrounding code. Ideally we'd have some alias set that + covered all types except those with alignment 8 or higher. */ + rtx meml = change_address (dst, DImode, aligned_addrl); + set_mem_alias_set (meml, 0); + + rtx insl = gen_reg_rtx (DImode); + rtx maskl; + switch (size) + { + case 1: + if (src != CONST0_RTX (GET_MODE (src))) + emit_insn (gen_insbl (insl, gen_lowpart (QImode, src), addr)); + break; + case 2: + if (src != CONST0_RTX (GET_MODE (src))) + emit_insn (gen_inswl (insl, gen_lowpart (HImode, src), addr)); + break; + case 4: + if (src != CONST0_RTX (GET_MODE (src))) + emit_insn (gen_insll (insl, gen_lowpart (SImode, src), addr)); + break; + case 8: + if (src != CONST0_RTX (GET_MODE (src))) + emit_insn (gen_insql (insl, gen_lowpart (DImode, src), addr)); + break; + case 3: + case 5: + case 6: + case 7: + /* For the low part we shift the byte mask left by the byte index, + which is already in ADDR. */ + maskl = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (maskl, + gen_rtx_ASHIFT (DImode, byte_mask, addr))); + + /* Insert any bytes required by hand, by doing a byte-wise shift + on SRC left by the same number and then zap the bytes outside + the byte mask. 
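   A plain-C model of the byte-granular zap operation relied upon here
   (illustrative, not from the patch):

     uint64_t zap (uint64_t r, uint8_t m)  // clear byte i where bit i set
     {
       for (int i = 0; i < 8; i++)
         if (m & (1u << i))
           r &= ~(0xffull << (i * 8));
       return r;
     }

   The shifted source is masked as zap (bytes, ~mask), keeping only the
   bytes inside the byte mask, while the LL/SC loop applies
   zap (old, mask) to clear exactly those bytes of the memory word
   before the two halves are ORed together.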
*/ + if (src != CONST0_RTX (GET_MODE (src))) + { + rtx byte_loc = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (byte_loc, + gen_rtx_ASHIFT (DImode, + force_reg (DImode, addr), + GEN_INT (3)))); + rtx bytes = gen_reg_rtx (DImode); + emit_insn (gen_rtx_SET (bytes, + gen_rtx_ASHIFT (DImode, + gen_lowpart (DImode, src), + byte_loc))); + + rtx zapmask = gen_rtx_NOT (QImode, + gen_rtx_SUBREG (QImode, maskl, 0)); + rtx zap = gen_rtx_UNSPEC (DImode, gen_rtvec (1, zapmask), + UNSPEC_ZAP); + emit_insn (gen_rtx_SET (insl, gen_rtx_AND (DImode, zap, bytes))); + } + break; + default: + gcc_unreachable (); + } + + rtx labell = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (labell, 0)); + + rtx dstl = gen_reg_rtx (DImode); + emit_insn (gen_load_locked (DImode, dstl, meml)); + + switch (size) + { + case 1: + emit_insn (gen_mskbl (dstl, dstl, addr)); + break; + case 2: + emit_insn (gen_mskwl (dstl, dstl, addr)); + break; + case 4: + emit_insn (gen_mskll (dstl, dstl, addr)); + break; + case 8: + emit_insn (gen_mskql (dstl, dstl, addr)); + break; + case 3: + case 5: + case 6: + case 7: + { + rtx zapmask = gen_rtx_SUBREG (QImode, maskl, 0); + rtx zap = gen_rtx_UNSPEC (DImode, gen_rtvec (1, zapmask), UNSPEC_ZAP); + emit_insn (gen_rtx_SET (dstl, gen_rtx_AND (DImode, zap, dstl))); + } + break; + default: + gcc_unreachable (); + } + + if (src != CONST0_RTX (GET_MODE (src))) + dstl = expand_simple_binop (DImode, IOR, insl, dstl, dstl, 0, OPTAB_WIDEN); + + emit_insn (gen_store_conditional (DImode, dstl, meml, dstl)); + + alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, dstl, const0_rtx), labell); +} + /* The block move code tries to maximize speed by separating loads and stores at the expense of register pressure: we load all of the data before we store it back out. There are two secondary effects worth @@ -3772,6 +4146,117 @@ alpha_expand_unaligned_store_words (rtx *data_regs, rtx dmem, emit_move_insn (st_addr_1, st_tmp_1); } +/* Store an integral number of consecutive unaligned quadwords. DATA_REGS + may be NULL to store zeros. This is a multi-thread and async-signal + safe implementation. */ + +static void +alpha_expand_unaligned_store_words_safe_partial (rtx *data_regs, rtx dmem, + HOST_WIDE_INT words, + HOST_WIDE_INT ofs, + HOST_WIDE_INT align) +{ + rtx const im8 = GEN_INT (-8); + rtx ins_tmps[MAX_MOVE_WORDS]; + HOST_WIDE_INT i; + + /* Generate all the tmp registers we need. */ + for (i = 0; i < words; i++) + ins_tmps[i] = data_regs != NULL ? gen_reg_rtx (DImode) : const0_rtx; + + if (ofs != 0) + dmem = adjust_address (dmem, GET_MODE (dmem), ofs); + + /* For BWX store the ends before we start fiddling with data registers + to fill the middle. Also if we have no more than two quadwords, + then obviously we're done. */ + if (TARGET_BWX) + { + rtx datan = data_regs ? data_regs[words - 1] : const0_rtx; + rtx data0 = data_regs ? data_regs[0] : const0_rtx; + HOST_WIDE_INT e = (words - 1) * 8; + + alpha_expand_unaligned_store_safe_partial (dmem, data0, 8, 0, align); + alpha_expand_unaligned_store_safe_partial (dmem, datan, 8, e, align); + if (words <= 2) + return; + } + + rtx dmema = XEXP (dmem, 0); + if (GET_CODE (dmema) == LO_SUM) + dmema = force_reg (Pmode, dmema); + + /* Shift the input data into place. 
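   A model of the INSQL/INSQH pair this relies on (illustrative, not
   from the patch): with idx = a & 7, a quadword X stored at unaligned
   address A contributes bytes to two adjacent aligned quadwords,

     uint64_t insql (uint64_t x, uint64_t a)         // low part
     {
       return x << ((a & 7) * 8);
     }
     uint64_t insqh (uint64_t x, uint64_t a)         // high part
     {
       int idx = a & 7;
       return idx ? x >> ((8 - idx) * 8) : 0;  // shift of 64 yields 0
     }

   so each interior aligned quadword of the run is built as
   insqh (data[i], a) | insql (data[i + 1], a), which is exactly the
   combination formed by the loop that follows.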
*/ + rtx dreg = copy_addr_to_reg (dmema); + if (data_regs != NULL) + { + for (i = words - 1; i >= 0; i--) + { + emit_insn (gen_insqh (ins_tmps[i], data_regs[i], dreg)); + emit_insn (gen_insql (data_regs[i], data_regs[i], dreg)); + } + for (i = words - 1; i > 0; i--) + ins_tmps[i - 1] = expand_simple_binop (DImode, IOR, data_regs[i], + ins_tmps[i - 1], + ins_tmps[i - 1], + 1, OPTAB_DIRECT); + } + + if (!TARGET_BWX) + { + rtx temp = gen_reg_rtx (DImode); + rtx mem = gen_rtx_MEM (DImode, + expand_simple_binop (Pmode, AND, dreg, im8, + NULL_RTX, 1, OPTAB_DIRECT)); + + rtx label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_insn (gen_load_locked (DImode, temp, mem)); + emit_insn (gen_mskql (temp, temp, dreg)); + if (data_regs != NULL) + temp = expand_simple_binop (DImode, IOR, temp, data_regs[0], + temp, 1, OPTAB_DIRECT); + emit_insn (gen_store_conditional (DImode, temp, mem, temp)); + + alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, temp, const0_rtx), label); + } + + for (i = words - 1; i > 0; --i) + { + rtx temp = change_address (dmem, Pmode, + gen_rtx_AND (Pmode, + plus_constant (Pmode, + dmema, i * 8), + im8)); + set_mem_alias_set (temp, 0); + emit_move_insn (temp, ins_tmps[i - 1]); + } + + if (!TARGET_BWX) + { + rtx temp = gen_reg_rtx (DImode); + rtx addr = expand_simple_binop (Pmode, PLUS, dreg, + GEN_INT (words * 8 - 1), + NULL_RTX, 1, OPTAB_DIRECT); + rtx mem = gen_rtx_MEM (DImode, + expand_simple_binop (Pmode, AND, addr, im8, + NULL_RTX, 1, OPTAB_DIRECT)); + + rtx label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + emit_insn (gen_load_locked (DImode, temp, mem)); + emit_insn (gen_mskqh (temp, temp, dreg)); + if (data_regs != NULL) + temp = expand_simple_binop (DImode, IOR, temp, ins_tmps[words - 1], + temp, 1, OPTAB_DIRECT); + emit_insn (gen_store_conditional (DImode, temp, mem, temp)); + + alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, temp, const0_rtx), label); + } +} + /* Get the base alignment and offset of EXPR in A and O respectively. Check for any pseudo register pointer alignment and for any tree node information and return the largest alignment determined and @@ -4081,26 +4566,74 @@ alpha_expand_block_move (rtx operands[]) if (GET_MODE (data_regs[i + words]) != DImode) break; - if (words == 1) - alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); + if (TARGET_SAFE_PARTIAL) + { + if (words == 1) + alpha_expand_unaligned_store_safe_partial (orig_dst, data_regs[i], + 8, ofs, dst_align); + else + alpha_expand_unaligned_store_words_safe_partial (data_regs + i, + orig_dst, words, + ofs, dst_align); + } else - alpha_expand_unaligned_store_words (data_regs + i, orig_dst, - words, ofs); - + { + if (words == 1) + alpha_expand_unaligned_store (orig_dst, data_regs[i], 8, ofs); + else + alpha_expand_unaligned_store_words (data_regs + i, orig_dst, + words, ofs); + } i += words; ofs += words * 8; } - /* Due to the above, this won't be aligned. */ + /* If we are in the partial memory access safety mode with a non-BWX + target, then coalesce data loaded of different widths so as to + minimize the number of safe partial stores as they are expensive. */ + if (!TARGET_BWX && TARGET_SAFE_PARTIAL) + { + HOST_WIDE_INT size = 0; + unsigned int n; + + for (n = i; i < nregs; i++) + { + if (i != n) + { + /* Don't widen SImode data where obtained by extraction. 
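   As an illustration with hypothetical names, coalescing amounts to
   ORing the already-shifted pieces into one register so that a single
   expensive safe partial store covers all of them:

     uint64_t acc = si_piece;              // 4 bytes, bits 0-31
     acc |= (uint64_t) hi_piece << 32;     // 2 more bytes
     acc |= (uint64_t) qi_piece << 48;     // 1 more byte
     // one 7-byte safe partial store of acc replaces three stores

   where si_piece, hi_piece and qi_piece stand for the SImode, HImode
   and QImode values held in data_regs.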
*/ + rtx data = data_regs[n]; + if (GET_MODE (data) == SImode && src_align < 32) + data = gen_rtx_SUBREG (DImode, data, 0); + rtx field = expand_simple_binop (DImode, ASHIFT, data_regs[i], + GEN_INT (size * BITS_PER_UNIT), + NULL_RTX, 1, OPTAB_DIRECT); + data_regs[n] = expand_simple_binop (DImode, IOR, data, field, + data, 1, OPTAB_WIDEN); + } + size += GET_MODE_SIZE (GET_MODE (data_regs[i])); + gcc_assert (size < 8); + } + if (size > 0) + alpha_expand_unaligned_store_safe_partial (orig_dst, data_regs[n], + size, ofs, dst_align); + ofs += size; + } + + /* We've done aligned stores above, this won't be aligned. */ while (i < nregs && GET_MODE (data_regs[i]) == SImode) { - alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); + gcc_assert (TARGET_BWX || !TARGET_SAFE_PARTIAL); + if (TARGET_SAFE_PARTIAL) + alpha_expand_unaligned_store_safe_partial (orig_dst, data_regs[i], + 4, ofs, dst_align); + else + alpha_expand_unaligned_store (orig_dst, data_regs[i], 4, ofs); ofs += 4; i++; gcc_assert (i == nregs || GET_MODE (data_regs[i]) != SImode); } - if (dst_align >= 16) + if (TARGET_BWX && dst_align >= 16) while (i < nregs && GET_MODE (data_regs[i]) == HImode) { emit_move_insn (adjust_address (orig_dst, HImode, ofs), data_regs[i]); @@ -4110,7 +4643,12 @@ alpha_expand_block_move (rtx operands[]) else while (i < nregs && GET_MODE (data_regs[i]) == HImode) { - alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); + gcc_assert (TARGET_BWX || !TARGET_SAFE_PARTIAL); + if (TARGET_SAFE_PARTIAL) + alpha_expand_unaligned_store_safe_partial (orig_dst, data_regs[i], + 2, ofs, dst_align); + else + alpha_expand_unaligned_store (orig_dst, data_regs[i], 2, ofs); i++; ofs += 2; } @@ -4119,6 +4657,7 @@ alpha_expand_block_move (rtx operands[]) while (i < nregs) { gcc_assert (GET_MODE (data_regs[i]) == QImode); + gcc_assert (TARGET_BWX || !TARGET_SAFE_PARTIAL); emit_move_insn (adjust_address (orig_dst, QImode, ofs), data_regs[i]); i++; ofs += 1; @@ -4127,6 +4666,27 @@ alpha_expand_block_move (rtx operands[]) return 1; } +/* Expand a multi-thread and async-signal safe partial clear of a longword + or a quadword quantity indicated by MODE at aligned memory location MEM + according to MASK. */ + +static void +alpha_expand_clear_safe_partial_nobwx (rtx mem, machine_mode mode, + HOST_WIDE_INT mask) +{ + rtx label = gen_rtx_LABEL_REF (DImode, gen_label_rtx ()); + emit_label (XEXP (label, 0)); + + rtx temp = gen_reg_rtx (mode); + rtx status = mode == DImode ? temp : gen_rtx_SUBREG (DImode, temp, 0); + + emit_insn (gen_load_locked (mode, temp, mem)); + emit_insn (gen_rtx_SET (temp, gen_rtx_AND (mode, temp, GEN_INT (mask)))); + emit_insn (gen_store_conditional (mode, status, mem, temp)); + + alpha_emit_unlikely_jump (gen_rtx_EQ (DImode, status, const0_rtx), label); +} + int alpha_expand_block_clear (rtx operands[]) { @@ -4171,8 +4731,9 @@ alpha_expand_block_clear (rtx operands[]) { /* Given that alignofs is bounded by align, the only time BWX could generate three stores is for a 7 byte fill. Prefer two individual - stores over a load/mask/store sequence. */ - if ((!TARGET_BWX || alignofs == 7) + stores over a load/mask/store sequence. In the partial safety + mode always do individual stores regardless of their count. 
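   The masked clears emitted by alpha_expand_clear_safe_partial_nobwx
   behave like a C11 atomic AND; as an analogy only (this is not the
   code generated):

     #include <stdatomic.h>
     #include <stdint.h>

     static void
     clear_partial (_Atomic uint64_t *aligned_word, uint64_t mask)
     {
       // expands to an LDx_L / AND / STx_C retry loop on Alpha
       atomic_fetch_and_explicit (aligned_word, mask,
                                  memory_order_relaxed);
     }

   clearing exactly the bytes whose mask bits are zero while concurrent
   stores to the remaining bytes are never lost.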
*/ + if ((!TARGET_BWX || (!TARGET_SAFE_PARTIAL && alignofs == 7)) && align >= 32 && !(alignofs == 4 && bytes >= 4)) { @@ -4198,10 +4759,15 @@ alpha_expand_block_clear (rtx operands[]) } alignofs = 0; - tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), - NULL_RTX, 1, OPTAB_WIDEN); + if (TARGET_SAFE_PARTIAL) + alpha_expand_clear_safe_partial_nobwx (mem, mode, mask); + else + { + tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); - emit_move_insn (mem, tmp); + emit_move_insn (mem, tmp); + } } if (TARGET_BWX && (alignofs & 1) && bytes >= 1) @@ -4306,7 +4872,11 @@ alpha_expand_block_clear (rtx operands[]) { words = bytes / 8; - alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs); + if (TARGET_SAFE_PARTIAL) + alpha_expand_unaligned_store_words_safe_partial (NULL, orig_dst, + words, ofs, align); + else + alpha_expand_unaligned_store_words (NULL, orig_dst, words, ofs); bytes -= words * 8; ofs += words * 8; @@ -4323,7 +4893,7 @@ alpha_expand_block_clear (rtx operands[]) /* If we have appropriate alignment (and it wouldn't take too many instructions otherwise), mask out the bytes we need. */ - if ((TARGET_BWX ? words > 2 : bytes > 0) + if ((TARGET_BWX ? !TARGET_SAFE_PARTIAL && words > 2 : bytes > 0) && (align >= 64 || (align >= 32 && bytes < 4))) { machine_mode mode = (align >= 64 ? DImode : SImode); @@ -4335,18 +4905,46 @@ alpha_expand_block_clear (rtx operands[]) mask = HOST_WIDE_INT_M1U << (bytes * 8); - tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), - NULL_RTX, 1, OPTAB_WIDEN); + if (TARGET_SAFE_PARTIAL) + alpha_expand_clear_safe_partial_nobwx (mem, mode, mask); + else + { + tmp = expand_binop (mode, and_optab, mem, GEN_INT (mask), + NULL_RTX, 1, OPTAB_WIDEN); - emit_move_insn (mem, tmp); + emit_move_insn (mem, tmp); + } return 1; } - if (!TARGET_BWX && bytes >= 4) + if (bytes >= 4) { - alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs); - bytes -= 4; - ofs += 4; + if (align >= 32) + do + { + emit_move_insn (adjust_address (orig_dst, SImode, ofs), + const0_rtx); + bytes -= 4; + ofs += 4; + } + while (bytes >= 4); + else if (!TARGET_BWX) + { + gcc_assert (bytes < 8); + if (TARGET_SAFE_PARTIAL) + { + alpha_expand_unaligned_store_safe_partial (orig_dst, const0_rtx, + bytes, ofs, align); + ofs += bytes; + bytes = 0; + } + else + { + alpha_expand_unaligned_store (orig_dst, const0_rtx, 4, ofs); + bytes -= 4; + ofs += 4; + } + } } if (bytes >= 2) @@ -4362,18 +4960,38 @@ alpha_expand_block_clear (rtx operands[]) } else if (! TARGET_BWX) { - alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs); - bytes -= 2; - ofs += 2; + gcc_assert (bytes < 4); + if (TARGET_SAFE_PARTIAL) + { + alpha_expand_unaligned_store_safe_partial (orig_dst, const0_rtx, + bytes, ofs, align); + ofs += bytes; + bytes = 0; + } + else + { + alpha_expand_unaligned_store (orig_dst, const0_rtx, 2, ofs); + bytes -= 2; + ofs += 2; + } } } while (bytes > 0) - { - emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); - bytes -= 1; - ofs += 1; - } + if (TARGET_BWX || !TARGET_SAFE_PARTIAL) + { + emit_move_insn (adjust_address (orig_dst, QImode, ofs), const0_rtx); + bytes -= 1; + ofs += 1; + } + else + { + gcc_assert (bytes < 2); + alpha_expand_unaligned_store_safe_partial (orig_dst, const0_rtx, + bytes, ofs, align); + ofs += bytes; + bytes = 0; + } return 1; } @@ -4421,12 +5039,13 @@ alpha_expand_builtin_vector_binop (rtx (*gen) (rtx, rtx, rtx), /* A subroutine of the atomic operation splitters. Jump to LABEL if COND is true. 
Mark the jump as unlikely to be taken. */ -static void -emit_unlikely_jump (rtx cond, rtx label) +rtx +alpha_emit_unlikely_jump (rtx cond, rtx label) { rtx x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx); rtx_insn *insn = emit_jump_insn (gen_rtx_SET (pc_rtx, x)); add_reg_br_prob_note (insn, profile_probability::very_unlikely ()); + return insn; } /* Subroutines of the atomic operation splitters. Emit barriers @@ -4518,7 +5137,7 @@ alpha_split_atomic_op (enum rtx_code code, rtx mem, rtx val, rtx before, emit_insn (gen_store_conditional (mode, cond, mem, scratch)); x = gen_rtx_EQ (DImode, cond, const0_rtx); - emit_unlikely_jump (x, label); + alpha_emit_unlikely_jump (x, label); alpha_post_atomic_barrier (model); } @@ -4568,7 +5187,7 @@ alpha_split_compare_and_swap (rtx operands[]) emit_insn (gen_rtx_SET (cond, x)); x = gen_rtx_EQ (DImode, cond, const0_rtx); } - emit_unlikely_jump (x, label2); + alpha_emit_unlikely_jump (x, label2); emit_move_insn (cond, newval); emit_insn (gen_store_conditional @@ -4577,7 +5196,7 @@ alpha_split_compare_and_swap (rtx operands[]) if (!is_weak) { x = gen_rtx_EQ (DImode, cond, const0_rtx); - emit_unlikely_jump (x, label1); + alpha_emit_unlikely_jump (x, label1); } if (!is_mm_relaxed (mod_f)) @@ -4680,7 +5299,7 @@ alpha_split_compare_and_swap_12 (rtx operands[]) emit_insn (gen_rtx_SET (cond, x)); x = gen_rtx_EQ (DImode, cond, const0_rtx); } - emit_unlikely_jump (x, label2); + alpha_emit_unlikely_jump (x, label2); emit_insn (gen_mskxl (cond, scratch, mask, addr)); @@ -4692,7 +5311,7 @@ alpha_split_compare_and_swap_12 (rtx operands[]) if (!is_weak) { x = gen_rtx_EQ (DImode, cond, const0_rtx); - emit_unlikely_jump (x, label1); + alpha_emit_unlikely_jump (x, label1); } if (!is_mm_relaxed (mod_f)) @@ -4732,7 +5351,7 @@ alpha_split_atomic_exchange (rtx operands[]) emit_insn (gen_store_conditional (mode, cond, mem, scratch)); x = gen_rtx_EQ (DImode, cond, const0_rtx); - emit_unlikely_jump (x, label); + alpha_emit_unlikely_jump (x, label); alpha_post_atomic_barrier (model); } @@ -4806,7 +5425,7 @@ alpha_split_atomic_exchange_12 (rtx operands[]) emit_insn (gen_store_conditional (DImode, scratch, mem, scratch)); x = gen_rtx_EQ (DImode, scratch, const0_rtx); - emit_unlikely_jump (x, label); + alpha_emit_unlikely_jump (x, label); alpha_post_atomic_barrier (model); } |
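Every LL/SC sequence above follows the same retry shape, which in
portable C11 terms is a weak compare-exchange loop whose failure path,
like the branches emitted through alpha_emit_unlikely_jump, is marked
as unlikely.  An illustrative model (not the emitted code), here for
the PLUS case of alpha_split_atomic_op:

  #include <stdatomic.h>
  #include <stdint.h>

  static uint64_t
  atomic_add_model (_Atomic uint64_t *mem, uint64_t val)
  {
    uint64_t old = atomic_load_explicit (mem, memory_order_relaxed);
    /* A failed store-conditional corresponds to the compare-exchange
       reporting failure; branch back and retry.  */
    while (!atomic_compare_exchange_weak_explicit (mem, &old, old + val,
                                                   memory_order_relaxed,
                                                   memory_order_relaxed))
      ;
    return old;
  }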