aboutsummaryrefslogtreecommitdiff
path: root/gcc/config/nds32
diff options
context:
space:
mode:
author: Monk Chiang <sh.chiang04@gmail.com> 2018-05-27 16:18:24 +0000
committer: Chung-Ju Wu <jasonwucj@gcc.gnu.org> 2018-05-27 16:18:24 +0000
commit: 8889fbe5424f437f504901a659ebcc772066a478 (patch)
tree: 7a29d461fbb70054bce48b789e09a4a04a4a3a53 /gcc/config/nds32
parent: 0be3bad7054256f009a066cce34b4cca12f3b86f (diff)
downloadgcc-8889fbe5424f437f504901a659ebcc772066a478.zip
gcc-8889fbe5424f437f504901a659ebcc772066a478.tar.gz
gcc-8889fbe5424f437f504901a659ebcc772066a478.tar.bz2
[NDS32] Optimize movmem and setmem operations.
gcc/ * config/nds32/nds32-intrinsic.md (unaligned_storedi): Modify patterns implementation. (unaligned_store_dw): Ditto. * config/nds32/nds32-memory-manipulation.c (nds32_expand_movmemsi_loop_known_size): Refactoring implementation. (nds32_gen_dup_4_byte_to_word_value): Rename to ... (nds32_gen_dup_4_byte_to_word_value_aux): ... this. (emit_setmem_word_loop): Rename to ... (emit_setmem_doubleword_loop): ... this. (nds32_gen_dup_4_byte_to_word_value): New function. (nds32_gen_dup_8_byte_to_double_word_value): New function. (nds32_expand_setmem_loop): Refine implementation. (nds32_expand_setmem_loop_v3m): Ditto. * config/nds32/nds32-multiple.md (unaligned_store_update_base_dw): New pattern. Co-Authored-By: Chung-Ju Wu <jasonwucj@gmail.com> From-SVN: r260805
Diffstat (limited to 'gcc/config/nds32')
-rw-r--r-- gcc/config/nds32/nds32-intrinsic.md 15
-rw-r--r-- gcc/config/nds32/nds32-memory-manipulation.c 186
-rw-r--r-- gcc/config/nds32/nds32-multiple.md 19
3 files changed, 186 insertions, 34 deletions
diff --git a/gcc/config/nds32/nds32-intrinsic.md b/gcc/config/nds32/nds32-intrinsic.md
index 02f7285..c70a6fc 100644
--- a/gcc/config/nds32/nds32-intrinsic.md
+++ b/gcc/config/nds32/nds32-intrinsic.md
@@ -1596,22 +1596,17 @@
if (TARGET_ISA_V3M)
nds32_expand_unaligned_store (operands, DImode);
else
- emit_insn (gen_unaligned_store_dw (operands[0], operands[1]));
+ emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[0]),
+ operands[1]));
DONE;
})
(define_insn "unaligned_store_dw"
- [(set (mem:DI (match_operand:SI 0 "register_operand" "r"))
- (unspec:DI [(match_operand:DI 1 "register_operand" "r")] UNSPEC_UASTORE_DW))]
+ [(set (match_operand:DI 0 "nds32_lmw_smw_base_operand" "=Umw")
+ (unspec:DI [(match_operand:DI 1 "register_operand" " r")] UNSPEC_UASTORE_DW))]
""
{
- rtx otherops[3];
- otherops[0] = gen_rtx_REG (SImode, REGNO (operands[1]));
- otherops[1] = gen_rtx_REG (SImode, REGNO (operands[1]) + 1);
- otherops[2] = operands[0];
-
- output_asm_insn ("smw.bi\t%0, [%2], %1, 0", otherops);
- return "";
+ return nds32_output_smw_double_word (operands);
}
[(set_attr "type" "store")
(set_attr "length" "4")]
diff --git a/gcc/config/nds32/nds32-memory-manipulation.c b/gcc/config/nds32/nds32-memory-manipulation.c
index d02aabf..f6140e6 100644
--- a/gcc/config/nds32/nds32-memory-manipulation.c
+++ b/gcc/config/nds32/nds32-memory-manipulation.c
@@ -257,8 +257,124 @@ static bool
nds32_expand_movmemsi_loop_known_size (rtx dstmem, rtx srcmem,
rtx size, rtx alignment)
{
- return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
- size, alignment);
+ rtx dst_base_reg, src_base_reg;
+ rtx dst_itr, src_itr;
+ rtx dstmem_m, srcmem_m, dst_itr_m, src_itr_m;
+ rtx dst_end;
+ rtx double_word_mode_loop, byte_mode_loop;
+ rtx tmp;
+ int start_regno;
+ bool align_to_4_bytes = (INTVAL (alignment) & 3) == 0;
+ unsigned HOST_WIDE_INT total_bytes = UINTVAL (size);
+
+ if (TARGET_ISA_V3M && !align_to_4_bytes)
+ return 0;
+
+ if (TARGET_REDUCED_REGS)
+ start_regno = 2;
+ else
+ start_regno = 16;
+
+ dst_itr = gen_reg_rtx (Pmode);
+ src_itr = gen_reg_rtx (Pmode);
+ dst_end = gen_reg_rtx (Pmode);
+ tmp = gen_reg_rtx (QImode);
+
+ double_word_mode_loop = gen_label_rtx ();
+ byte_mode_loop = gen_label_rtx ();
+
+ dst_base_reg = copy_to_mode_reg (Pmode, XEXP (dstmem, 0));
+ src_base_reg = copy_to_mode_reg (Pmode, XEXP (srcmem, 0));
+
+ if (total_bytes < 8)
+ {
+ /* Emit total_bytes less than 8 loop version of movmem.
+ add $dst_end, $dst, $size
+ move $dst_itr, $dst
+ .Lbyte_mode_loop:
+ lbi.bi $tmp, [$src_itr], #1
+ sbi.bi $tmp, [$dst_itr], #1
+ ! Upper bound not reached. Loop.
+ bne $dst_itr, $dst_end, .Lbyte_mode_loop */
+
+ /* add $dst_end, $dst, $size */
+ dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
+ NULL_RTX, 0, OPTAB_WIDEN);
+ /* move $dst_itr, $dst
+ move $src_itr, $src */
+ emit_move_insn (dst_itr, dst_base_reg);
+ emit_move_insn (src_itr, src_base_reg);
+
+ /* .Lbyte_mode_loop: */
+ emit_label (byte_mode_loop);
+
+ /* lbi.bi $tmp, [$src_itr], #1 */
+ nds32_emit_post_inc_load_store (tmp, src_itr, QImode, true);
+
+ /* sbi.bi $tmp, [$dst_itr], #1 */
+ nds32_emit_post_inc_load_store (tmp, dst_itr, QImode, false);
+ /* ! Upper bound not reached. Loop.
+ bne $dst_itr, $dst_end, .Lbyte_mode_loop */
+ emit_cmp_and_jump_insns (dst_itr, dst_end, NE, NULL,
+ SImode, 1, byte_mode_loop);
+ return true;
+ }
+ else if (total_bytes % 8 == 0)
+ {
+ /* Emit multiple of 8 loop version of movmem.
+
+ add $dst_end, $dst, $size
+ move $dst_itr, $dst
+ move $src_itr, $src
+
+ .Ldouble_word_mode_loop:
+ lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
+ smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr
+ ! these moves will be deleted after register allocation
+ move $src_itr, $src_itr'
+ move $dst_itr, $dst_itr'
+ ! Upper bound not reached. Loop.
+ bne $dst_end, $dst_itr, .Ldouble_word_mode_loop */
+
+ /* add $dst_end, $dst, $size */
+ dst_end = expand_binop (Pmode, add_optab, dst_base_reg, size,
+ NULL_RTX, 0, OPTAB_WIDEN);
+
+ /* move $dst_itr, $dst
+ move $src_itr, $src */
+ emit_move_insn (dst_itr, dst_base_reg);
+ emit_move_insn (src_itr, src_base_reg);
+
+ /* .Ldouble_word_mode_loop: */
+ emit_label (double_word_mode_loop);
+ /* lmw.bim $tmp-begin, [$src_itr], $tmp-end, #0 ! $src_itr' = $src_itr
+ smw.bim $tmp-begin, [$dst_itr], $tmp-end, #0 ! $dst_itr' = $dst_itr */
+ src_itr_m = src_itr;
+ dst_itr_m = dst_itr;
+ srcmem_m = srcmem;
+ dstmem_m = dstmem;
+ nds32_emit_mem_move_block (start_regno, 2,
+ &dst_itr_m, &dstmem_m,
+ &src_itr_m, &srcmem_m,
+ true);
+ /* move $src_itr, $src_itr'
+ move $dst_itr, $dst_itr' */
+ emit_move_insn (dst_itr, dst_itr_m);
+ emit_move_insn (src_itr, src_itr_m);
+
+ /* ! Upper bound not reached. Loop.
+ bne $dst_end, $dst_itr, .Ldouble_word_mode_loop */
+ emit_cmp_and_jump_insns (dst_end, dst_itr, NE, NULL,
+ Pmode, 1, double_word_mode_loop);
+ }
+ else
+ {
+ /* Handle size greater than 8, and not a multiple of 8. */
+ return nds32_expand_movmemsi_loop_unknown_size (dstmem, srcmem,
+ size, alignment);
+ }
+
+ return true;
}
static bool
@@ -433,10 +549,8 @@ nds32_expand_movmemsi (rtx dstmem, rtx srcmem, rtx total_bytes, rtx alignment)
/* Auxiliary function for expand setmem pattern. */
static rtx
-nds32_gen_dup_4_byte_to_word_value (rtx value)
+nds32_gen_dup_4_byte_to_word_value_aux (rtx value, rtx value4word)
{
- rtx value4word = gen_reg_rtx (SImode);
-
gcc_assert (GET_MODE (value) == QImode || CONST_INT_P (value));
if (CONST_INT_P (value))
@@ -493,7 +607,30 @@ nds32_gen_dup_4_byte_to_word_value (rtx value)
}
static rtx
-emit_setmem_word_loop (rtx itr, rtx size, rtx value)
+nds32_gen_dup_4_byte_to_word_value (rtx value)
+{
+ rtx value4word = gen_reg_rtx (SImode);
+ nds32_gen_dup_4_byte_to_word_value_aux (value, value4word);
+
+ return value4word;
+}
+
+static rtx
+nds32_gen_dup_8_byte_to_double_word_value (rtx value)
+{
+ rtx value4doubleword = gen_reg_rtx (DImode);
+
+ nds32_gen_dup_4_byte_to_word_value_aux (
+ value, nds32_di_low_part_subreg(value4doubleword));
+
+ emit_move_insn (nds32_di_high_part_subreg(value4doubleword),
+ nds32_di_low_part_subreg(value4doubleword));
+ return value4doubleword;
+}
+
+
+static rtx
+emit_setmem_doubleword_loop (rtx itr, rtx size, rtx value)
{
rtx word_mode_label = gen_label_rtx ();
rtx word_mode_end_label = gen_label_rtx ();
@@ -502,9 +639,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value)
rtx word_mode_end = gen_reg_rtx (SImode);
rtx size_for_word = gen_reg_rtx (SImode);
- /* and $size_for_word, $size, #~3 */
+ /* and $size_for_word, $size, #~0x7 */
size_for_word = expand_binop (SImode, and_optab, size,
- gen_int_mode (~3, SImode),
+ gen_int_mode (~0x7, SImode),
NULL_RTX, 0, OPTAB_WIDEN);
emit_move_insn (byte_mode_size, size);
@@ -516,8 +653,8 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value)
word_mode_end = expand_binop (Pmode, add_optab, itr, size_for_word,
NULL_RTX, 0, OPTAB_WIDEN);
- /* andi $byte_mode_size, $size, 3 */
- byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (3),
+ /* andi $byte_mode_size, $size, 0x7 */
+ byte_mode_size_tmp = expand_binop (SImode, and_optab, size, GEN_INT (0x7),
NULL_RTX, 0, OPTAB_WIDEN);
emit_move_insn (byte_mode_size, byte_mode_size_tmp);
@@ -527,9 +664,9 @@ emit_setmem_word_loop (rtx itr, rtx size, rtx value)
/* ! word-mode set loop
smw.bim $value4word, [$dst_itr], $value4word, 0
bne $word_mode_end, $dst_itr, .Lword_mode */
- emit_insn (gen_unaligned_store_update_base_w (itr,
- itr,
- value));
+ emit_insn (gen_unaligned_store_update_base_dw (itr,
+ itr,
+ value));
emit_cmp_and_jump_insns (word_mode_end, itr, NE, NULL,
Pmode, 1, word_mode_label);
@@ -581,7 +718,7 @@ emit_setmem_byte_loop (rtx itr, rtx size, rtx value, bool need_end)
static bool
nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
{
- rtx value4word;
+ rtx value4doubleword;
rtx value4byte;
rtx dst;
rtx byte_mode_size;
@@ -624,7 +761,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
or $tmp3, $tmp1, $tmp2 ! $tmp3 <- 0x0000abab
slli $tmp4, $tmp3, 16 ! $tmp4 <- 0xabab0000
or $val4word, $tmp3, $tmp4 ! $value4word <- 0xabababab */
- value4word = nds32_gen_dup_4_byte_to_word_value (value);
+ value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
/* and $size_for_word, $size, #-4
beqz $size_for_word, .Lword_mode_end
@@ -637,7 +774,7 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
smw.bim $value4word, [$dst], $value4word, 0
bne $word_mode_end, $dst, .Lword_mode
.Lword_mode_end: */
- byte_mode_size = emit_setmem_word_loop (dst, size, value4word);
+ byte_mode_size = emit_setmem_doubleword_loop (dst, size, value4doubleword);
/* beqz $byte_mode_size, .Lend
add $byte_mode_end, $dst, $byte_mode_size
@@ -648,8 +785,8 @@ nds32_expand_setmem_loop (rtx dstmem, rtx size, rtx value)
bne $byte_mode_end, $dst, .Lbyte_mode
.Lend: */
- value4byte = simplify_gen_subreg (QImode, value4word, SImode,
- subreg_lowpart_offset (QImode, SImode));
+ value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
+ subreg_lowpart_offset (QImode, DImode));
emit_setmem_byte_loop (dst, byte_mode_size, value4byte, false);
@@ -666,14 +803,15 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
rtx byte_loop_size = gen_reg_rtx (SImode);
rtx remain_size = gen_reg_rtx (SImode);
rtx new_base_reg;
- rtx value4byte, value4word;
+ rtx value4byte, value4doubleword;
rtx byte_mode_size;
rtx last_byte_loop_label = gen_label_rtx ();
size = force_reg (SImode, size);
- value4word = nds32_gen_dup_4_byte_to_word_value (value);
- value4byte = simplify_gen_subreg (QImode, value4word, SImode, 0);
+ value4doubleword = nds32_gen_dup_8_byte_to_double_word_value (value);
+ value4byte = simplify_gen_subreg (QImode, value4doubleword, DImode,
+ subreg_lowpart_offset (QImode, DImode));
emit_move_insn (byte_loop_size, size);
emit_move_insn (byte_loop_base, base_reg);
@@ -701,9 +839,9 @@ nds32_expand_setmem_loop_v3m (rtx dstmem, rtx size, rtx value)
emit_insn (gen_subsi3 (remain_size, size, need_align_bytes));
/* Set memory word by word. */
- byte_mode_size = emit_setmem_word_loop (new_base_reg,
- remain_size,
- value4word);
+ byte_mode_size = emit_setmem_doubleword_loop (new_base_reg,
+ remain_size,
+ value4doubleword);
emit_move_insn (byte_loop_base, new_base_reg);
emit_move_insn (byte_loop_size, byte_mode_size);
diff --git a/gcc/config/nds32/nds32-multiple.md b/gcc/config/nds32/nds32-multiple.md
index a8f7717..80746b1 100644
--- a/gcc/config/nds32/nds32-multiple.md
+++ b/gcc/config/nds32/nds32-multiple.md
@@ -2854,6 +2854,25 @@
(set_attr "length" "4")]
)
+(define_expand "unaligned_store_update_base_dw"
+ [(parallel [(set (match_operand:SI 0 "register_operand" "=r")
+ (plus:SI (match_operand:SI 1 "register_operand" "0") (const_int 8)))
+ (set (mem:DI (match_dup 1))
+ (unspec:DI [(match_operand:DI 2 "register_operand" "r")] UNSPEC_UASTORE_DW))])]
+ ""
+{
+ /* DO NOT emit unaligned_store_w_m immediately since the web pass doesn't
+ recognize post_inc; try it again after GCC 5.0.
+ REF: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63156 */
+ emit_insn (gen_unaligned_store_dw (gen_rtx_MEM (DImode, operands[1]), operands[2]));
+ emit_insn (gen_addsi3 (operands[0], operands[1], gen_int_mode (8, Pmode)));
+ DONE;
+}
+ [(set_attr "type" "store_multiple")
+ (set_attr "combo" "2")
+ (set_attr "length" "4")]
+)
+
(define_insn "*stmsi25"
[(match_parallel 0 "nds32_store_multiple_operation"
[(set (mem:SI (match_operand:SI 1 "register_operand" "r"))