author     Andrew Waterman <andrew@sifive.com>    2017-11-07 17:09:39 +0000
committer  Palmer Dabbelt <palmer@gcc.gnu.org>   2017-11-07 17:09:39 +0000
commit     6ed01e6b282fa9eb5d19ab8bc829d821f624103e (patch)
tree       572f7f70b0efc8ad8987a83d24131725bdb8be01
parent     4d30a85ecee179acc22a213653f4c03028994a6b (diff)
RISC-V: Implement movmemsi
Without this we aren't getting proper memcpy inlining on RISC-V systems,
which is particularly disastrous for Dhrystone performance on RV32IM
systems.

gcc/ChangeLog

2017-11-07  Andrew Waterman  <andrew@sifive.com>

	* config/riscv/riscv-protos.h (riscv_hard_regno_nregs): New
	prototype.
	(riscv_expand_block_move): Likewise.
	gcc/config/riscv/riscv.h (MOVE_RATIO): Tune cost to movmemsi
	implementation.
	(RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER): New define.
	(RISCV_MAX_MOVE_BYTES_STRAIGHT): New define.
	gcc/config/riscv/riscv.c (riscv_block_move_straight): New
	function.
	(riscv_adjust_block_mem): Likewise.
	(riscv_block_move_loop): Likewise.
	(riscv_expand_block_move): Likewise.
	gcc/config/riscv/riscv.md (movmemsi): New pattern.

From-SVN: r254501
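As an illustration (not part of the commit; the struct and function names here
are invented), this is the kind of fixed-size copy that a movmemsi expander
lets the compiler open-code as word loads and stores instead of a call to
memcpy, assuming the operands are word-aligned and the size is below the
straight-line limit:

#include <string.h>

/* A small aligned record; sizeof (struct record) is 32 bytes on RV64,
   well under RISCV_MAX_MOVE_BYTES_STRAIGHT, so the copy below can be
   emitted as straight-line word loads/stores.  */
struct record { unsigned long fields[4]; };

void
copy_record (struct record *dst, const struct record *src)
{
  memcpy (dst, src, sizeof *dst);
}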
-rw-r--r--  gcc/ChangeLog                      16
-rw-r--r--  gcc/config/riscv/riscv-protos.h     4
-rw-r--r--  gcc/config/riscv/riscv.c          156
-rw-r--r--  gcc/config/riscv/riscv.h           21
-rw-r--r--  gcc/config/riscv/riscv.md          13
5 files changed, 206 insertions, 4 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index b771a0d..3af0677 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,19 @@
+2017-11-07 Andrew Waterman <andrew@sifive.com>
+
+ * config/riscv/riscv-protos.h (riscv_hard_regno_nregs): New
+ prototype.
+ (riscv_expand_block_move): Likewise.
+ gcc/config/riscv/riscv.h (MOVE_RATIO): Tune cost to movmemsi
+ implementation.
+ (RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER): New define.
+ (RISCV_MAX_MOVE_BYTES_STRAIGHT): New define.
+ gcc/config/riscv/riscv.c (riscv_block_move_straight): New
+ function.
+ (riscv_adjust_block_mem): Likewise.
+ (riscv_block_move_loop): Likewise.
+ (riscv_expand_block_move): Likewise.
+ gcc/config/riscv/riscv.md (movmemsi): New pattern.
+
2017-11-07  Michael Clark  <michaeljclark@mac.com>

	* config/riscv/linux.h (MUSL_ABI_SUFFIX): New define.
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index ae551fb..34f9859 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -67,7 +67,9 @@ extern HOST_WIDE_INT riscv_initial_elimination_offset (int, int);
extern void riscv_expand_prologue (void);
extern void riscv_expand_epilogue (bool);
extern bool riscv_can_use_return_insn (void);
-extern rtx riscv_function_value (const_tree, const_tree, machine_mode);
+extern rtx riscv_function_value (const_tree, const_tree, enum machine_mode);
+extern unsigned int riscv_hard_regno_nregs (int, enum machine_mode);
+extern bool riscv_expand_block_move (rtx, rtx, rtx);

/* Routines implemented in riscv-c.c. */
void riscv_cpu_cpp_builtins (cpp_reader *);
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index 4a16a75..e9783e9 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -2642,6 +2642,162 @@ riscv_legitimize_call_address (rtx addr)
return addr;
}

+/* Emit straight-line code to move LENGTH bytes from SRC to DEST.
+ Assume that the areas do not overlap. */
+
+static void
+riscv_block_move_straight (rtx dest, rtx src, HOST_WIDE_INT length)
+{
+ HOST_WIDE_INT offset, delta;
+ unsigned HOST_WIDE_INT bits;
+ int i;
+ enum machine_mode mode;
+ rtx *regs;
+
+ bits = MAX (BITS_PER_UNIT,
+ MIN (BITS_PER_WORD, MIN (MEM_ALIGN (src), MEM_ALIGN (dest))));
+
+ mode = mode_for_size (bits, MODE_INT, 0);
+ delta = bits / BITS_PER_UNIT;
+
+ /* Allocate a buffer for the temporary registers. */
+ regs = XALLOCAVEC (rtx, length / delta);
+
+ /* Load as many BITS-sized chunks as possible. Use a normal load if
+ the source has enough alignment, otherwise use left/right pairs. */
+ for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+ {
+ regs[i] = gen_reg_rtx (mode);
+ riscv_emit_move (regs[i], adjust_address (src, mode, offset));
+ }
+
+ /* Copy the chunks to the destination. */
+ for (offset = 0, i = 0; offset + delta <= length; offset += delta, i++)
+ riscv_emit_move (adjust_address (dest, mode, offset), regs[i]);
+
+ /* Mop up any left-over bytes. */
+ if (offset < length)
+ {
+ src = adjust_address (src, BLKmode, offset);
+ dest = adjust_address (dest, BLKmode, offset);
+ move_by_pieces (dest, src, length - offset,
+ MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), 0);
+ }
+}
+
+/* Helper function for doing a loop-based block operation on memory
+ reference MEM. Each iteration of the loop will operate on LENGTH
+ bytes of MEM.
+
+ Create a new base register for use within the loop and point it to
+ the start of MEM. Create a new memory reference that uses this
+ register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
+
+static void
+riscv_adjust_block_mem (rtx mem, HOST_WIDE_INT length,
+ rtx *loop_reg, rtx *loop_mem)
+{
+ *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
+
+ /* Although the new mem does not refer to a known location,
+ it does keep up to LENGTH bytes of alignment. */
+ *loop_mem = change_address (mem, BLKmode, *loop_reg);
+ set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
+}
+
+/* Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
+ bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
+ the memory regions do not overlap. */
+
+static void
+riscv_block_move_loop (rtx dest, rtx src, HOST_WIDE_INT length,
+ HOST_WIDE_INT bytes_per_iter)
+{
+ rtx label, src_reg, dest_reg, final_src, test;
+ HOST_WIDE_INT leftover;
+
+ leftover = length % bytes_per_iter;
+ length -= leftover;
+
+ /* Create registers and memory references for use within the loop. */
+ riscv_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
+ riscv_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
+
+ /* Calculate the value that SRC_REG should have after the last iteration
+ of the loop. */
+ final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
+ 0, 0, OPTAB_WIDEN);
+
+ /* Emit the start of the loop. */
+ label = gen_label_rtx ();
+ emit_label (label);
+
+ /* Emit the loop body. */
+ riscv_block_move_straight (dest, src, bytes_per_iter);
+
+ /* Move on to the next block. */
+ riscv_emit_move (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
+ riscv_emit_move (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
+
+ /* Emit the loop condition. */
+ test = gen_rtx_NE (VOIDmode, src_reg, final_src);
+ if (Pmode == DImode)
+ emit_jump_insn (gen_cbranchdi4 (test, src_reg, final_src, label));
+ else
+ emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
+
+ /* Mop up any left-over bytes. */
+ if (leftover)
+ riscv_block_move_straight (dest, src, leftover);
+ else
+ emit_insn(gen_nop ());
+}
+
+/* Expand a movmemsi instruction, which copies LENGTH bytes from
+ memory reference SRC to memory reference DEST. */
+
+bool
+riscv_expand_block_move (rtx dest, rtx src, rtx length)
+{
+ if (CONST_INT_P (length))
+ {
+ HOST_WIDE_INT factor, align;
+
+ align = MIN (MIN (MEM_ALIGN (src), MEM_ALIGN (dest)), BITS_PER_WORD);
+ factor = BITS_PER_WORD / align;
+
+ if (optimize_function_for_size_p (cfun)
+ && INTVAL (length) * factor * UNITS_PER_WORD > MOVE_RATIO (false))
+ return false;
+
+ if (INTVAL (length) <= RISCV_MAX_MOVE_BYTES_STRAIGHT / factor)
+ {
+ riscv_block_move_straight (dest, src, INTVAL (length));
+ return true;
+ }
+ else if (optimize && align >= BITS_PER_WORD)
+ {
+ unsigned min_iter_words
+ = RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD;
+ unsigned iter_words = min_iter_words;
+ HOST_WIDE_INT bytes = INTVAL (length), words = bytes / UNITS_PER_WORD;
+
+ /* Lengthen the loop body if it shortens the tail. */
+ for (unsigned i = min_iter_words; i < min_iter_words * 2 - 1; i++)
+ {
+ unsigned cur_cost = iter_words + words % iter_words;
+ unsigned new_cost = i + words % i;
+ if (new_cost <= cur_cost)
+ iter_words = i;
+ }
+
+ riscv_block_move_loop (dest, src, bytes, iter_words * UNITS_PER_WORD);
+ return true;
+ }
+ }
+ return false;
+}
+
/* Print symbolic operand OP, which is part of a HIGH or LO_SUM
in context CONTEXT. HI_RELOC indicates a high-part reloc. */
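A side note on the loop-sizing search in riscv_expand_block_move above: the
standalone sketch below (not part of the patch) replays that search for one
concrete case, assuming RV64 values, where UNITS_PER_WORD is 8 and so
RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD is 4:

#include <stdio.h>

int
main (void)
{
  unsigned min_iter_words = 4;   /* 32-byte max iteration / 8-byte words */
  unsigned iter_words = min_iter_words;
  long words = 15;               /* a 120-byte copy is 15 whole words */

  /* Same search as in riscv_expand_block_move: widen the loop body when
     that shortens the straight-line tail.  Here 15 % 5 == 0, so the body
     grows from 4 words (cost 4 + 3) to 5 words (cost 5 + 0).  */
  for (unsigned i = min_iter_words; i < min_iter_words * 2 - 1; i++)
    {
      unsigned cur_cost = iter_words + words % iter_words;
      unsigned new_cost = i + words % i;
      if (new_cost <= cur_cost)
        iter_words = i;
    }

  printf ("%u words per iteration\n", iter_words);  /* prints 5 */
  return 0;
}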
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index a802a3f..c0901a0 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -808,10 +808,25 @@ while (0)
#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE (POINTER_SIZE == 64 ? "long int" : "int")

-/* If a memory-to-memory move would take MOVE_RATIO or more simple
- move-instruction pairs, we will do a movmem or libcall instead. */
+/* The maximum number of bytes copied by one iteration of a movmemsi loop. */
+
+#define RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER (UNITS_PER_WORD * 4)
+
+/* The maximum number of bytes that can be copied by a straight-line
+ movmemsi implementation. */
-#define MOVE_RATIO(speed) (CLEAR_RATIO (speed) / 2)
+#define RISCV_MAX_MOVE_BYTES_STRAIGHT (RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER * 3)
+
+/* If a memory-to-memory move would take MOVE_RATIO or more simple
+ move-instruction pairs, we will do a movmem or libcall instead.
+ Do not use move_by_pieces at all when strict alignment is not
+ in effect but the target has slow unaligned accesses; in this
+ case, movmem or libcall is more efficient. */
+
+#define MOVE_RATIO(speed) \
+ (!STRICT_ALIGNMENT && riscv_slow_unaligned_access ? 1 : \
+ (speed) ? RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER / UNITS_PER_WORD : \
+   CLEAR_RATIO (speed) / 2)

/* For CLEAR_RATIO, when optimizing for size, give a better estimate
of the length of a memset call, but use the default otherwise. */
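Side note (not part of the patch): plugging in RV64 values, where
UNITS_PER_WORD is 8, gives a feel for the new tuning constants:

/* Worked values, assuming RV64 (UNITS_PER_WORD == 8):
     RISCV_MAX_MOVE_BYTES_PER_LOOP_ITER = 8 * 4  = 32 bytes per loop iteration
     RISCV_MAX_MOVE_BYTES_STRAIGHT      = 32 * 3 = 96 bytes straight-line max
     MOVE_RATIO (true)                  = 32 / 8 = 4 move-instruction pairs,
   so when optimizing for speed, aligned copies needing fewer than 4 word
   pairs go through move_by_pieces, and anything larger reaches the movmemsi
   expander (straight-line up to 96 bytes, then the loop strategy).  */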
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index 53e1db9..814ff6e 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -1436,6 +1436,19 @@
DONE;
})

+(define_expand "movmemsi"
+ [(parallel [(set (match_operand:BLK 0 "general_operand")
+ (match_operand:BLK 1 "general_operand"))
+ (use (match_operand:SI 2 ""))
+ (use (match_operand:SI 3 "const_int_operand"))])]
+ ""
+{
+ if (riscv_expand_block_move (operands[0], operands[1], operands[2]))
+ DONE;
+ else
+ FAIL;
+})
+
;; Expand in-line code to clear the instruction cache between operand[0] and
;; operand[1].
(define_expand "clear_cache"