aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorChristophe Lyon <christophe.lyon@linaro.org>2020-11-02 14:39:24 +0000
committerChristophe Lyon <christophe.lyon@linaro.org>2020-11-02 14:39:24 +0000
commit011f5e92f8ff87f099ed0aae736e79be20a77c6c (patch)
treef5165d29fb4b21c5a110b4371507e3a866fef47d
parent79680c1d5cd3d89c2e7423e20dc8a6e1d6dc8151 (diff)
downloadgcc-011f5e92f8ff87f099ed0aae736e79be20a77c6c.zip
gcc-011f5e92f8ff87f099ed0aae736e79be20a77c6c.tar.gz
gcc-011f5e92f8ff87f099ed0aae736e79be20a77c6c.tar.bz2
arm: Improve thumb1_gen_const_int
Enable thumb1_gen_const_int to generate RTL or asm depending on the context, so that we avoid duplicating code to handle constants in Thumb-1 with -mpure-code. Use a template so that the algorithm is effectively shared, and rely on two classes to handle the actual emission as RTL or asm. The generated sequence is improved to handle right-shiftable and small values with fewer instructions. We now generate: 128: movs r0, #128 264: movs r3, #33 lsls r3, #3 510: movs r3, #255 lsls r3, #1 512: movs r3, #1 lsls r3, #9 764: movs r3, #191 lsls r3, #2 65536: movs r3, #1 lsls r3, #16 0x123456: movs r3, #18 ;0x12 lsls r3, #8 adds r3, #52 ;0x34 lsls r3, #8 adds r3, #86 ;0x56 0x1123456: movs r3, #137 ;0x89 lsls r3, #8 adds r3, #26 ;0x1a lsls r3, #8 adds r3, #43 ;0x2b lsls r3, #1 0x1000010: movs r3, #16 lsls r3, #16 adds r3, #1 lsls r3, #4 0x1000011: movs r3, #1 lsls r3, #24 adds r3, #17 -8192: movs r3, #1 lsls r3, #13 rsbs r3, #0 The patch adds a testcase which does not fully exercise thumb1_gen_const_int, as other existing patterns already catch small constants. These parts of thumb1_gen_const_int are used by arm_thumb1_mi_thunk. 2020-11-02 Christophe Lyon <christophe.lyon@linaro.org> gcc/ * config/arm/arm.c (thumb1_const_rtl, thumb1_const_print): New classes. (thumb1_gen_const_int): Rename to ... (thumb1_gen_const_int_1): ... New helper function. Add capability to emit either RTL or asm, improve generated code. (thumb1_gen_const_int_rtl): New function. * config/arm/arm-protos.h (thumb1_gen_const_int): Rename to thumb1_gen_const_int_rtl. * config/arm/thumb1.md: Call thumb1_gen_const_int_rtl instead of thumb1_gen_const_int. gcc/testsuite/ * gcc.target/arm/pure-code/no-literal-pool-m0.c: New.
-rw-r--r--gcc/config/arm/arm-protos.h2
-rw-r--r--gcc/config/arm/arm.c224
-rw-r--r--gcc/config/arm/thumb1.md2
-rw-r--r--gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c175
4 files changed, 369 insertions, 34 deletions
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 703d616..5b581e0 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -74,7 +74,7 @@ extern bool arm_small_register_classes_for_mode_p (machine_mode);
extern int const_ok_for_arm (HOST_WIDE_INT);
extern int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
extern int const_ok_for_dimode_op (HOST_WIDE_INT, enum rtx_code);
-extern void thumb1_gen_const_int (rtx, HOST_WIDE_INT);
+extern void thumb1_gen_const_int_rtl (rtx, HOST_WIDE_INT);
extern int arm_split_constant (RTX_CODE, machine_mode, rtx,
HOST_WIDE_INT, rtx, rtx, int);
extern int legitimate_pic_operand_p (rtx);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index ae05891..203d2b6 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -4528,38 +4528,6 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
}
}
-/* Emit a sequence of movs/adds/shift to produce a 32-bit constant.
- Avoid generating useless code when one of the bytes is zero. */
-void
-thumb1_gen_const_int (rtx op0, HOST_WIDE_INT op1)
-{
- bool mov_done_p = false;
- int i;
-
- /* Emit upper 3 bytes if needed. */
- for (i = 0; i < 3; i++)
- {
- int byte = (op1 >> (8 * (3 - i))) & 0xff;
-
- if (byte)
- {
- emit_set_insn (op0, mov_done_p
- ? gen_rtx_PLUS (SImode,op0, GEN_INT (byte))
- : GEN_INT (byte));
- mov_done_p = true;
- }
-
- if (mov_done_p)
- emit_set_insn (op0, gen_rtx_ASHIFT (SImode, op0, GEN_INT (8)));
- }
-
- /* Emit lower byte if needed. */
- if (!mov_done_p)
- emit_set_insn (op0, GEN_INT (op1 & 0xff));
- else if (op1 & 0xff)
- emit_set_insn (op0, gen_rtx_PLUS (SImode, op0, GEN_INT (op1 & 0xff)));
-}
-
/* Emit a sequence of insns to handle a large constant.
CODE is the code of the operation required, it can be any of SET, PLUS,
IOR, AND, XOR, MINUS;
@@ -28263,6 +28231,198 @@ arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
default_internal_label (stream, prefix, labelno);
}
+/* Emitter classes: one generates code as RTL, the other outputs asm
+   to a file.  Both expose the same operations (mov, add, ashift,
+   neg), so a template parameterized on the emitter type can produce
+   the same code sequence in either format.  */
+
+/* Emitter that generates each operation as an RTL set of DST.  */
+class thumb1_const_rtl
+{
+ public:
+  thumb1_const_rtl (rtx dst) : m_dst (dst) {}
+
+  /* DST = VAL.  */
+  void mov (HOST_WIDE_INT val)
+  {
+    set (GEN_INT (val));
+  }
+
+  /* DST = DST + VAL.  */
+  void add (HOST_WIDE_INT val)
+  {
+    set (gen_rtx_PLUS (SImode, m_dst, GEN_INT (val)));
+  }
+
+  /* DST = DST << SHIFT.  */
+  void ashift (HOST_WIDE_INT shift)
+  {
+    set (gen_rtx_ASHIFT (SImode, m_dst, GEN_INT (shift)));
+  }
+
+  /* DST = -DST.  */
+  void neg ()
+  {
+    set (gen_rtx_NEG (SImode, m_dst));
+  }
+
+ private:
+  /* Emit the insn DST = SRC.  */
+  void set (rtx src)
+  {
+    emit_set_insn (m_dst, src);
+  }
+
+  rtx m_dst;
+};
+
+/* Emitter that prints each operation as an asm instruction to the
+   file F, operating on the register whose number is REGNO.  */
+class thumb1_const_print
+{
+ public:
+  thumb1_const_print (FILE *f, int regno)
+    : m_file (f), m_regname (reg_names[regno])
+  {
+  }
+
+  /* Print "movs <reg>, #VAL".  */
+  void mov (HOST_WIDE_INT val)
+  {
+    asm_fprintf (m_file, "\tmovs\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
+		 m_regname, val);
+  }
+
+  /* Print "adds <reg>, #VAL".  */
+  void add (HOST_WIDE_INT val)
+  {
+    asm_fprintf (m_file, "\tadds\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
+		 m_regname, val);
+  }
+
+  /* Print "lsls <reg>, #SHIFT".  */
+  void ashift (HOST_WIDE_INT shift)
+  {
+    asm_fprintf (m_file, "\tlsls\t%s, #" HOST_WIDE_INT_PRINT_DEC "\n",
+		 m_regname, shift);
+  }
+
+  /* Print "rsbs <reg>, #0", i.e. negate the register.  */
+  void neg ()
+  {
+    asm_fprintf (m_file, "\trsbs\t%s, #0\n", m_regname);
+  }
+
+ private:
+  FILE *m_file;
+  const char *m_regname;
+};
+
+/* Emit a sequence of movs/adds/lsls (plus a final negation for some
+   negative values) that builds the 32-bit constant OP1 through the
+   emitter DST.  DST must provide mov, add, ashift and neg.  OP1 must
+   be a valid sign-extended SImode value.
+
+   The cases are tried in this order:
+   - values in [0, 255] take a single mov;
+   - negative values with the top nine bits set are built as -OP1,
+     then negated;
+   - if dropping the trailing zero bits saves at least one byte, the
+     shifted value is built and a final left shift restores it;
+   - the remaining value is emitted as mov, as mov+add, or byte by
+     byte, without emitting anything for zero bytes beyond the shift
+     they contribute.  */
+template <class T>
+void
+thumb1_gen_const_int_1 (T dst, HOST_WIDE_INT op1)
+{
+  bool mov_done_p = false;
+  /* Only the low 32 bits matter.  OP1 is sign-extended, so when
+     HOST_WIDE_INT is wider than 32 bits a negative value that does
+     not take the negation path below would otherwise keep its upper
+     bits set: the byte counts would be computed on the wide value and
+     the right shift would pull those ones into the bytes we emit,
+     giving a correct but needlessly long sequence.  */
+  unsigned HOST_WIDE_INT val = op1 & 0xffffffff;
+  int shift = 0;
+
+  gcc_assert (op1 == trunc_int_for_mode (op1, SImode));
+
+  if (val <= 255)
+    {
+      dst.mov (val);
+      return;
+    }
+
+  /* For negative numbers with the first nine bits set, build the
+     opposite of OP1, then negate it, it's generally shorter and not
+     longer.  */
+  if ((val & 0xFF800000) == 0xFF800000)
+    {
+      thumb1_gen_const_int_1 (dst, -op1);
+      dst.neg ();
+      return;
+    }
+
+  /* In the general case, we need 7 instructions to build
+     a 32 bits constant (1 movs, 3 lsls, 3 adds).  We can
+     do better if VAL is small enough, or
+     right-shiftable by a suitable amount.  If the
+     right-shift enables to encode at least one less byte,
+     it's worth it: we save an adds and a lsls at the
+     expense of a final lsls.  */
+  int final_shift = number_of_first_bit_set (val);
+
+  int leading_zeroes = clz_hwi (val);
+  int number_of_bytes_needed
+    = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes)
+       / BITS_PER_UNIT) + 1;
+  int number_of_bytes_needed2
+    = ((HOST_BITS_PER_WIDE_INT - 1 - leading_zeroes - final_shift)
+       / BITS_PER_UNIT) + 1;
+
+  if (number_of_bytes_needed2 < number_of_bytes_needed)
+    val >>= final_shift;
+  else
+    final_shift = 0;
+
+  /* If we are in a very small range, we can use either a single movs
+     or movs+adds.  */
+  if (val <= 510)
+    {
+      if (val > 255)
+	{
+	  unsigned HOST_WIDE_INT high = val - 255;
+
+	  dst.mov (high);
+	  dst.add (255);
+	}
+      else
+	dst.mov (val);
+
+      if (final_shift > 0)
+	dst.ashift (final_shift);
+    }
+  else
+    {
+      /* General case, emit upper 3 bytes as needed.  */
+      for (int i = 0; i < 3; i++)
+	{
+	  unsigned HOST_WIDE_INT byte = (val >> (8 * (3 - i))) & 0xff;
+
+	  if (byte)
+	    {
+	      /* We are about to emit new bits: left-shift by the
+		 accumulated amount only if we have already emitted
+		 some upper bits.  */
+	      if (mov_done_p)
+		{
+		  dst.ashift (shift);
+		  dst.add (byte);
+		}
+	      else
+		dst.mov (byte);
+
+	      /* Restart the shift accumulation since we've just
+		 emitted some bits.  */
+	      shift = 0;
+
+	      mov_done_p = true;
+	    }
+
+	  /* Once something has been emitted, every byte position,
+	     zero or not, adds 8 to the pending shift.  */
+	  if (mov_done_p)
+	    shift += 8;
+	}
+
+      /* Emit lower byte.  */
+      if (!mov_done_p)
+	dst.mov (val & 0xff);
+      else
+	{
+	  dst.ashift (shift);
+	  if (val & 0xff)
+	    dst.add (val & 0xff);
+	}
+
+      if (final_shift > 0)
+	dst.ashift (final_shift);
+    }
+}
+
+/* Entry point for thumb1.md: build the constant OP1 into DST as RTL.
+   A plain function is needed because the thumb1_const_rtl and
+   thumb1_const_print classes are not exported.  */
+void
+thumb1_gen_const_int_rtl (rtx dst, HOST_WIDE_INT op1)
+{
+  thumb1_gen_const_int_1 (thumb1_const_rtl (dst), op1);
+}
+
/* Output code to add DELTA to the first argument, and then jump
to FUNCTION. Used for C++ multiple inheritance. */
diff --git a/gcc/config/arm/thumb1.md b/gcc/config/arm/thumb1.md
index 320e78d..e2fcb10 100644
--- a/gcc/config/arm/thumb1.md
+++ b/gcc/config/arm/thumb1.md
@@ -820,7 +820,7 @@
&& !satisfies_constraint_K (operands[1])"
[(clobber (const_int 0))]
"
- thumb1_gen_const_int (operands[0], INTVAL (operands[1]));
+ thumb1_gen_const_int_rtl (operands[0], INTVAL (operands[1]));
DONE;
"
)
diff --git a/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c
new file mode 100644
index 0000000..787a61a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/pure-code/no-literal-pool-m0.c
@@ -0,0 +1,175 @@
+/* { dg-do compile } */
+/* { dg-options "-mpure-code -mcpu=cortex-m0 -march=armv6s-m -mthumb" } */
+/* { dg-final { check-function-bodies "**" "" } } */
+
+/* Does not use thumb1_gen_const_int.
+** test_0:
+** ...
+** movs r[0-3], #0
+** ...
+*/
+/* The expected body loads 0 with a single movs.  */
+int
+test_0 ()
+{
+ return 0;
+}
+
+/* Does not use thumb1_gen_const_int.
+** test_128:
+** ...
+** movs r[0-3], #128
+** ...
+*/
+/* The expected body loads 128 with a single movs.  */
+int
+test_128 ()
+{
+ return 128;
+}
+
+/* Does not use thumb1_gen_const_int.
+** test_264:
+** ...
+** movs r[0-3], #132
+** lsls r[0-3], r[0-3], #1
+** ...
+*/
+/* 264 == 132 << 1: the expected body loads 132 and shifts it left by
+   one.  */
+int
+test_264 ()
+{
+ return 264;
+}
+
+/* Does not use thumb1_gen_const_int.
+** test_510:
+** ...
+** movs r[0-3], #255
+** lsls r[0-3], r[0-3], #1
+** ...
+*/
+/* 510 == 255 << 1: the expected body loads 255 and shifts it left by
+   one.  */
+int
+test_510 ()
+{
+ return 510;
+}
+
+/* Does not use thumb1_gen_const_int.
+** test_512:
+** ...
+** movs r[0-3], #128
+** lsls r[0-3], r[0-3], #2
+** ...
+*/
+/* 512 == 128 << 2: the expected body loads 128 and shifts it left by
+   two.  */
+int
+test_512 ()
+{
+ return 512;
+}
+
+/* Does not use thumb1_gen_const_int.
+** test_764:
+** ...
+** movs r[0-3], #191
+** lsls r[0-3], r[0-3], #2
+** ...
+*/
+/* 764 == 191 << 2: the expected body loads 191 and shifts it left by
+   two.  */
+int
+test_764 ()
+{
+ return 764;
+}
+
+/* Does not use thumb1_gen_const_int.
+** test_65536:
+** ...
+** movs r[0-3], #128
+** lsls r[0-3], r[0-3], #9
+** ...
+*/
+/* 65536 == 128 << 9: the expected body loads 128 and shifts it left
+   by nine.  */
+int
+test_65536 ()
+{
+ return 65536;
+}
+
+/*
+** test_0x123456:
+** ...
+** movs r[0-3], #18
+** lsls r[0-3], r[0-3], #8
+** adds r[0-3], r[0-3], #52
+** lsls r[0-3], r[0-3], #8
+** adds r[0-3], r[0-3], #86
+** ...
+*/
+/* The expected body builds the constant one byte at a time, highest
+   byte first: 0x12 (18), 0x34 (52), 0x56 (86), with an 8-bit left
+   shift before each new byte is added.  */
+int
+test_0x123456 ()
+{
+ return 0x123456;
+}
+
+/*
+** test_0x1123456:
+** ...
+** movs r[0-3], #137
+** lsls r[0-3], r[0-3], #8
+** adds r[0-3], r[0-3], #26
+** lsls r[0-3], r[0-3], #8
+** adds r[0-3], r[0-3], #43
+** lsls r[0-3], r[0-3], #1
+** ...
+*/
+/* 0x1123456 == 0x891a2b << 1: the expected body builds 0x89 (137),
+   0x1a (26) and 0x2b (43) byte by byte, then shifts the result left
+   by one.  */
+int
+test_0x1123456 ()
+{
+ return 0x1123456;
+}
+
+/* With -Os, we generate:
+ movs r0, #16
+ lsls r0, r0, r0
+ With the other optimization levels, we generate:
+ movs r0, #16
+ lsls r0, r0, #16
+ hence the two alternatives.  In the -Os form the shift amount is
+ taken from r0, which was just loaded with 16.
+ 0x1000010 == 0x100001 << 4: 16 << 16 gives 0x100000, adding 1 gives
+ 0x100001, and the final shift by 4 produces the constant.  */
+/*
+** test_0x1000010:
+** ...
+** movs r[0-3], #16
+** lsls r[0-3], r[0-3], (#16|r[0-3])
+** adds r[0-3], r[0-3], #1
+** lsls r[0-3], r[0-3], #4
+** ...
+*/
+int
+test_0x1000010 ()
+{
+ return 0x1000010;
+}
+
+/*
+** test_0x1000011:
+** ...
+** movs r[0-3], #1
+** lsls r[0-3], r[0-3], #24
+** adds r[0-3], r[0-3], #17
+** ...
+*/
+/* 0x1000011 == (1 << 24) + 0x11: the expected body shifts 1 left by
+   24 in one step, then adds 17 (0x11).  */
+int
+test_0x1000011 ()
+{
+ return 0x1000011;
+}
+
+/*
+** test_m8192:
+** ...
+** movs r[0-3], #1
+** lsls r[0-3], r[0-3], #13
+** rsbs r[0-3], r[0-3], #0
+** ...
+*/
+/* -8192 == -(1 << 13): the expected body builds 8192 and then negates
+   it with rsbs.  */
+int
+test_m8192 ()
+{
+ return -8192;
+}