diff options
author | Richard Ball <richard.ball@arm.com> | 2023-10-26 16:18:50 +0100 |
---|---|---|
committer | Richard Ball <richard.ball@arm.com> | 2023-10-26 16:18:50 +0100 |
commit | 7006e5d2d7b5b2f38c84a5edd701f154ce18a230 (patch) | |
tree | 0a23aca629779c0c062a94e6884a5488209d0ea0 /gcc | |
parent | 2ae00adb326ed050bd67a67656b20a2cfe789626 (diff) | |
download | gcc-7006e5d2d7b5b2f38c84a5edd701f154ce18a230.zip gcc-7006e5d2d7b5b2f38c84a5edd701f154ce18a230.tar.gz gcc-7006e5d2d7b5b2f38c84a5edd701f154ce18a230.tar.bz2 |
arm: Use deltas for Arm switch tables
For normal optimization for the Arm state in gcc we get an uncompressed
table of jump targets. This table sits in the middle of the text segment
and is far larger than necessary, especially at -Os.
This patch compresses the table to use deltas in a similar manner to
Thumb code generation.
Similar code is also used for -fpic where we currently generate a jump
to a jump. In this format the jumps are too dense for the hardware branch
predictor to handle accurately, so execution is likely to be very expensive.
Changes to switch statements for arm include a new function to handle the
assembly generation for different machine modes. This allows for more
optimisation to be performed in aout.h where arm has switched from using
ASM_OUTPUT_ADDR_VEC_ELT to using ASM_OUTPUT_ADDR_DIFF_ELT.
In ASM_OUTPUT_ADDR_DIFF_ELT new assembly generation options have been
added to utilise the different machine modes. Additional changes
made to the casesi expand and insn, CASE_VECTOR_PC_RELATIVE,
CASE_VECTOR_SHORTEN_MODE and LABEL_ALIGN_AFTER_BARRIER are all
to accommodate this new approach to switch statement generation.
New tests have been added and no regressions on arm-none-eabi.
gcc/ChangeLog:
* config/arm/aout.h (ASM_OUTPUT_ADDR_DIFF_ELT): Add table output
for different machine modes for arm.
* config/arm/arm-protos.h (arm_output_casesi): New prototype.
* config/arm/arm.h (CASE_VECTOR_PC_RELATIVE): Make arm use
ASM_OUTPUT_ADDR_DIFF_ELT.
(CASE_VECTOR_SHORTEN_MODE): Change table size calculation for
TARGET_ARM.
(LABEL_ALIGN_AFTER_BARRIER): Change to accommodate .p2align 2
for TARGET_ARM.
* config/arm/arm.cc (arm_output_casesi): New function.
* config/arm/arm.md (arm_casesi_internal): Change casesi expand
and insn for arm to use new function arm_output_casesi.
gcc/testsuite/ChangeLog:
* gcc.target/arm/arm-switchstatement.c: New test.
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/arm/aout.h | 23 | ||||
-rw-r--r-- | gcc/config/arm/arm-protos.h | 1 | ||||
-rw-r--r-- | gcc/config/arm/arm.cc | 45 | ||||
-rw-r--r-- | gcc/config/arm/arm.h | 20 | ||||
-rw-r--r-- | gcc/config/arm/arm.md | 14 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/arm-switchstatement.c | 151 |
6 files changed, 242 insertions, 12 deletions
diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h index 57c3b9b..6a4c8da 100644 --- a/gcc/config/arm/aout.h +++ b/gcc/config/arm/aout.h @@ -183,7 +183,28 @@ do \ { \ if (TARGET_ARM) \ - asm_fprintf (STREAM, "\tb\t%LL%d\n", VALUE); \ + { \ + switch (GET_MODE (body)) \ + { \ + case E_QImode: \ + asm_fprintf (STREAM, "\t.byte\t(%LL%d-%LL%d-4)/4\n", \ + VALUE, REL); \ + break; \ + case E_HImode: \ + asm_fprintf (STREAM, "\t.2byte\t(%LL%d-%LL%d-4)/4\n", \ + VALUE, REL); \ + break; \ + case E_SImode: \ + if (flag_pic) \ + asm_fprintf (STREAM, "\t.word\t%LL%d-%LL%d-4\n", \ + VALUE, REL); \ + else \ + asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE); \ + break; \ + default: \ + gcc_unreachable (); \ + } \ + } \ else if (TARGET_THUMB1) \ { \ if (flag_pic || optimize_size) \ diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 77e7633..2f5ca79 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -261,6 +261,7 @@ extern void thumb_expand_cpymemqi (rtx *); extern rtx arm_return_addr (int, rtx); extern void thumb_reload_out_hi (rtx *); extern void thumb_set_return_address (rtx, rtx); +extern const char *arm_output_casesi (rtx *); extern const char *thumb1_output_casesi (rtx *); extern const char *thumb2_output_casesi (rtx *); #endif diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc index 6e933c8..4e5e699 100644 --- a/gcc/config/arm/arm.cc +++ b/gcc/config/arm/arm.cc @@ -30464,6 +30464,51 @@ arm_output_iwmmxt_tinsr (rtx *operands) return ""; } +/* Output an arm casesi dispatch sequence. Used by arm_casesi_internal insn. + Responsible for the handling of switch statements in arm. 
*/ +const char * +arm_output_casesi (rtx *operands) +{ + rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2]))); + + gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC); + + output_asm_insn ("cmp\t%0, %1", operands); + output_asm_insn ("bhi\t%l3", operands); + switch (GET_MODE (diff_vec)) + { + case E_QImode: + output_asm_insn ("adr\t%4, %l2", operands); + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + output_asm_insn ("ldrb\t%4, [%4, %0]", operands); + else + output_asm_insn ("ldrsb\t%4, [%4, %0]", operands); + return "add\t%|pc, %|pc, %4, lsl #2"; + + case E_HImode: + output_asm_insn ("adr\t%4, %l2", operands); + output_asm_insn ("add\t%4, %4, %0", operands); + if (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned) + output_asm_insn ("ldrh\t%4, [%4, %0]", operands); + else + output_asm_insn ("ldrsh\t%4, [%4, %0]", operands); + return "add\t%|pc, %|pc, %4, lsl #2"; + + case E_SImode: + if (flag_pic) + { + output_asm_insn ("adr\t%4, %l2", operands); + output_asm_insn ("ldr\t%4, [%4, %0, lsl #2]", operands); + return "add\t%|pc, %|pc, %4"; + } + output_asm_insn ("adr\t%4, %l2", operands); + return "ldr\t%|pc, [%4, %0, lsl #2]"; + + default: + gcc_unreachable (); + } +} + /* Output a Thumb-1 casesi dispatch sequence. */ const char * thumb1_output_casesi (rtx *operands) diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h index 4f54530..3063e34 100644 --- a/gcc/config/arm/arm.h +++ b/gcc/config/arm/arm.h @@ -2092,7 +2092,7 @@ enum arm_auto_incmodes for the index in the tablejump instruction. */ #define CASE_VECTOR_MODE Pmode -#define CASE_VECTOR_PC_RELATIVE ((TARGET_THUMB2 \ +#define CASE_VECTOR_PC_RELATIVE ((TARGET_ARM || TARGET_THUMB2 \ || (TARGET_THUMB1 \ && (optimize_size || flag_pic))) \ && (!target_pure_code)) @@ -2109,9 +2109,19 @@ enum arm_auto_incmodes : min >= -4096 && max < 4096 \ ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \ : SImode) \ - : ((min < 0 || max >= 0x20000 || !TARGET_THUMB2) ? SImode \ - : (max >= 0x200) ? 
HImode \ - : QImode)) + : (TARGET_THUMB2 \ + ? ((min > 0 && max < 0x200) ? QImode \ + : (min > 0 && max <= 0x20000) ? HImode \ + : SImode) \ + : ((min >= 0 && max < 1024) \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, QImode) \ + : (min >= -512 && max <= 508) \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, QImode) \ + :(min >= 0 && max < 262144) \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 1, HImode) \ + : (min >= -131072 && max <=131068) \ + ? (ADDR_DIFF_VEC_FLAGS (body).offset_unsigned = 0, HImode) \ + : SImode))) /* signed 'char' is most compatible, but RISC OS wants it unsigned. unsigned is probably best, but may break some code. */ @@ -2301,7 +2311,7 @@ extern int making_const_table; #define LABEL_ALIGN_AFTER_BARRIER(LABEL) \ (GET_CODE (PATTERN (prev_active_insn (LABEL))) == ADDR_DIFF_VEC \ - ? 1 : 0) + ? (TARGET_ARM ? 2 : 1) : 0) #define ARM_DECLARE_FUNCTION_NAME(STREAM, NAME, DECL) \ arm_declare_function_name ((STREAM), (NAME), (DECL)); diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md index 2ac9723..0b2eb4b 100644 --- a/gcc/config/arm/arm.md +++ b/gcc/config/arm/arm.md @@ -9556,6 +9556,8 @@ (match_dup 4) (label_ref:SI (match_operand 3 "")))) (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 5)) + (clobber (match_scratch:SI 6)) (use (label_ref:SI (match_operand 2 "")))])] "TARGET_ARM" { @@ -9576,15 +9578,15 @@ (label_ref:SI (match_operand 2 "" "")))) (label_ref:SI (match_operand 3 "" "")))) (clobber (reg:CC CC_REGNUM)) + (clobber (match_scratch:SI 4 "=&r")) + (clobber (match_scratch:SI 5 "=r")) (use (label_ref:SI (match_dup 2)))])] "TARGET_ARM" - "* - if (flag_pic) - return \"cmp\\t%0, %1\;addls\\t%|pc, %|pc, %0, asl #2\;b\\t%l3\"; - return \"cmp\\t%0, %1\;ldrls\\t%|pc, [%|pc, %0, asl #2]\;b\\t%l3\"; - " + { + return arm_output_casesi (operands); + } [(set_attr "conds" "clob") - (set_attr "length" "12") + (set_attr "length" "24") (set_attr "type" "multiple")] ) diff --git 
a/gcc/testsuite/gcc.target/arm/arm-switchstatement.c b/gcc/testsuite/gcc.target/arm/arm-switchstatement.c new file mode 100644 index 0000000..a7aa9d4 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/arm-switchstatement.c @@ -0,0 +1,151 @@ +/* { dg-do compile } */ +/* { dg-options "-O2 --param case-values-threshold=1 -fno-reorder-blocks -fno-tree-dce" } */ +/* { dg-require-effective-target arm_nothumb } */ +/* { dg-final { check-function-bodies "**" "" "" } } */ + +#define NOP "nop;" +#define NOP2 NOP NOP +#define NOP4 NOP2 NOP2 +#define NOP8 NOP4 NOP4 +#define NOP16 NOP8 NOP8 +#define NOP32 NOP16 NOP16 +#define NOP64 NOP32 NOP32 +#define NOP128 NOP64 NOP64 +#define NOP256 NOP128 NOP128 +#define NOP512 NOP256 NOP256 +#define NOP1024 NOP512 NOP512 +#define NOP2048 NOP1024 NOP1024 +#define NOP4096 NOP2048 NOP2048 +#define NOP8192 NOP4096 NOP4096 +#define NOP16384 NOP8192 NOP8192 +#define NOP32768 NOP16384 NOP16384 +#define NOP65536 NOP32768 NOP32768 +#define NOP131072 NOP65536 NOP65536 + +enum z +{ + a = 1, + b, + c, + d, + e, + f = 7, +}; + +inline void QIFunction (const char* flag) +{ + asm volatile (NOP32); + return; +} + +inline void HIFunction (const char* flag) +{ + asm volatile (NOP512); + return; +} + +inline void SIFunction (const char* flag) +{ + asm volatile (NOP131072); + return; +} + +/* +**QImode_test: +** ... +** adr (r[0-9]+), .L[0-9]+ +** ldrb \1, \[\1, r[0-9]+\] +** add pc, pc, \1, lsl #2 +** ... +*/ +__attribute__ ((noinline)) __attribute__ ((noclone)) const char* QImode_test(enum z x) +{ + switch (x) + { + case d: + QIFunction("QItest"); + return "InlineASM"; + case f: + return "TEST"; + default: + return "Default"; + } +} + +/* { dg-final { scan-assembler ".byte" } } */ + +/* +**HImode_test: +** ... +** adr (r[0-9]+), .L[0-9]+ +** add \1, \1, (r[0-9]+) +** ldrh \1, \[\1, \2\] +** add pc, pc, \1, lsl #2 +** ... 
+*/ +__attribute__ ((noinline)) __attribute__ ((noclone)) const char* HImode_test(enum z x) +{ + switch (x) + { + case d: + HIFunction("HItest"); + return "InlineASM"; + case f: + return "TEST"; + default: + return "Default"; + } +} + +/* { dg-final { scan-assembler ".2byte" } } */ + +/* +**SImode_test: +** ... +** adr (r[0-9]+), .L[0-9]+ +** ldr pc, \[\1, r[0-9]+, lsl #2\] +** ... +*/ +__attribute__ ((noinline)) __attribute__ ((noclone)) const char* SImode_test(enum z x) +{ + switch (x) + { + case d: + SIFunction("SItest"); + return "InlineASM"; + case f: + return "TEST"; + default: + return "Default"; + } +} + +/* { dg-final { scan-assembler ".word" } } */ + +/* +**backwards_branch_test: +** ... +** adr (r[0-9]+), .L[0-9]+ +** add \1, \1, (r[0-9]+) +** ldrsh \1, \[\1, \2\] +** add pc, pc, \1, lsl #2 +** ... +*/ +__attribute__ ((noinline)) __attribute__ ((noclone)) const char* backwards_branch_test(enum z x, int flag) +{ + if (flag == 5) + { + backwards: + asm volatile (NOP512); + return "ASM"; + } + switch (x) + { + case d: + goto backwards; + case f: + return "TEST"; + default: + return "Default"; + } +}
\ No newline at end of file |