aboutsummaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
authorTakayuki 'January June' Suwa <jjsuwa_sys3175@yahoo.co.jp>2025-08-29 19:55:06 +0900
committerMax Filippov <jcmvbkbc@gmail.com>2025-08-29 10:54:32 -0700
commitba9d4b3ce59432f3e7cef5c650b088a12e7ff877 (patch)
treefd1a70c2610cec2bc418645decc6078b0e589969 /gcc
parent25bbc0f20a25a1db59baaf36d119ce274968747f (diff)
downloadgcc-ba9d4b3ce59432f3e7cef5c650b088a12e7ff877.zip
gcc-ba9d4b3ce59432f3e7cef5c650b088a12e7ff877.tar.gz
gcc-ba9d4b3ce59432f3e7cef5c650b088a12e7ff877.tar.bz2
xtensa: Rewrite bswapsi2_internal with compact syntax
Also, the omission of the instruction that sets the shift amount register (SAR) to 8 is now more effective: it is omitted if there was a previous bswapsi2 in the same BB, but not omitted if no bswapsi2 is found or another insn that modifies SAR is found first (see below).

Note that the five instructions for writing to SAR are as follows, along with the insns that use them (except for bswapsi2_internal itself):

- SSA8B: *shift_per_byte, *shlrd_per_byte
- SSA8L: *shift_per_byte, *shlrd_per_byte
- SSR: ashrsi3 (alt 1), lshrsi3 (alt 1), *shlrd_reg, rotrsi3 (alt 1)
- SSL: ashlsi3_internal (alt 1), *shlrd_reg, rotlsi3 (alt 1)
- SSAI: *shlrd_const, rotlsi3 (alt 0), rotrsi3 (alt 0)

gcc/ChangeLog:

* config/xtensa/xtensa-protos.h (xtensa_bswapsi2_output): New function prototype.
* config/xtensa/xtensa.cc (xtensa_bswapsi2_output_1, xtensa_bswapsi2_output): New functions.
* config/xtensa/xtensa.md (bswapsi2_internal): Rewrite in compact syntax and use xtensa_bswapsi2_output() as asm output.

gcc/testsuite/ChangeLog:

* gcc.target/xtensa/bswap-SSAI8.c: New.
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/xtensa/xtensa-protos.h1
-rw-r--r--gcc/config/xtensa/xtensa.cc88
-rw-r--r--gcc/config/xtensa/xtensa.md37
-rw-r--r--gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c29
4 files changed, 126 insertions, 29 deletions
diff --git a/gcc/config/xtensa/xtensa-protos.h b/gcc/config/xtensa/xtensa-protos.h
index 1f5dcf5..98e75c6 100644
--- a/gcc/config/xtensa/xtensa-protos.h
+++ b/gcc/config/xtensa/xtensa-protos.h
@@ -60,6 +60,7 @@ extern bool xtensa_tls_referenced_p (rtx);
extern enum rtx_code xtensa_shlrd_which_direction (rtx, rtx);
extern bool xtensa_split1_finished_p (void);
extern void xtensa_split_DI_reg_imm (rtx *);
+extern char *xtensa_bswapsi2_output (rtx_insn *, const char *);
#ifdef TREE_CODE
extern void init_cumulative_args (CUMULATIVE_ARGS *, int);
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index d75cba4..f3b89de 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -2645,6 +2645,94 @@ xtensa_split_DI_reg_imm (rtx *operands)
}
+/* Return the asm output string of the bswapsi2_internal insn pattern.
+ It does this by scanning backwards through the BB from the specified insn,
+ and if another bswapsi2_internal is found, it omits the instruction
+ to set SAR to 8. If none is found, or if a CALL, JUMP, ASM, or other insn
+ that clobbers SAR is found first, it prepends an instruction to set SAR
+ to 8 as usual. */
+
+static int
+xtensa_bswapsi2_output_1 (rtx_insn *insn)
+{
+ int icode;
+ rtx pat;
+ const char *iname;
+
+ /* Classify INSN by its effect on SAR, for the backward scan done by
+ xtensa_bswapsi2_output: return -1 if INSN may clobber SAR, 1 if INSN
+ is a bswapsi2_internal (after which SAR holds 8), and 0 if INSN is
+ not treated as touching SAR.
+
+ CALL insns do not preserve SAR.
+ JUMP insns only appear at the end of a BB, so they do not need to be
+ considered when scanning backwards. */
+ if (CALL_P (insn))
+ return -1;
+
+ switch (icode = INSN_CODE (insn))
+ {
+ /* Rotate insns clobber SAR, whichever alternative is used. */
+ case CODE_FOR_rotlsi3:
+ case CODE_FOR_rotrsi3:
+ return -1;
+ /* Simple shift insns clobber SAR only if the shift amount (operand 1
+ of the SET source) is not an immediate. */
+ case CODE_FOR_ashlsi3_internal:
+ case CODE_FOR_ashrsi3:
+ case CODE_FOR_lshrsi3:
+ if (! CONST_INT_P (XEXP (SET_SRC (PATTERN (insn)), 1)))
+ return -1;
+ break;
+ /* After this insn, SAR always holds 8. */
+ case CODE_FOR_bswapsi2_internal:
+ return 1;
+ default:
+ break;
+ }
+
+ /* "*shift_per_byte" and "*shlrd_*" complex shift insns clobber SAR.
+ They are recognized by pattern name rather than by a CODE_FOR_
+ constant. The icode check presumably excludes a negative
+ (unrecognized) INSN_CODE before insn_data is indexed. */
+ if (icode >= CODE_FOR_nothing
+ && (! strcmp (iname = insn_data[icode].name, "*shift_per_byte")
+ || ! strncmp (iname, "*shlrd_", 7)))
+ return -1;
+
+ /* asm statements may also clobber SAR, so conservatively treat every
+ one of them as a clobber. For a PARALLEL, only its first element is
+ inspected. */
+ if (NONJUMP_INSN_P (insn))
+ switch (GET_CODE (pat = PATTERN (insn)))
+ {
+ case SET:
+ return GET_CODE (SET_SRC (pat)) == ASM_OPERANDS ? -1 : 0;
+ case PARALLEL:
+ return (GET_CODE (pat = XVECEXP (pat, 0, 0)) == SET
+ && GET_CODE (SET_SRC (pat)) == ASM_OPERANDS)
+ || GET_CODE (pat) == ASM_OPERANDS
+ || GET_CODE (pat) == ASM_INPUT ? -1 : 0;
+ case ASM_OPERANDS:
+ return -1;
+ default:
+ break;
+ }
+
+ /* All other insns are assumed not to touch SAR. */
+ return 0;
+}
+
+char *
+xtensa_bswapsi2_output (rtx_insn *insn, const char *output)
+{
+ static char result[128]; /* Returned to the caller; overwritten by each call. */
+ int i;
+
+ strcpy (result, "ssai\t8\n\t"); /* Default: prepend the insn that sets SAR to 8. */
+ while ((insn = prev_nonnote_nondebug_insn_bb (insn)))
+ if ((i = xtensa_bswapsi2_output_1 (insn)) < 0)
+ break; /* SAR may have been clobbered: keep the prefix. */
+ else if (i > 0)
+ {
+ result[0] = '\0'; /* Earlier bswapsi2_internal found: drop the prefix. */
+ break;
+ }
+ strcat (result, output); /* Unchecked: prefix plus OUTPUT must fit in RESULT. */
+
+ return result;
+}
+
+
/* Try to split an integer value into what are suitable for two consecutive
immediate addition instructions, ADDI or ADDMI. */
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index ab0403d..1339b03 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -649,36 +649,15 @@
})
(define_insn "bswapsi2_internal"
- [(set (match_operand:SI 0 "register_operand" "=a,&a")
- (bswap:SI (match_operand:SI 1 "register_operand" "0,r")))
- (clobber (match_scratch:SI 2 "=&a,X"))]
+ [(set (match_operand:SI 0 "register_operand")
+ (bswap:SI (match_operand:SI 1 "register_operand")))
+ (clobber (match_scratch:SI 2))]
"!optimize_debug && optimize > 1 && !optimize_size"
-{
- rtx_insn *prev_insn = prev_nonnote_nondebug_insn (insn);
- const char *init = "ssai\t8\;";
- static char result[128];
- if (prev_insn && NONJUMP_INSN_P (prev_insn))
- {
- rtx x = PATTERN (prev_insn);
- if (GET_CODE (x) == PARALLEL && XVECLEN (x, 0) == 2
- && GET_CODE (XVECEXP (x, 0, 0)) == SET
- && GET_CODE (XVECEXP (x, 0, 1)) == CLOBBER)
- {
- x = XEXP (XVECEXP (x, 0, 0), 1);
- if (GET_CODE (x) == BSWAP && GET_MODE (x) == SImode)
- init = "";
- }
- }
- sprintf (result,
- (which_alternative == 0)
- ? "%s" "srli\t%%2, %%1, 16\;src\t%%2, %%2, %%1\;src\t%%2, %%2, %%2\;src\t%%0, %%1, %%2"
- : "%s" "srli\t%%0, %%1, 16\;src\t%%0, %%0, %%1\;src\t%%0, %%0, %%0\;src\t%%0, %%1, %%0",
- init);
- return result;
-}
- [(set_attr "type" "arith,arith")
- (set_attr "mode" "SI")
- (set_attr "length" "15,15")])
+ {@ [cons: =0, 1, =2; attrs: type, length]
+ [ a, 0, &a; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%2, %1, 16\;src\t%2, %2, %1\;src\t%2, %2, %2\;src\t%0, %1, %2");
+ [&a, r, X; arith, 15] << xtensa_bswapsi2_output (insn, "srli\t%0, %1, 16\;src\t%0, %0, %1\;src\t%0, %0, %0\;src\t%0, %1, %0");
+ }
+ [(set_attr "mode" "SI")])
(define_expand "bswapdi2"
[(set (match_operand:DI 0 "register_operand" "")
diff --git a/gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c b/gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c
new file mode 100644
index 0000000..010554b
--- /dev/null
+++ b/gcc/testsuite/gcc.target/xtensa/bswap-SSAI8.c
@@ -0,0 +1,29 @@
+/* { dg-do compile } */
+/* { dg-options "-O2" } */
+
+extern void foo(void);
+
+void test_0(volatile unsigned int a[], unsigned int b)
+{
+ a[0] = __builtin_bswap32(a[0]); /* no earlier bswap: ssai 8 expected (1) */
+ a[1] = a[1] >> 9; /* immediate shift amount: not expected to clobber SAR */
+ a[2] = __builtin_bswap32(a[2]); /* ssai 8 expected to be omitted */
+ a[3] = a[3] << b; /* variable shift amount: expected to clobber SAR */
+ a[4] = __builtin_bswap32(a[4]); /* ssai 8 expected (2) */
+ foo(); /* calls do not preserve SAR */
+ a[5] = __builtin_bswap32(a[5]); /* ssai 8 expected (3) */
+ a[6] = __builtin_stdc_rotate_left (a[6], 13); /* rotate: expected to clobber SAR */
+ a[7] = __builtin_bswap32(a[7]); /* ssai 8 expected (4) */
+ asm volatile ("# asm volatile"); /* asm is treated as a SAR clobber */
+ a[8] = __builtin_bswap32(a[8]); /* ssai 8 expected (5) */
+ a[9] = (a[9] << 9) | (b >> 23); /* presumably a funnel shift (*shlrd_const) that clobbers SAR -- TODO confirm */
+ a[10] = __builtin_bswap32(a[10]); /* ssai 8 expected (6) */
+}
+
+void test_1(volatile unsigned long long a[])
+{
+ a[0] = __builtin_bswap64(a[0]); /* presumably split into two SImode bswaps; only the first should need ssai 8 (7) -- TODO confirm */
+ a[1] = __builtin_bswap64(a[1]); /* ssai 8 expected to be omitted for both halves */
+}
+
+/* { dg-final { scan-assembler-times "ssai\t8" 7 } } */