aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRichard Henderson <richard.henderson@linaro.org>2024-12-25 20:22:45 -0800
committerRichard Henderson <richard.henderson@linaro.org>2025-01-16 20:57:17 -0800
commit802ef65b5f8dccbcabb7960bee9993ec65f95ab6 (patch)
tree29c9b3f27687acb9e04f8fdab8260a6f61b2f133
parent936fc0a96ed02e0d996ea58b05daf983b1cb3041 (diff)
downloadqemu-802ef65b5f8dccbcabb7960bee9993ec65f95ab6.zip
qemu-802ef65b5f8dccbcabb7960bee9993ec65f95ab6.tar.gz
qemu-802ef65b5f8dccbcabb7960bee9993ec65f95ab6.tar.bz2
tcg/arm: Add full [US]XT[BH] into {s}extract
The armv6 uxt and sxt opcodes have a 2-bit rotate field which supports extractions from ofs = {0,8,16,24}. Special case ofs = 0, len <= 8 as AND. Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
-rw-r--r--tcg/arm/tcg-target-has.h21
-rw-r--r--tcg/arm/tcg-target.c.inc54
2 files changed, 67 insertions, 8 deletions
diff --git a/tcg/arm/tcg-target-has.h b/tcg/arm/tcg-target-has.h
index 3161855..d9f3311 100644
--- a/tcg/arm/tcg-target-has.h
+++ b/tcg/arm/tcg-target-has.h
@@ -41,8 +41,8 @@ extern bool use_neon_instructions;
#define TCG_TARGET_HAS_ctz_i32 use_armv7_instructions
#define TCG_TARGET_HAS_ctpop_i32 0
#define TCG_TARGET_HAS_deposit_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_extract_i32 use_armv7_instructions
-#define TCG_TARGET_HAS_sextract_i32 use_armv7_instructions
+#define TCG_TARGET_HAS_extract_i32 1
+#define TCG_TARGET_HAS_sextract_i32 1
#define TCG_TARGET_HAS_extract2_i32 1
#define TCG_TARGET_HAS_negsetcond_i32 1
#define TCG_TARGET_HAS_mulu2_i32 1
@@ -82,4 +82,21 @@ extern bool use_neon_instructions;
#define TCG_TARGET_HAS_cmpsel_vec 0
#define TCG_TARGET_HAS_tst_vec 1
+static inline bool
+tcg_target_extract_valid(TCGType type, unsigned ofs, unsigned len)
+{
+ if (use_armv7_instructions) {
+ return true; /* SBFX or UBFX */
+ }
+ switch (len) {
+ case 8: /* SXTB or UXTB */
+ case 16: /* SXTH or UXTH */
+ return (ofs % 8) == 0;
+ }
+ return false;
+}
+
+#define TCG_TARGET_extract_valid tcg_target_extract_valid
+#define TCG_TARGET_sextract_valid tcg_target_extract_valid
+
#endif
diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index 9cfb733..12dad73 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1036,19 +1036,61 @@ static void tcg_out_deposit(TCGContext *s, ARMCond cond, TCGReg rd,
static void tcg_out_extract(TCGContext *s, ARMCond cond, TCGReg rd,
TCGReg rn, int ofs, int len)
{
- /* ubfx */
- tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
- | (ofs << 7) | ((len - 1) << 16));
+ /* According to gcc, AND can be faster. */
+ if (ofs == 0 && len <= 8) {
+ tcg_out_dat_imm(s, cond, ARITH_AND, rd, rn,
+ encode_imm_nofail((1 << len) - 1));
+ return;
+ }
+
+ if (use_armv7_instructions) {
+ /* ubfx */
+ tcg_out32(s, 0x07e00050 | (cond << 28) | (rd << 12) | rn
+ | (ofs << 7) | ((len - 1) << 16));
+ return;
+ }
+
+ assert(ofs % 8 == 0);
+ switch (len) {
+ case 8:
+ /* uxtb */
+ tcg_out32(s, 0x06ef0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
+ break;
+ case 16:
+ /* uxth */
+ tcg_out32(s, 0x06ff0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
static void tcg_out_sextract(TCGContext *s, ARMCond cond, TCGReg rd,
TCGReg rn, int ofs, int len)
{
- /* sbfx */
- tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
- | (ofs << 7) | ((len - 1) << 16));
+ if (use_armv7_instructions) {
+ /* sbfx */
+ tcg_out32(s, 0x07a00050 | (cond << 28) | (rd << 12) | rn
+ | (ofs << 7) | ((len - 1) << 16));
+ return;
+ }
+
+ assert(ofs % 8 == 0);
+ switch (len) {
+ case 8:
+ /* sxtb */
+ tcg_out32(s, 0x06af0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
+ break;
+ case 16:
+ /* sxth */
+ tcg_out32(s, 0x06bf0070 | (cond << 28) | (rd << 12) | (ofs << 7) | rn);
+ break;
+ default:
+ g_assert_not_reached();
+ }
}
+
static void tcg_out_ld32u(TCGContext *s, ARMCond cond,
TCGReg rd, TCGReg rn, int32_t offset)
{