diff options
author | Richard Henderson <rth@twiddle.net> | 2013-03-04 23:16:24 -0800 |
---|---|---|
committer | Aurelien Jarno <aurelien@aurel32.net> | 2013-04-27 02:16:43 +0200 |
commit | e86e0f2807fbadee09d9e06f11df69339cbbd94c (patch) | |
tree | e50dcc322152cd72be193240f118ea8b5506f155 /tcg/arm/tcg-target.c | |
parent | 2df3f1ee68269d40f5dcb5e8d9aba0869e150896 (diff) | |
download | qemu-e86e0f2807fbadee09d9e06f11df69339cbbd94c.zip qemu-e86e0f2807fbadee09d9e06f11df69339cbbd94c.tar.gz qemu-e86e0f2807fbadee09d9e06f11df69339cbbd94c.tar.bz2 |
tcg-arm: Improve constant generation
Try fully rotated arguments to mov and mvn before trying movt
or full decomposition. Begin decomposition with mvn when it
looks like it'll help. Examples include:
-: mov r9, #0x00000fa0
-: orr r9, r9, #0x000ee000
-: orr r9, r9, #0x0ff00000
-: orr r9, r9, #0xf0000000
+: mvn r9, #0x0000005f
+: eor r9, r9, #0x00011000
Reviewed-by: Aurelien Jarno <aurelien@aurel32.net>
Signed-off-by: Richard Henderson <rth@twiddle.net>
Diffstat (limited to 'tcg/arm/tcg-target.c')
-rw-r--r-- | tcg/arm/tcg-target.c | 67 |
1 file changed, 44 insertions, 23 deletions
diff --git a/tcg/arm/tcg-target.c b/tcg/arm/tcg-target.c index 1a236c2..dfcc8e6 100644 --- a/tcg/arm/tcg-target.c +++ b/tcg/arm/tcg-target.c @@ -427,15 +427,31 @@ static inline void tcg_out_dat_imm(TCGContext *s, (rn << 16) | (rd << 12) | im); } -static inline void tcg_out_movi32(TCGContext *s, - int cond, int rd, uint32_t arg) -{ - /* TODO: This is very suboptimal, we can easily have a constant - * pool somewhere after all the instructions. */ - if ((int)arg < 0 && (int)arg >= -0x100) { - tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, (~arg) & 0xff); - } else if (use_armv7_instructions) { - /* use movw/movt */ +static void tcg_out_movi32(TCGContext *s, int cond, int rd, uint32_t arg) +{ + int rot, opc, rn; + + /* For armv7, make sure not to use movw+movt when mov/mvn would do. + Speed things up by only checking when movt would be required. + Prior to armv7, have one go at fully rotated immediates before + doing the decomposition thing below. */ + if (!use_armv7_instructions || (arg & 0xffff0000)) { + rot = encode_imm(arg); + if (rot >= 0) { + tcg_out_dat_imm(s, cond, ARITH_MOV, rd, 0, + rotl(arg, rot) | (rot << 7)); + return; + } + rot = encode_imm(~arg); + if (rot >= 0) { + tcg_out_dat_imm(s, cond, ARITH_MVN, rd, 0, + rotl(~arg, rot) | (rot << 7)); + return; + } + } + + /* Use movw + movt. 
*/ + if (use_armv7_instructions) { /* movw */ tcg_out32(s, (cond << 28) | 0x03000000 | (rd << 12) | ((arg << 4) & 0x000f0000) | (arg & 0xfff)); @@ -444,22 +460,27 @@ static inline void tcg_out_movi32(TCGContext *s, tcg_out32(s, (cond << 28) | 0x03400000 | (rd << 12) | ((arg >> 12) & 0x000f0000) | ((arg >> 16) & 0xfff)); } - } else { - int opc = ARITH_MOV; - int rn = 0; - - do { - int i, rot; - - i = ctz32(arg) & ~1; - rot = ((32 - i) << 7) & 0xf00; - tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); - arg &= ~(0xff << i); + return; + } - opc = ARITH_ORR; - rn = rd; - } while (arg); + /* TODO: This is very suboptimal, we can easily have a constant + pool somewhere after all the instructions. */ + opc = ARITH_MOV; + rn = 0; + /* If we have lots of leading 1's, we can shorten the sequence by + beginning with mvn and then clearing higher bits with eor. */ + if (clz32(~arg) > clz32(arg)) { + opc = ARITH_MVN, arg = ~arg; } + do { + int i = ctz32(arg) & ~1; + rot = ((32 - i) << 7) & 0xf00; + tcg_out_dat_imm(s, cond, opc, rd, rn, ((arg >> i) & 0xff) | rot); + arg &= ~(0xff << i); + + opc = ARITH_EOR; + rn = rd; + } while (arg); } static inline void tcg_out_dat_rI(TCGContext *s, int cond, int opc, TCGArg dst, |