From c0386d4d54d2cc33d6efc0b998fe6396bf92be15 Mon Sep 17 00:00:00 2001
From: Jim Wilson
Date: Wed, 4 Jan 2017 16:05:27 -0800
Subject: Five fixes, for fcsel, fcvtz, fminnm, mls, and non-widening mul.

	sim/aarch64/
	* cpustate.c: Include math.h.
	(aarch64_set_FP_float): Use signbit to check for signed zero.
	(aarch64_set_FP_double): Likewise.
	* simulator.c (do_vec_MOV_immediate, case 0x8): Add missing break.
	(do_vec_mul): In all DO_VEC_WIDENING_MUL calls, make second and fourth
	args same size as third arg.
	(fmaxnm): Use isnan instead of fpclassify.
	(fminnm, dmaxnm, dminnm): Likewise.
	(do_vec_MLS): Reverse order of subtraction operands.
	(dexSimpleFPCondSelect): Call aarch64_get_FP_double or
	aarch64_get_FP_float to get source register contents.
	(UINT_MIN, ULONG_MIN, FLOAT_UINT_MAX, FLOAT_UINT_MIN,
	DOUBLE_UINT_MAX, DOUBLE_UINT_MIN, FLOAT_ULONG_MAX, FLOAT_ULONG_MIN,
	DOUBLE_ULONG_MAX, DOUBLE_ULONG_MIN): New.
	(do_fcvtzu): Use ULONG instead of LONG, and UINT instead of INT in
	raise_exception calls.

	sim/testsuite/sim/aarch64/
	* fcsel.s: New.
	* fcvtz.s: New.
	* fminnm.s: New.
	* mls.s: New.
	* mul.s: New.
--- sim/aarch64/ChangeLog | 19 ++++ sim/aarch64/cpustate.c | 9 +- sim/aarch64/simulator.c | 76 ++++++++------ sim/testsuite/sim/aarch64/ChangeLog | 8 ++ sim/testsuite/sim/aarch64/fcsel.s | 53 ++++++++++ sim/testsuite/sim/aarch64/fcvtz.s | 202 ++++++++++++++++++++++++++++++++++++ sim/testsuite/sim/aarch64/fminnm.s | 82 +++++++++++++++ sim/testsuite/sim/aarch64/mls.s | 103 ++++++++++++++++++ sim/testsuite/sim/aarch64/mul.s | 99 ++++++++++++++++++ 9 files changed, 618 insertions(+), 33 deletions(-) create mode 100644 sim/testsuite/sim/aarch64/fcsel.s create mode 100644 sim/testsuite/sim/aarch64/fcvtz.s create mode 100644 sim/testsuite/sim/aarch64/fminnm.s create mode 100644 sim/testsuite/sim/aarch64/mls.s create mode 100644 sim/testsuite/sim/aarch64/mul.s (limited to 'sim') diff --git a/sim/aarch64/ChangeLog b/sim/aarch64/ChangeLog index b1baf26..eff0a93 100644 --- a/sim/aarch64/ChangeLog +++ b/sim/aarch64/ChangeLog @@ -1,3 +1,22 @@ +2017-01-04 Jim Wilson + + * cpustate.c: Include math.h. + (aarch64_set_FP_float): Use signbit to check for signed zero. + (aarch64_set_FP_double): Likewise. + * simulator.c (do_vec_MOV_immediate, case 0x8): Add missing break. + (do_vec_mul): In all DO_VEC_WIDENING_MUL calls, make second and fourth + args same size as third arg. + (fmaxnm): Use isnan instead of fpclassify. + (fminnm, dmaxnm, dminnm): Likewise. + (do_vec_MLS): Reverse order of subtraction operands. + (dexSimpleFPCondSelect): Call aarch64_get_FP_double or + aarch64_get_FP_float to get source register contents. + (UINT_MIN, ULONG_MIN, FLOAT_UINT_MAX, FLOAT_UINT_MIN, + DOUBLE_UINT_MAX, DOUBLE_UINT_MIN, FLOAT_ULONG_MAX, FLOAT_ULONG_MIN, + DOUBLE_ULONG_MAX, DOUBLE_ULONG_MIN): New. + (do_fcvtzu): Use ULONG instead of LONG, and UINT instead of INT in + raise_exception calls. + 2016-12-21 Jim Wilson * simulator.c (set_flags_for_float_compare): Add code to handle Inf. 
diff --git a/sim/aarch64/cpustate.c b/sim/aarch64/cpustate.c index 7975b32..b7ea5d4 100644 --- a/sim/aarch64/cpustate.c +++ b/sim/aarch64/cpustate.c @@ -20,6 +20,7 @@ along with this program. If not, see . */ #include +#include #include "sim-main.h" #include "cpustate.h" @@ -369,7 +370,9 @@ aarch64_set_FP_half (sim_cpu *cpu, VReg reg, float val) void aarch64_set_FP_float (sim_cpu *cpu, VReg reg, float val) { - if (val != cpu->fr[reg].s) + if (val != cpu->fr[reg].s + /* Handle +/- zero. */ + || signbit (val) != signbit (cpu->fr[reg].s)) { FRegister v; @@ -385,7 +388,9 @@ aarch64_set_FP_float (sim_cpu *cpu, VReg reg, float val) void aarch64_set_FP_double (sim_cpu *cpu, VReg reg, double val) { - if (val != cpu->fr[reg].d) + if (val != cpu->fr[reg].d + /* Handle +/- zero. */ + || signbit (val) != signbit (cpu->fr[reg].d)) { FRegister v; diff --git a/sim/aarch64/simulator.c b/sim/aarch64/simulator.c index b40f692..7b75c6e 100644 --- a/sim/aarch64/simulator.c +++ b/sim/aarch64/simulator.c @@ -3221,7 +3221,8 @@ do_vec_MOV_immediate (sim_cpu *cpu) case 0x8: /* 16-bit, no shift. */ for (i = 0; i < (full ? 8 : 4); i++) aarch64_set_vec_u16 (cpu, vd, i, val); - /* Fall through. */ + break; + case 0xd: /* 32-bit, mask shift by 16. */ val <<= 8; val |= 0xFF; @@ -3724,15 +3725,15 @@ do_vec_mul (sim_cpu *cpu) switch (INSTR (23, 22)) { case 0: - DO_VEC_WIDENING_MUL (full ? 16 : 8, uint16_t, u8, u16); + DO_VEC_WIDENING_MUL (full ? 16 : 8, uint8_t, u8, u8); return; case 1: - DO_VEC_WIDENING_MUL (full ? 8 : 4, uint32_t, u16, u32); + DO_VEC_WIDENING_MUL (full ? 8 : 4, uint16_t, u16, u16); return; case 2: - DO_VEC_WIDENING_MUL (full ? 4 : 2, uint64_t, u32, u64); + DO_VEC_WIDENING_MUL (full ? 4 : 2, uint32_t, u32, u32); return; case 3: @@ -3831,13 +3832,13 @@ do_vec_MLA (sim_cpu *cpu) static float fmaxnm (float a, float b) { - if (fpclassify (a) == FP_NORMAL) + if (! isnan (a)) { - if (fpclassify (b) == FP_NORMAL) + if (! isnan (b)) return a > b ? 
a : b; return a; } - else if (fpclassify (b) == FP_NORMAL) + else if (! isnan (b)) return b; return a; } @@ -3845,13 +3846,13 @@ fmaxnm (float a, float b) static float fminnm (float a, float b) { - if (fpclassify (a) == FP_NORMAL) + if (! isnan (a)) { - if (fpclassify (b) == FP_NORMAL) + if (! isnan (b)) return a < b ? a : b; return a; } - else if (fpclassify (b) == FP_NORMAL) + else if (! isnan (b)) return b; return a; } @@ -3859,13 +3860,13 @@ fminnm (float a, float b) static double dmaxnm (double a, double b) { - if (fpclassify (a) == FP_NORMAL) + if (! isnan (a)) { - if (fpclassify (b) == FP_NORMAL) + if (! isnan (b)) return a > b ? a : b; return a; } - else if (fpclassify (b) == FP_NORMAL) + else if (! isnan (b)) return b; return a; } @@ -3873,13 +3874,13 @@ dmaxnm (double a, double b) static double dminnm (double a, double b) { - if (fpclassify (a) == FP_NORMAL) + if (! isnan (a)) { - if (fpclassify (b) == FP_NORMAL) + if (! isnan (b)) return a < b ? a : b; return a; } - else if (fpclassify (b) == FP_NORMAL) + else if (! isnan (b)) return b; return a; } @@ -6346,25 +6347,25 @@ do_vec_MLS (sim_cpu *cpu) case 0: for (i = 0; i < (full ? 16 : 8); i++) aarch64_set_vec_u8 (cpu, vd, i, - (aarch64_get_vec_u8 (cpu, vn, i) - * aarch64_get_vec_u8 (cpu, vm, i)) - - aarch64_get_vec_u8 (cpu, vd, i)); + aarch64_get_vec_u8 (cpu, vd, i) + - (aarch64_get_vec_u8 (cpu, vn, i) + * aarch64_get_vec_u8 (cpu, vm, i))); return; case 1: for (i = 0; i < (full ? 8 : 4); i++) aarch64_set_vec_u16 (cpu, vd, i, - (aarch64_get_vec_u16 (cpu, vn, i) - * aarch64_get_vec_u16 (cpu, vm, i)) - - aarch64_get_vec_u16 (cpu, vd, i)); + aarch64_get_vec_u16 (cpu, vd, i) + - (aarch64_get_vec_u16 (cpu, vn, i) + * aarch64_get_vec_u16 (cpu, vm, i))); return; case 2: for (i = 0; i < (full ? 
4 : 2); i++) aarch64_set_vec_u32 (cpu, vd, i, - (aarch64_get_vec_u32 (cpu, vn, i) - * aarch64_get_vec_u32 (cpu, vm, i)) - - aarch64_get_vec_u32 (cpu, vd, i)); + aarch64_get_vec_u32 (cpu, vd, i) + - (aarch64_get_vec_u32 (cpu, vn, i) + * aarch64_get_vec_u32 (cpu, vm, i))); return; default: @@ -7463,9 +7464,11 @@ dexSimpleFPCondSelect (sim_cpu *cpu) TRACE_DECODE (cpu, "emulated at line %d", __LINE__); if (INSTR (22, 22)) - aarch64_set_FP_double (cpu, sd, set ? sn : sm); + aarch64_set_FP_double (cpu, sd, (set ? aarch64_get_FP_double (cpu, sn) + : aarch64_get_FP_double (cpu, sm))); else - aarch64_set_FP_float (cpu, sd, set ? sn : sm); + aarch64_set_FP_float (cpu, sd, (set ? aarch64_get_FP_float (cpu, sn) + : aarch64_get_FP_float (cpu, sm))); } /* Store 32 bit unscaled signed 9 bit. */ @@ -8117,6 +8120,17 @@ static const float FLOAT_LONG_MIN = (float) LONG_MIN; static const double DOUBLE_LONG_MAX = (double) LONG_MAX; static const double DOUBLE_LONG_MIN = (double) LONG_MIN; +#define UINT_MIN 0 +#define ULONG_MIN 0 +static const float FLOAT_UINT_MAX = (float) UINT_MAX; +static const float FLOAT_UINT_MIN = (float) UINT_MIN; +static const double DOUBLE_UINT_MAX = (double) UINT_MAX; +static const double DOUBLE_UINT_MIN = (double) UINT_MIN; +static const float FLOAT_ULONG_MAX = (float) ULONG_MAX; +static const float FLOAT_ULONG_MIN = (float) ULONG_MIN; +static const double DOUBLE_ULONG_MAX = (double) ULONG_MAX; +static const double DOUBLE_ULONG_MIN = (double) ULONG_MIN; + /* Check for FP exception conditions: NaN raises IO Infinity raises IO @@ -8262,7 +8276,7 @@ do_fcvtzu (sim_cpu *cpu) /* Do not raise an exception if we have reached ULONG_MAX. */ if (value != (1UL << 63)) - RAISE_EXCEPTIONS (d, value, DOUBLE, LONG); + RAISE_EXCEPTIONS (d, value, DOUBLE, ULONG); aarch64_set_reg_u64 (cpu, rd, NO_SP, value); } @@ -8273,7 +8287,7 @@ do_fcvtzu (sim_cpu *cpu) /* Do not raise an exception if we have reached ULONG_MAX. 
*/ if (value != (1UL << 63)) - RAISE_EXCEPTIONS (f, value, FLOAT, LONG); + RAISE_EXCEPTIONS (f, value, FLOAT, ULONG); aarch64_set_reg_u64 (cpu, rd, NO_SP, value); } @@ -8290,7 +8304,7 @@ do_fcvtzu (sim_cpu *cpu) value = (uint32_t) d; /* Do not raise an exception if we have reached UINT_MAX. */ if (value != (1UL << 31)) - RAISE_EXCEPTIONS (d, value, DOUBLE, INT); + RAISE_EXCEPTIONS (d, value, DOUBLE, UINT); } else { @@ -8299,7 +8313,7 @@ do_fcvtzu (sim_cpu *cpu) value = (uint32_t) f; /* Do not raise an exception if we have reached UINT_MAX. */ if (value != (1UL << 31)) - RAISE_EXCEPTIONS (f, value, FLOAT, INT); + RAISE_EXCEPTIONS (f, value, FLOAT, UINT); } aarch64_set_reg_u64 (cpu, rd, NO_SP, value); diff --git a/sim/testsuite/sim/aarch64/ChangeLog b/sim/testsuite/sim/aarch64/ChangeLog index a130a97..63f0d7d 100644 --- a/sim/testsuite/sim/aarch64/ChangeLog +++ b/sim/testsuite/sim/aarch64/ChangeLog @@ -1,3 +1,11 @@ +2017-01-04 Jim Wilson + + * fcsel.s: New. + * fcvtz.s: New. + * fminnm.s: New. + * mls.s: New. + * mul.s: New. + 2016-12-21 Jim Wilson * fcmp.s: New. diff --git a/sim/testsuite/sim/aarch64/fcsel.s b/sim/testsuite/sim/aarch64/fcsel.s new file mode 100644 index 0000000..5b8443c --- /dev/null +++ b/sim/testsuite/sim/aarch64/fcsel.s @@ -0,0 +1,53 @@ +# mach: aarch64 + +# Check the FP Conditional Select instruction: fcsel. +# Check 1/1 eq/neg, and 1/2 lt/gt. 
+ +.include "testutils.inc" + + start + fmov s0, #1.0 + fmov s1, #1.0 + fmov s2, #-1.0 + fcmp s0, s1 + fcsel s3, s0, s2, eq + fcmp s3, s0 + bne .Lfailure + fcsel s3, s0, s2, ne + fcmp s3, s2 + bne .Lfailure + + fmov s0, #1.0 + fmov s1, #2.0 + fcmp s0, s1 + fcsel s3, s0, s2, lt + fcmp s3, s0 + bne .Lfailure + fcsel s3, s0, s2, gt + fcmp s3, s2 + bne .Lfailure + + fmov d0, #1.0 + fmov d1, #1.0 + fmov d2, #-1.0 + fcmp d0, d1 + fcsel d3, d0, d2, eq + fcmp d3, d0 + bne .Lfailure + fcsel d3, d0, d2, ne + fcmp d3, d2 + bne .Lfailure + + fmov d0, #1.0 + fmov d1, #2.0 + fcmp d0, d1 + fcsel d3, d0, d2, lt + fcmp d3, d0 + bne .Lfailure + fcsel d3, d0, d2, gt + fcmp d3, d2 + bne .Lfailure + + pass +.Lfailure: + fail diff --git a/sim/testsuite/sim/aarch64/fcvtz.s b/sim/testsuite/sim/aarch64/fcvtz.s new file mode 100644 index 0000000..9bb6f9b --- /dev/null +++ b/sim/testsuite/sim/aarch64/fcvtz.s @@ -0,0 +1,202 @@ +# mach: aarch64 + +# Check the FP convert to int round toward zero instructions: fcvtszs32, +# fcvtszs, fcvtszd32, fcvtszd, fcvtzu. +# For 32-bit signed convert, test values -1.5, INT_MAX, and INT_MIN. +# For 64-bit signed convert, test values -1.5, LONG_MAX, and LONG_MIN. +# For 32-bit unsigned convert, test values 1.5, INT_MAX, and UINT_MAX. +# For 64-bit unsigned convert, test values 1.5, LONG_MAX, and ULONG_MAX. 
+ + .data +fm1p5: + .word 3217031168 +fimax: + .word 1325400064 +fimin: + .word 3472883712 +flmax: + .word 1593835520 +flmin: + .word 3741319168 +f1p5: + .word 1069547520 +fuimax: + .word 1333788672 +fulmax: + .word 1602224128 + +dm1p5: + .word 0 + .word -1074266112 +dimax: + .word 4290772992 + .word 1105199103 +dimin: + .word 0 + .word -1042284544 +dlmax: + .word 0 + .word 1138753536 +dlmin: + .word 0 + .word -1008730112 +d1p5: + .word 0 + .word 1073217536 +duimax: + .word 4292870144 + .word 1106247679 +dulmax: + .word 0 + .word 1139802112 + +.include "testutils.inc" + + start + adrp x0, fm1p5 + ldr s0, [x0, #:lo12:fm1p5] + fcvtzs w1, s0 + cmp w1, #-1 + bne .Lfailure + adrp x0, fimax + ldr s0, [x0, #:lo12:fimax] + fcvtzs w1, s0 + mov w2, #0x7fffffff + cmp w1, w2 + bne .Lfailure + adrp x0, fimin + ldr s0, [x0, #:lo12:fimin] + fcvtzs w1, s0 + mov w2, #0x80000000 + cmp w1, w2 + bne .Lfailure + + adrp x0, fm1p5 + ldr s0, [x0, #:lo12:fm1p5] + fcvtzs x1, s0 + cmp x1, #-1 + bne .Lfailure + adrp x0, flmax + ldr s0, [x0, #:lo12:flmax] + fcvtzs x1, s0 + mov x2, #0x7fffffffffffffff + cmp x1, x2 + bne .Lfailure + adrp x0, flmin + ldr s0, [x0, #:lo12:flmin] + fcvtzs x1, s0 + mov x2, #0x8000000000000000 + cmp x1, x2 + bne .Lfailure + + adrp x0, dm1p5 + ldr d0, [x0, #:lo12:dm1p5] + fcvtzs w1, d0 + cmp w1, #-1 + bne .Lfailure + adrp x0, dimax + ldr d0, [x0, #:lo12:dimax] + fcvtzs w1, d0 + mov w2, #0x7fffffff + cmp w1, w2 + bne .Lfailure + adrp x0, dimin + ldr d0, [x0, #:lo12:dimin] + fcvtzs w1, d0 + mov w2, #0x80000000 + cmp w1, w2 + bne .Lfailure + + adrp x0, dm1p5 + ldr d0, [x0, #:lo12:dm1p5] + fcvtzs x1, d0 + cmp x1, #-1 + bne .Lfailure + adrp x0, dlmax + ldr d0, [x0, #:lo12:dlmax] + fcvtzs x1, d0 + mov x2, #0x7fffffffffffffff + cmp x1, x2 + bne .Lfailure + adrp x0, dlmin + ldr d0, [x0, #:lo12:dlmin] + fcvtzs x1, d0 + mov x2, #0x8000000000000000 + cmp x1, x2 + bne .Lfailure + + adrp x0, f1p5 + ldr s0, [x0, #:lo12:f1p5] + fcvtzu w1, s0 + cmp w1, #1 + bne .Lfailure + adrp x0, 
fimax + ldr s0, [x0, #:lo12:fimax] + fcvtzu w1, s0 + mov w2, #0x80000000 + cmp w1, w2 + bne .Lfailure + adrp x0, fuimax + ldr s0, [x0, #:lo12:fuimax] + fcvtzu w1, s0 + mov w2, #0xffffffff + cmp w1, w2 + bne .Lfailure + + adrp x0, f1p5 + ldr s0, [x0, #:lo12:f1p5] + fcvtzu x1, s0 + cmp x1, #1 + bne .Lfailure + adrp x0, flmax + ldr s0, [x0, #:lo12:flmax] + fcvtzu x1, s0 + mov x2, #0x8000000000000000 + cmp x1, x2 + bne .Lfailure + adrp x0, fulmax + ldr s0, [x0, #:lo12:fulmax] + fcvtzu x1, s0 + mov x2, #0xffffffffffffffff + cmp x1, x2 + bne .Lfailure + + adrp x0, d1p5 + ldr d0, [x0, #:lo12:d1p5] + fcvtzu w1, d0 + cmp w1, #1 + bne .Lfailure + adrp x0, dimax + ldr d0, [x0, #:lo12:dimax] + fcvtzu w1, d0 + mov w2, #0x7fffffff + cmp w1, w2 + bne .Lfailure + adrp x0, duimax + ldr d0, [x0, #:lo12:duimax] + fcvtzu w1, d0 + mov w2, #0xffffffff + cmp w1, w2 + bne .Lfailure + + adrp x0, d1p5 + ldr d0, [x0, #:lo12:d1p5] + fcvtzu x1, d0 + cmp x1, #1 + bne .Lfailure + adrp x0, dlmax + ldr d0, [x0, #:lo12:dlmax] + fcvtzu x1, d0 + mov x2, #0x8000000000000000 + cmp x1, x2 + bne .Lfailure + adrp x0, dulmax + ldr d0, [x0, #:lo12:dulmax] + fcvtzu x1, d0 + mov x2, #0xffffffffffffffff + cmp x1, x2 + bne .Lfailure + + pass +.Lfailure: + fail diff --git a/sim/testsuite/sim/aarch64/fminnm.s b/sim/testsuite/sim/aarch64/fminnm.s new file mode 100644 index 0000000..43ccd7c --- /dev/null +++ b/sim/testsuite/sim/aarch64/fminnm.s @@ -0,0 +1,82 @@ +# mach: aarch64 + +# Check the FP min/max number instructions: fminnm, fmaxnm, dminnm, dmaxnm. +# For min, check 2/1, 1/0, -1/-Inf. +# For max, check 1/2, -1/0, 1/+inf. 
+
+.include "testutils.inc"
+
+	start
+	fmov	s0, #2.0
+	fmov	s1, #1.0
+	fminnm	s2, s0, s1
+	fcmp	s2, s1
+	bne	.Lfailure
+	fmov	d0, #2.0
+	fmov	d1, #1.0
+	fminnm	d2, d0, d1
+	fcmp	d2, d1
+	bne	.Lfailure
+
+	fmov	s0, #1.0
+	fmov	s1, wzr
+	fminnm	s2, s0, s1
+	fcmp	s2, s1
+	bne	.Lfailure
+	fmov	d0, #1.0
+	fmov	d1, xzr
+	fminnm	d2, d0, d1
+	fcmp	d2, d1
+	bne	.Lfailure
+
+	fmov	s0, #-1.0
+	fmov	s1, wzr
+	fdiv	s1, s0, s1
+	fminnm	s2, s0, s1
+	fcmp	s2, s1
+	bne	.Lfailure
+	fmov	d0, #-1.0
+	fmov	d1, xzr
+	fdiv	d1, d0, d1		// d1 = -1.0 / 0.0 = -Inf
+	fminnm	d2, d0, d1		// separate dest so the result can be checked
+	fcmp	d2, d1			// min (-1, -Inf) must be -Inf
+	bne	.Lfailure
+
+	fmov	s0, #1.0
+	fmov	s1, #2.0
+	fmaxnm	s2, s0, s1
+	fcmp	s2, s1
+	bne	.Lfailure
+	fmov	d0, #1.0
+	fmov	d1, #2.0
+	fmaxnm	d2, d0, d1
+	fcmp	d2, d1
+	bne	.Lfailure
+
+	fmov	s0, #-1.0
+	fmov	s1, wzr
+	fmaxnm	s2, s0, s1
+	fcmp	s2, s1
+	bne	.Lfailure
+	fmov	d0, #-1.0
+	fmov	d1, xzr
+	fmaxnm	d2, d0, d1
+	fcmp	d2, d1
+	bne	.Lfailure
+
+	fmov	s0, #1.0
+	fmov	s1, wzr
+	fdiv	s1, s0, s1
+	fmaxnm	s2, s0, s1
+	fcmp	s2, s1
+	bne	.Lfailure
+	fmov	d0, #1.0
+	fmov	d1, xzr
+	fdiv	d1, d0, d1		// d1 = 1.0 / 0.0 = +Inf
+	fmaxnm	d2, d0, d1		// separate dest so the result can be checked
+	fcmp	d2, d1			// max (1, +Inf) must be +Inf
+	bne	.Lfailure
+
+	pass
+.Lfailure:
+	fail
diff --git a/sim/testsuite/sim/aarch64/mls.s b/sim/testsuite/sim/aarch64/mls.s
new file mode 100644
index 0000000..a34a1aa
--- /dev/null
+++ b/sim/testsuite/sim/aarch64/mls.s
@@ -0,0 +1,103 @@
+# mach: aarch64
+
+# Check the vector multiply subtract instruction: mls.
+ +.include "testutils.inc" + +input: + .word 0x04030201 + .word 0x08070605 + .word 0x0c0b0a09 + .word 0x100f0e0d +m8b: + .word 0xf1f8fd00 + .word 0xc1d0dde8 +m16b: + .word 0xf1f8fd00 + .word 0xc1d0dde8 + .word 0x71889db0 + .word 0x01203d58 +m4h: + .word 0xe7f8fc00 + .word 0x8fd0c3e8 +m8h: + .word 0xe7f8fc00 + .word 0x8fd0c3e8 + .word 0xf7884bb0 + .word 0x1f209358 +m2s: + .word 0xebf5fc00 + .word 0x5b95c3e8 +m4s: + .word 0xebf5fc00 + .word 0x5b95c3e8 + .word 0x4ad54bb0 + .word 0xb9b49358 + + start + adrp x0, input + ldr q0, [x0, #:lo12:input] + + movi v1.8b, #1 + mls v1.8b, v0.8b, v0.8b + mov x1, v1.d[0] + adrp x3, m8b + ldr x4, [x3, #:lo12:m8b] + cmp x1, x4 + bne .Lfailure + + movi v1.16b, #1 + mls v1.16b, v0.16b, v0.16b + mov x1, v1.d[0] + mov x2, v1.d[1] + adrp x3, m16b + ldr x4, [x3, #:lo12:m16b] + cmp x1, x4 + bne .Lfailure + ldr x5, [x3, #:lo12:m16b+8] + cmp x2, x5 + bne .Lfailure + + movi v1.4h, #1 + mls v1.4h, v0.4h, v0.4h + mov x1, v1.d[0] + adrp x3, m4h + ldr x4, [x3, #:lo12:m4h] + cmp x1, x4 + bne .Lfailure + + movi v1.8h, #1 + mls v1.8h, v0.8h, v0.8h + mov x1, v1.d[0] + mov x2, v1.d[1] + adrp x3, m8h + ldr x4, [x3, #:lo12:m8h] + cmp x1, x4 + bne .Lfailure + ldr x5, [x3, #:lo12:m8h+8] + cmp x2, x5 + bne .Lfailure + + movi v1.2s, #1 + mls v1.2s, v0.2s, v0.2s + mov x1, v1.d[0] + adrp x3, m2s + ldr x4, [x3, #:lo12:m2s] + cmp x1, x4 + bne .Lfailure + + movi v1.4s, #1 + mls v1.4s, v0.4s, v0.4s + mov x1, v1.d[0] + mov x2, v1.d[1] + adrp x3, m4s + ldr x4, [x3, #:lo12:m4s] + cmp x1, x4 + bne .Lfailure + ldr x5, [x3, #:lo12:m4s+8] + cmp x2, x5 + bne .Lfailure + + pass +.Lfailure: + fail diff --git a/sim/testsuite/sim/aarch64/mul.s b/sim/testsuite/sim/aarch64/mul.s new file mode 100644 index 0000000..783dba7 --- /dev/null +++ b/sim/testsuite/sim/aarch64/mul.s @@ -0,0 +1,99 @@ +# mach: aarch64 + +# Check the non-widening multiply vector instruction: mul. 
+
+.include "testutils.inc"
+
+	.data
+	.align 4
+input:
+	.word 0x04030201
+	.word 0x08070605
+	.word 0x0c0b0a09
+	.word 0x100f0e0d
+m8b:
+	.word 0x10090401
+	.word 0x40312419
+m16b:
+	.word 0x10090401
+	.word 0x40312419
+	.word 0x90796451
+	.word 0x00e1c4a9
+m4h:
+	.word 0x18090401
+	.word 0x70313c19
+m8h:
+	.word 0x18090401
+	.word 0x70313c19
+	.word 0x0879b451
+	.word 0xe0e16ca9
+m2s:
+	.word 0x140a0401
+	.word 0xa46a3c19
+m4s:
+	.word 0x140a0401
+	.word 0xa46a3c19
+	.word 0xb52ab451
+	.word 0x464b6ca9
+
+	start
+	adrp	x0, input
+	ldr	q0, [x0, #:lo12:input]
+
+	mul	v1.8b, v0.8b, v0.8b
+	mov	x1, v1.d[0]
+	adrp	x3, m8b
+	ldr	x4, [x3, #:lo12:m8b]	// base is the page adrp computed for this label
+	cmp	x1, x4
+	bne	.Lfailure
+
+	mul	v1.16b, v0.16b, v0.16b
+	mov	x1, v1.d[0]
+	mov	x2, v1.d[1]
+	adrp	x3, m16b
+	ldr	x4, [x3, #:lo12:m16b]
+	cmp	x1, x4
+	bne	.Lfailure
+	ldr	x5, [x3, #:lo12:m16b+8]
+	cmp	x2, x5
+	bne	.Lfailure
+
+	mul	v1.4h, v0.4h, v0.4h
+	mov	x1, v1.d[0]
+	adrp	x3, m4h
+	ldr	x4, [x3, #:lo12:m4h]
+	cmp	x1, x4
+	bne	.Lfailure
+
+	mul	v1.8h, v0.8h, v0.8h
+	mov	x1, v1.d[0]
+	mov	x2, v1.d[1]
+	adrp	x3, m8h
+	ldr	x4, [x3, #:lo12:m8h]
+	cmp	x1, x4
+	bne	.Lfailure
+	ldr	x5, [x3, #:lo12:m8h+8]
+	cmp	x2, x5
+	bne	.Lfailure
+
+	mul	v1.2s, v0.2s, v0.2s
+	mov	x1, v1.d[0]
+	adrp	x3, m2s
+	ldr	x4, [x3, #:lo12:m2s]
+	cmp	x1, x4
+	bne	.Lfailure
+
+	mul	v1.4s, v0.4s, v0.4s
+	mov	x1, v1.d[0]
+	mov	x2, v1.d[1]
+	adrp	x3, m4s
+	ldr	x4, [x3, #:lo12:m4s]
+	cmp	x1, x4
+	bne	.Lfailure
+	ldr	x5, [x3, #:lo12:m4s+8]
+	cmp	x2, x5
+	bne	.Lfailure
+
+	pass
+.Lfailure:
+	fail
-- 
cgit v1.1