From 4cdecf219b3f8165a24ca83bc92f0241e0832513 Mon Sep 17 00:00:00 2001 From: Albert Ou Date: Sat, 28 Sep 2019 19:22:11 +0800 Subject: rvv: fix the rounding bit position for vnclip instructions. 1. The rounding increment should be derived from the shift amount, not SEW. 2. Use a 128-bit type to store the temporary result, to handle the shift = 63 case in rv64 Signed-off-by: Albert Ou Signed-off-by: Chih-Min Chao --- riscv/insns/vnclip_vi.h | 11 ++++++----- riscv/insns/vnclip_vv.h | 17 ++++++----------- riscv/insns/vnclip_vx.h | 16 ++++++---------- riscv/insns/vnclipu_vi.h | 6 ++++-- riscv/insns/vnclipu_vv.h | 17 ++++++----------- riscv/insns/vnclipu_vx.h | 17 ++++++----------- 6 files changed, 34 insertions(+), 50 deletions(-) diff --git a/riscv/insns/vnclip_vi.h b/riscv/insns/vnclip_vi.h index ca27593..287035e 100644 --- a/riscv/insns/vnclip_vi.h +++ b/riscv/insns/vnclip_vi.h @@ -4,14 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1; int64_t int_min = -(1 << (P.VU.vsew - 1)); VI_VVXI_LOOP_NARROW ({ - int64_t result = vs2; -// rounding - INT_ROUNDING(result, xrm, sew); + unsigned shift = zimm5 & ((sew * 2) - 1); + + // rounding + INT_ROUNDING(result, xrm, shift); - result = vsext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32? 
(sew * 2) - 1: 31)); + result = result >> shift; -// saturation + // saturation if (result < int_min) { result = int_min; P.VU.vxsat = 1; diff --git a/riscv/insns/vnclip_vv.h b/riscv/insns/vnclip_vv.h index 7bcb4cb..4ec0f40 100644 --- a/riscv/insns/vnclip_vv.h +++ b/riscv/insns/vnclip_vv.h @@ -4,20 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1; int64_t int_min = -(1 << (P.VU.vsew - 1)); VI_VVXI_LOOP_NARROW ({ + int128_t result = vs2; + unsigned shift = vs1 & ((sew * 2) - 1); - int64_t result = vs2; -// rounding - INT_ROUNDING(result, xrm, sew); + // rounding + INT_ROUNDING(result, xrm, shift); -// unsigned shifting to rs1 - uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1)); - if (unsigned_shift_amount >= (2 * sew)) { - unsigned_shift_amount = 2 * sew - 1; - } - - result = (vsext(result, sew * 2)) >> unsigned_shift_amount; + result = result >> shift; -// saturation + // saturation if (result < int_min) { result = int_min; P.VU.vxsat = 1; diff --git a/riscv/insns/vnclip_vx.h b/riscv/insns/vnclip_vx.h index b66e830..068d866 100644 --- a/riscv/insns/vnclip_vx.h +++ b/riscv/insns/vnclip_vx.h @@ -4,19 +4,15 @@ int64_t int_max = (1 << (P.VU.vsew - 1)) - 1; int64_t int_min = -(1 << (P.VU.vsew - 1)); VI_VVXI_LOOP_NARROW ({ + int128_t result = vs2; + unsigned shift = rs1 & ((sew * 2) - 1); - int64_t result = vs2; -// rounding - INT_ROUNDING(result, xrm, sew); + // rounding + INT_ROUNDING(result, xrm, shift); -// unsigned shifting to rs1 - uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1)); - if (unsigned_shift_amount >= (2 * sew)) { - unsigned_shift_amount = 2 * sew - 1; - } - result = vsext(result, sew * 2) >> unsigned_shift_amount; + result = result >> shift; -// saturation + // saturation if (result < int_min) { result = int_min; P.VU.vxsat = 1; diff --git a/riscv/insns/vnclipu_vi.h b/riscv/insns/vnclipu_vi.h index 61cb015..73ad777 100644 --- a/riscv/insns/vnclipu_vi.h +++ b/riscv/insns/vnclipu_vi.h @@ -4,11 +4,13 @@ uint64_t 
int_max = ~(-1ll << P.VU.vsew); VI_VVXI_LOOP_NARROW ({ uint64_t result = vs2_u; + unsigned shift = zimm5 & ((sew * 2) - 1); + // rounding - INT_ROUNDING(result, xrm, sew); + INT_ROUNDING(result, xrm, shift); // unsigned shifting to rs1 - result = vzext(result, sew * 2) >> (zimm5 & ((sew * 2) < 32? (sew * 2) - 1: 31)); + result = result >> shift; // saturation if (result & (uint64_t)(-1ll << sew)) { diff --git a/riscv/insns/vnclipu_vv.h b/riscv/insns/vnclipu_vv.h index 004f24f..3be3446 100644 --- a/riscv/insns/vnclipu_vv.h +++ b/riscv/insns/vnclipu_vv.h @@ -3,20 +3,15 @@ VRM xrm = P.VU.get_vround_mode(); uint64_t int_max = ~(-1ll << P.VU.vsew); VI_VVXI_LOOP_NARROW ({ + uint128_t result = vs2_u; + unsigned shift = vs1 & ((sew * 2) - 1); - uint64_t result = vs2_u; + // rounding + INT_ROUNDING(result, xrm, shift); -// rounding - INT_ROUNDING(result, xrm, sew); + result = result >> shift; -// unsigned shifting to rs1 - uint64_t unsigned_shift_amount = (uint64_t)(vs1 & ((sew * 2) - 1)); - if (unsigned_shift_amount >= (2 * sew)) { - result = 0; - } else { - result = vzext(result, sew * 2) >> unsigned_shift_amount; - } -// saturation + // saturation if (result & (uint64_t)(-1ll << sew)) { result = int_max; P.VU.vxsat = 1; diff --git a/riscv/insns/vnclipu_vx.h b/riscv/insns/vnclipu_vx.h index 0507a2b..5b064ce 100644 --- a/riscv/insns/vnclipu_vx.h +++ b/riscv/insns/vnclipu_vx.h @@ -3,20 +3,15 @@ VRM xrm = P.VU.get_vround_mode(); uint64_t int_max = ~(-1ll << P.VU.vsew); VI_VVXI_LOOP_NARROW ({ - uint64_t result = vs2; + uint128_t result = vs2_u; + unsigned shift = rs1 & ((sew * 2) - 1); -// rounding - INT_ROUNDING(result, xrm, sew); + // rounding + INT_ROUNDING(result, xrm, shift); -// unsigned shifting to rs1 - uint64_t unsigned_shift_amount = (uint64_t)(rs1 & ((sew * 2) - 1)); - if (unsigned_shift_amount >= (2 * sew)) { - result = 0; - } else { - result = vzext(result, sew * 2) >> unsigned_shift_amount; - } + result = result >> shift; -// saturation + // saturation if 
(result & (uint64_t)(-1ll << sew)) { result = int_max; P.VU.vxsat = 1; -- cgit v1.1