From 071d49ac7714a26ef92886e9504c35d0edfa15d4 Mon Sep 17 00:00:00 2001
From: Chih-Min Chao
Date: Thu, 28 Nov 2019 19:42:11 -0800
Subject: rvv: add vfxxx.vv float64 support

Signed-off-by: Chih-Min Chao
---
 riscv/decode.h | 17 ++++++++++++-----
 riscv/insns/vfadd_vv.h | 3 +++
 riscv/insns/vfclass_v.h | 3 +++
 riscv/insns/vfcvt_xu_f_v.h | 3 +++
 riscv/insns/vfdiv_vv.h | 3 +++
 riscv/insns/vfdot_vv.h | 3 +++
 riscv/insns/vfmacc_vv.h | 3 +++
 riscv/insns/vfmadd_vv.h | 3 +++
 riscv/insns/vfmax_vv.h | 3 +++
 riscv/insns/vfmin_vv.h | 3 +++
 riscv/insns/vfmsac_vv.h | 3 +++
 riscv/insns/vfmsub_vv.h | 3 +++
 riscv/insns/vfmul_vv.h | 3 +++
 riscv/insns/vfnmacc_vv.h | 3 +++
 riscv/insns/vfnmadd_vv.h | 3 +++
 riscv/insns/vfnmsac_vv.h | 3 +++
 riscv/insns/vfnmsub_vv.h | 3 +++
 riscv/insns/vfsgnj_vv.h | 3 +++
 riscv/insns/vfsgnjn_vv.h | 3 +++
 riscv/insns/vfsgnjx_vv.h | 3 +++
 riscv/insns/vfsqrt_v.h | 3 +++
 riscv/insns/vfsub_vv.h | 3 +++
 22 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/riscv/decode.h b/riscv/decode.h
index 495ffc4..bdee837 100644
--- a/riscv/decode.h
+++ b/riscv/decode.h
@@ -1595,9 +1595,9 @@ for (reg_t i = 0; i < vlmax; ++i) { \
 // vector: vfp helper
 //
 #define VI_VFP_COMMON \
- require_extension('F'); \
 require_fp; \
- require(P.VU.vsew == 32); \
+ require((P.VU.vsew == e32 && p->supports_extension('F')) || \
+ (P.VU.vsew == e64 && p->supports_extension('D'))); \
 require_vector;\
 reg_t vl = P.VU.vl; \
 reg_t rd_num = insn.rd(); \
@@ -1669,7 +1669,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \
 P.VU.vstart = 0; \
 set_fp_exceptions;
-#define VI_VFP_VV_LOOP(BODY) \
+#define VI_VFP_VV_LOOP(BODY32, BODY64) \
 VI_CHECK_SSS(true); \
 VI_VFP_LOOP_BASE \
 switch(P.VU.vsew) { \
@@ -1677,12 +1677,19 @@ for (reg_t i = 0; i < vlmax; ++i) { \
 float32_t &vd = P.VU.elt(rd_num, i); \
 float32_t vs1 = P.VU.elt(rs1_num, i); \
 float32_t vs2 = P.VU.elt(rs2_num, i); \
- BODY; \
+ BODY32; \
+ set_fp_exceptions; \
+ break; \
+ }\
+ case e64: {\
+ float64_t &vd = P.VU.elt(rd_num, i); \
+ float64_t vs1 = P.VU.elt(rs1_num, i); \
+ float64_t vs2 = P.VU.elt(rs2_num, i); \
+ BODY64; \
+ set_fp_exceptions; \
+ break; \
+ }\
 case e16: \
- case e8: \
 default: \
 require(0); \
 break; \
diff --git a/riscv/insns/vfadd_vv.h b/riscv/insns/vfadd_vv.h
index de0ae53..b333a8a 100644
--- a/riscv/insns/vfadd_vv.h
+++ b/riscv/insns/vfadd_vv.h
@@ -2,4 +2,7 @@
 VI_VFP_VV_LOOP
 ({
 vd = f32_add(vs1, vs2);
+},
+{
+ vd = f64_add(vs1, vs2);
 })
diff --git a/riscv/insns/vfclass_v.h b/riscv/insns/vfclass_v.h
index 75f29a2..8ee092f 100644
--- a/riscv/insns/vfclass_v.h
+++ b/riscv/insns/vfclass_v.h
@@ -2,4 +2,7 @@
 VI_VFP_VV_LOOP
 ({
 vd.v = f32_classify(vs2);
+},
+{
+ vd.v = f64_classify(vs2);
 })
diff --git a/riscv/insns/vfcvt_xu_f_v.h b/riscv/insns/vfcvt_xu_f_v.h
index 76c7735..5f19f90 100644
--- a/riscv/insns/vfcvt_xu_f_v.h
+++ b/riscv/insns/vfcvt_xu_f_v.h
@@ -2,4 +2,7 @@
 VI_VFP_VV_LOOP
 ({
 P.VU.elt(rd_num, i) = f32_to_ui32(vs2, STATE.frm, true);
+},
+{
+ P.VU.elt(rd_num, i) = f64_to_ui64(vs2, STATE.frm, true);
 })
diff --git a/riscv/insns/vfdiv_vv.h b/riscv/insns/vfdiv_vv.h
index c20ff1d..8a49a91 100644
--- a/riscv/insns/vfdiv_vv.h
+++ b/riscv/insns/vfdiv_vv.h
@@ -2,4 +2,7 @@
 VI_VFP_VV_LOOP
 ({
 vd = f32_div(vs2, vs1);
+},
+{
+ vd = f64_div(vs2, vs1);
 })
diff --git a/riscv/insns/vfdot_vv.h b/riscv/insns/vfdot_vv.h
index 11c8bce..85d0b8a 100644
--- a/riscv/insns/vfdot_vv.h
+++ b/riscv/insns/vfdot_vv.h
@@ -2,4 +2,7 @@
 VI_VFP_VV_LOOP
 ({
 vd = f32_add(vd, f32_mul(vs2, vs1));
+},
+{
+ vd = f64_add(vd, f64_mul(vs2, vs1));
 })
diff --git a/riscv/insns/vfmacc_vv.h
b/riscv/insns/vfmacc_vv.h index 663a648..f1caf33 100644 --- a/riscv/insns/vfmacc_vv.h +++ b/riscv/insns/vfmacc_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_mulAdd(vs1, vs2, vd); +}, +{ + vd = f64_mulAdd(vs1, vs2, vd); }) diff --git a/riscv/insns/vfmadd_vv.h b/riscv/insns/vfmadd_vv.h index c967ec3..a095c38 100644 --- a/riscv/insns/vfmadd_vv.h +++ b/riscv/insns/vfmadd_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_mulAdd(vd, vs1, vs2); +}, +{ + vd = f64_mulAdd(vd, vs1, vs2); }) diff --git a/riscv/insns/vfmax_vv.h b/riscv/insns/vfmax_vv.h index 6d12f08..2329e74 100644 --- a/riscv/insns/vfmax_vv.h +++ b/riscv/insns/vfmax_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_max(vs2, vs1); +}, +{ + vd = f64_max(vs2, vs1); }) diff --git a/riscv/insns/vfmin_vv.h b/riscv/insns/vfmin_vv.h index 65d20ff..399b563 100644 --- a/riscv/insns/vfmin_vv.h +++ b/riscv/insns/vfmin_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_min(vs2, vs1); +}, +{ + vd = f64_min(vs2, vs1); }) diff --git a/riscv/insns/vfmsac_vv.h b/riscv/insns/vfmsac_vv.h index 952c12e..9b4ed9f 100644 --- a/riscv/insns/vfmsac_vv.h +++ b/riscv/insns/vfmsac_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_mulAdd(vs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(vs1, vs2, f64(vd.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfmsub_vv.h b/riscv/insns/vfmsub_vv.h index a58f1e3..f8e0b3d 100644 --- a/riscv/insns/vfmsub_vv.h +++ b/riscv/insns/vfmsub_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_mulAdd(vd, vs1, f32(vs2.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(vd, vs1, f64(vs2.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfmul_vv.h b/riscv/insns/vfmul_vv.h index 259dc01..0e4d499 100644 --- a/riscv/insns/vfmul_vv.h +++ b/riscv/insns/vfmul_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_mul(vs1, vs2); +}, +{ + vd = f64_mul(vs1, vs2); }) diff --git a/riscv/insns/vfnmacc_vv.h b/riscv/insns/vfnmacc_vv.h index b950df9..62a1486 100644 --- a/riscv/insns/vfnmacc_vv.h +++ b/riscv/insns/vfnmacc_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_mulAdd(f32(vs2.v ^ F32_SIGN), vs1, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(vs2.v ^ F64_SIGN), vs1, f64(vd.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfnmadd_vv.h b/riscv/insns/vfnmadd_vv.h index f96d102..fc70574 100644 --- a/riscv/insns/vfnmadd_vv.h +++ b/riscv/insns/vfnmadd_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, f32(vs2.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), vs1, f64(vs2.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfnmsac_vv.h b/riscv/insns/vfnmsac_vv.h index 0ecd648..795dc38 100644 --- a/riscv/insns/vfnmsac_vv.h +++ b/riscv/insns/vfnmsac_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_mulAdd(f32(vs1.v ^ F32_SIGN), vs2, vd); +}, +{ + vd = f64_mulAdd(f64(vs1.v ^ F64_SIGN), vs2, vd); }) diff --git a/riscv/insns/vfnmsub_vv.h b/riscv/insns/vfnmsub_vv.h index da9f59c..ff4a9b5 100644 --- a/riscv/insns/vfnmsub_vv.h +++ b/riscv/insns/vfnmsub_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), vs1, vs2); +}, +{ + vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), vs1, vs2); }) diff --git a/riscv/insns/vfsgnj_vv.h b/riscv/insns/vfsgnj_vv.h index 050dd9c..12d3d43 100644 --- a/riscv/insns/vfsgnj_vv.h +++ b/riscv/insns/vfsgnj_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = fsgnj32(vs2.v, vs1.v, false, false); +}, +{ + vd = fsgnj64(vs2.v, vs1.v, false, false); }) diff --git a/riscv/insns/vfsgnjn_vv.h b/riscv/insns/vfsgnjn_vv.h index 6603352..a16acf7 100644 --- a/riscv/insns/vfsgnjn_vv.h +++ b/riscv/insns/vfsgnjn_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP 
({ vd = fsgnj32(vs2.v, vs1.v, true, false); +}, +{ + vd = fsgnj64(vs2.v, vs1.v, true, false); }) diff --git a/riscv/insns/vfsgnjx_vv.h b/riscv/insns/vfsgnjx_vv.h index 9cc12dc..9dbe078 100644 --- a/riscv/insns/vfsgnjx_vv.h +++ b/riscv/insns/vfsgnjx_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = fsgnj32(vs2.v, vs1.v, false, true); +}, +{ + vd = fsgnj64(vs2.v, vs1.v, false, true); }) diff --git a/riscv/insns/vfsqrt_v.h b/riscv/insns/vfsqrt_v.h index 4931037..4a36932 100644 --- a/riscv/insns/vfsqrt_v.h +++ b/riscv/insns/vfsqrt_v.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_sqrt(vs2); +}, +{ + vd = f64_sqrt(vs2); }) diff --git a/riscv/insns/vfsub_vv.h b/riscv/insns/vfsub_vv.h index 71e7a43..40545fb 100644 --- a/riscv/insns/vfsub_vv.h +++ b/riscv/insns/vfsub_vv.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP ({ vd = f32_sub(vs2, vs1); +}, +{ + vd = f64_sub(vs2, vs1); }) -- cgit v1.1 From a9dce622c3d6fb2b72a4ca6daebdacebfd55b274 Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Thu, 28 Nov 2019 19:55:33 -0800 Subject: rvv: add vfxxx.vf float64 support Signed-off-by: Chih-Min Chao --- riscv/decode.h | 12 ++++++++++-- riscv/insns/vfadd_vf.h | 5 ++++- riscv/insns/vfcvt_f_x_v.h | 4 ++++ riscv/insns/vfcvt_f_xu_v.h | 4 ++++ riscv/insns/vfcvt_x_f_v.h | 3 +++ riscv/insns/vfdiv_vf.h | 3 +++ riscv/insns/vfmacc_vf.h | 3 +++ riscv/insns/vfmadd_vf.h | 3 +++ riscv/insns/vfmax_vf.h | 3 +++ riscv/insns/vfmin_vf.h | 3 +++ riscv/insns/vfmsac_vf.h | 3 +++ riscv/insns/vfmsub_vf.h | 3 +++ riscv/insns/vfmul_vf.h | 3 +++ riscv/insns/vfnmacc_vf.h | 3 +++ riscv/insns/vfnmadd_vf.h | 3 +++ riscv/insns/vfnmsac_vf.h | 3 +++ riscv/insns/vfnmsub_vf.h | 3 +++ riscv/insns/vfrdiv_vf.h | 3 +++ riscv/insns/vfrsub_vf.h | 3 +++ riscv/insns/vfsgnj_vf.h | 3 +++ riscv/insns/vfsgnjn_vf.h | 3 +++ riscv/insns/vfsgnjx_vf.h | 3 +++ riscv/insns/vfsub_vf.h | 3 +++ 23 files changed, 79 insertions(+), 3 deletions(-) diff --git a/riscv/decode.h b/riscv/decode.h index bdee837..62e798e 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -1712,7 +1712,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \ DEBUG_RVV_FP_VV; \ VI_VFP_LOOP_REDUCTION_END(e64) -#define VI_VFP_VF_LOOP(BODY) \ +#define VI_VFP_VF_LOOP(BODY32, BODY64) \ VI_CHECK_SSS(false); \ VI_VFP_LOOP_BASE \ switch(P.VU.vsew) { \ @@ -1720,7 +1720,15 @@ for (reg_t i = 0; i < vlmax; ++i) { \ float32_t &vd = P.VU.elt(rd_num, i); \ float32_t rs1 = f32(READ_FREG(rs1_num)); \ float32_t vs2 = P.VU.elt(rs2_num, i); \ - BODY; \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + case e64: {\ + float64_t &vd = P.VU.elt(rd_num, i); \ + float64_t rs1 = f64(READ_FREG(rs1_num)); \ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + BODY64; \ set_fp_exceptions; \ break; \ }\ diff --git a/riscv/insns/vfadd_vf.h b/riscv/insns/vfadd_vf.h index 60dec4a..bdb7f75 100644 --- a/riscv/insns/vfadd_vf.h +++ b/riscv/insns/vfadd_vf.h @@ -1,5 +1,8 @@ // vfadd.vf vd, vs2, rs1 VI_VFP_VF_LOOP ({ - vd = f32_add(rs1, vs2); + vd = f32_add(rs1, vs2); +}, +{ + vd = f64_add(rs1, vs2); }) diff --git a/riscv/insns/vfcvt_f_x_v.h b/riscv/insns/vfcvt_f_x_v.h index f6604fb..fdaa697 100644 --- a/riscv/insns/vfcvt_f_x_v.h +++ b/riscv/insns/vfcvt_f_x_v.h @@ -3,4 +3,8 @@ VI_VFP_VF_LOOP ({ auto vs2_i = P.VU.elt(rs2_num, i); vd = i32_to_f32(vs2_i); +}, +{ + auto vs2_i = P.VU.elt(rs2_num, i); + vd = i64_to_f64(vs2_i); }) diff --git a/riscv/insns/vfcvt_f_xu_v.h b/riscv/insns/vfcvt_f_xu_v.h index 2c845ac..01ea61c 100644 --- a/riscv/insns/vfcvt_f_xu_v.h +++ b/riscv/insns/vfcvt_f_xu_v.h @@ -3,4 +3,8 @@ VI_VFP_VF_LOOP ({ auto vs2_u = P.VU.elt(rs2_num, i); vd = 
ui32_to_f32(vs2_u); +}, +{ + auto vs2_u = P.VU.elt(rs2_num, i); + vd = ui64_to_f64(vs2_u); }) diff --git a/riscv/insns/vfcvt_x_f_v.h b/riscv/insns/vfcvt_x_f_v.h index a9eedc4..96bc481 100644 --- a/riscv/insns/vfcvt_x_f_v.h +++ b/riscv/insns/vfcvt_x_f_v.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ P.VU.elt(rd_num, i) = f32_to_i32(vs2, STATE.frm, true); +}, +{ + P.VU.elt(rd_num, i) = f64_to_i64(vs2, STATE.frm, true); }) diff --git a/riscv/insns/vfdiv_vf.h b/riscv/insns/vfdiv_vf.h index 2b8124c..ce21730 100644 --- a/riscv/insns/vfdiv_vf.h +++ b/riscv/insns/vfdiv_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_div(vs2, rs1); +}, +{ + vd = f64_div(vs2, rs1); }) diff --git a/riscv/insns/vfmacc_vf.h b/riscv/insns/vfmacc_vf.h index 5013d34..fca4184 100644 --- a/riscv/insns/vfmacc_vf.h +++ b/riscv/insns/vfmacc_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_mulAdd(rs1, vs2, vd); +}, +{ + vd = f64_mulAdd(rs1, vs2, vd); }) diff --git a/riscv/insns/vfmadd_vf.h b/riscv/insns/vfmadd_vf.h index 920c392..7707dae 100644 --- a/riscv/insns/vfmadd_vf.h +++ b/riscv/insns/vfmadd_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_mulAdd(vd, rs1, vs2); +}, +{ + vd = f64_mulAdd(vd, rs1, vs2); }) diff --git a/riscv/insns/vfmax_vf.h b/riscv/insns/vfmax_vf.h index eb70e48..a8df880 100644 --- a/riscv/insns/vfmax_vf.h +++ b/riscv/insns/vfmax_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_max(vs2, rs1); +}, +{ + vd = f64_max(vs2, rs1); }) diff --git a/riscv/insns/vfmin_vf.h b/riscv/insns/vfmin_vf.h index bf06638..a55462b 100644 --- a/riscv/insns/vfmin_vf.h +++ b/riscv/insns/vfmin_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_min(vs2, rs1); +}, +{ + vd = f64_min(vs2, rs1); }) diff --git a/riscv/insns/vfmsac_vf.h b/riscv/insns/vfmsac_vf.h index 23661b3..0f42560 100644 --- a/riscv/insns/vfmsac_vf.h +++ b/riscv/insns/vfmsac_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_mulAdd(rs1, vs2, f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(rs1, vs2, f64(vd.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfmsub_vf.h b/riscv/insns/vfmsub_vf.h index 2328d07..bd968e3 100644 --- a/riscv/insns/vfmsub_vf.h +++ b/riscv/insns/vfmsub_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_mulAdd(vd, rs1, f32(vs2.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(vd, rs1, f64(vs2.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfmul_vf.h b/riscv/insns/vfmul_vf.h index 086b6d8..9e7d481 100644 --- a/riscv/insns/vfmul_vf.h +++ b/riscv/insns/vfmul_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_mul(vs2, rs1); +}, +{ + vd = f64_mul(vs2, rs1); }) diff --git a/riscv/insns/vfnmacc_vf.h b/riscv/insns/vfnmacc_vf.h index 04a31bf..da58d3a 100644 --- a/riscv/insns/vfnmacc_vf.h +++ b/riscv/insns/vfnmacc_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), f32(vd.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(rs1, f64(vs2.v ^ F64_SIGN), f64(vd.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfnmadd_vf.h b/riscv/insns/vfnmadd_vf.h index f8f3b83..b26f377 100644 --- a/riscv/insns/vfnmadd_vf.h +++ b/riscv/insns/vfnmadd_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, f32(vs2.v ^ F32_SIGN)); +}, +{ + vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), rs1, f64(vs2.v ^ F64_SIGN)); }) diff --git a/riscv/insns/vfnmsac_vf.h b/riscv/insns/vfnmsac_vf.h index c3dc12c..b78d0ca 100644 --- a/riscv/insns/vfnmsac_vf.h +++ b/riscv/insns/vfnmsac_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_mulAdd(rs1, f32(vs2.v ^ F32_SIGN), vd); +}, +{ + vd = f64_mulAdd(rs1, f64(vs2.v ^ F64_SIGN), vd); }) diff --git a/riscv/insns/vfnmsub_vf.h b/riscv/insns/vfnmsub_vf.h index 
1879b9e..6c6dc27 100644 --- a/riscv/insns/vfnmsub_vf.h +++ b/riscv/insns/vfnmsub_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_mulAdd(f32(vd.v ^ F32_SIGN), rs1, vs2); +}, +{ + vd = f64_mulAdd(f64(vd.v ^ F64_SIGN), rs1, vs2); }) diff --git a/riscv/insns/vfrdiv_vf.h b/riscv/insns/vfrdiv_vf.h index 49e4293..73ec534 100644 --- a/riscv/insns/vfrdiv_vf.h +++ b/riscv/insns/vfrdiv_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_div(rs1, vs2); +}, +{ + vd = f64_div(rs1, vs2); }) diff --git a/riscv/insns/vfrsub_vf.h b/riscv/insns/vfrsub_vf.h index ee8ac83..d9a1986 100644 --- a/riscv/insns/vfrsub_vf.h +++ b/riscv/insns/vfrsub_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_sub(rs1, vs2); +}, +{ + vd = f64_sub(rs1, vs2); }) diff --git a/riscv/insns/vfsgnj_vf.h b/riscv/insns/vfsgnj_vf.h index d93f175..c7f731b 100644 --- a/riscv/insns/vfsgnj_vf.h +++ b/riscv/insns/vfsgnj_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = fsgnj32(vs2.v, rs1.v, false, false); +}, +{ + vd = fsgnj64(vs2.v, rs1.v, false, false); }) diff --git a/riscv/insns/vfsgnjn_vf.h b/riscv/insns/vfsgnjn_vf.h index 303ec76..4511748 100644 --- a/riscv/insns/vfsgnjn_vf.h +++ b/riscv/insns/vfsgnjn_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = fsgnj32(vs2.v, rs1.v, true, false); +}, +{ + vd = fsgnj64(vs2.v, rs1.v, true, false); }) diff --git a/riscv/insns/vfsgnjx_vf.h b/riscv/insns/vfsgnjx_vf.h index 93f4303..c423060 100644 --- a/riscv/insns/vfsgnjx_vf.h +++ b/riscv/insns/vfsgnjx_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = fsgnj32(vs2.v, rs1.v, false, true); +}, +{ + vd = fsgnj64(vs2.v, rs1.v, false, true); }) diff --git a/riscv/insns/vfsub_vf.h b/riscv/insns/vfsub_vf.h index 38d6acc..a4702d0 100644 --- a/riscv/insns/vfsub_vf.h +++ b/riscv/insns/vfsub_vf.h @@ -2,4 +2,7 @@ VI_VFP_VF_LOOP ({ vd = f32_sub(vs2, rs1); +}, +{ + vd = f64_sub(vs2, rs1); }) -- cgit v1.1 From a94b8914a622820d1b8ba1bb66ce1c8a544a4073 Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Thu, 28 Nov 2019 20:06:06 -0800 Subject: rvv: add vmfxxx.v[vf] float64 support Signed-off-by: Chih-Min Chao --- riscv/decode.h | 31 +++++++++++++++++++++++++------ riscv/insns/vmfeq_vf.h | 8 ++++++-- riscv/insns/vmfeq_vv.h | 8 ++++++-- riscv/insns/vmfge_vf.h | 8 ++++++-- riscv/insns/vmfgt_vf.h | 8 ++++++-- riscv/insns/vmfle_vf.h | 8 ++++++-- riscv/insns/vmfle_vv.h | 8 ++++++-- riscv/insns/vmflt_vf.h | 8 ++++++-- riscv/insns/vmflt_vv.h | 8 ++++++-- riscv/insns/vmfne_vf.h | 8 ++++++-- riscv/insns/vmfne_vv.h | 8 ++++++-- 11 files changed, 85 insertions(+), 26 deletions(-) diff --git a/riscv/decode.h b/riscv/decode.h index 62e798e..acb0c4c 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -1655,12 +1655,12 @@ for (reg_t i = 0; i < vlmax; ++i) { \ #define VI_VFP_LOOP_CMP_END \ switch(P.VU.vsew) { \ - case e32: { \ + case e32: \ + case e64: { \ vdi = (vdi & ~mmask) | (((res) << mpos) & mmask); \ break; \ } \ case e16: \ - case e8: \ default: \ require(0); \ break; \ @@ -1741,12 +1741,31 @@ for (reg_t i = 0; i < vlmax; ++i) { \ DEBUG_RVV_FP_VF; \ VI_VFP_LOOP_END -#define VI_VFP_LOOP_CMP(BODY, is_vs1) \ +#define VI_VFP_LOOP_CMP(BODY32, BODY64, is_vs1) \ VI_CHECK_MSS(is_vs1); \ VI_VFP_LOOP_CMP_BASE \ - BODY; \ - set_fp_exceptions; \ - DEBUG_RVV_FP_VV; \ + switch(P.VU.vsew) { \ + case e32: {\ + float32_t vs2 = P.VU.elt(rs2_num, i); \ + float32_t vs1 = P.VU.elt(rs1_num, i); \ + float32_t rs1 = f32(READ_FREG(rs1_num)); \ + BODY32; \ + set_fp_exceptions; \ + break; \ + }\ + case e64: {\ + float64_t vs2 = P.VU.elt(rs2_num, i); \ + float64_t vs1 = P.VU.elt(rs1_num, i); \ + float64_t rs1 = 
f64(READ_FREG(rs1_num)); \ + BODY64; \ + set_fp_exceptions; \ + break; \ + }\ + case e16: \ + default: \ + require(0); \ + break; \ + }; \ VI_VFP_LOOP_CMP_END \ #define VI_VFP_VF_LOOP_WIDE(BODY) \ diff --git a/riscv/insns/vmfeq_vf.h b/riscv/insns/vmfeq_vf.h index f0e7109..766f0ab 100644 --- a/riscv/insns/vmfeq_vf.h +++ b/riscv/insns/vmfeq_vf.h @@ -1,5 +1,9 @@ -// vfeq.vf vd, vs2, fs1 +// vmfeq.vf vd, vs2, fs1 VI_VFP_LOOP_CMP ({ res = f32_eq(vs2, rs1); -}, false) +}, +{ + res = f64_eq(vs2, rs1); +}, +false) diff --git a/riscv/insns/vmfeq_vv.h b/riscv/insns/vmfeq_vv.h index 1be3a69..19117fc 100644 --- a/riscv/insns/vmfeq_vv.h +++ b/riscv/insns/vmfeq_vv.h @@ -1,5 +1,9 @@ -// vfeq.vv vd, vs2, vs1 +// vmfeq.vv vd, vs2, vs1 VI_VFP_LOOP_CMP ({ res = f32_eq(vs2, vs1); -}, true) +}, +{ + res = f64_eq(vs2, vs1); +}, +true) diff --git a/riscv/insns/vmfge_vf.h b/riscv/insns/vmfge_vf.h index 1c68366..c5f4c83 100644 --- a/riscv/insns/vmfge_vf.h +++ b/riscv/insns/vmfge_vf.h @@ -1,5 +1,9 @@ -// vfge.vf vd, vs2, rs1 +// vmfge.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ res = f32_le(rs1, vs2); -}, false) +}, +{ + res = f64_le(rs1, vs2); +}, +false) diff --git a/riscv/insns/vmfgt_vf.h b/riscv/insns/vmfgt_vf.h index 0979185..5387300 100644 --- a/riscv/insns/vmfgt_vf.h +++ b/riscv/insns/vmfgt_vf.h @@ -1,5 +1,9 @@ -// vfgt.vf vd, vs2, rs1 +// vmfgt.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ res = f32_lt(rs1, vs2); -}, false) +}, +{ + res = f64_lt(rs1, vs2); +}, +false) diff --git a/riscv/insns/vmfle_vf.h b/riscv/insns/vmfle_vf.h index 90607ec..1a3a7c4 100644 --- a/riscv/insns/vmfle_vf.h +++ b/riscv/insns/vmfle_vf.h @@ -1,5 +1,9 @@ -// vfle.vf vd, vs2, rs1 +// vmfle.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ res = f32_le(vs2, rs1); -}, false) +}, +{ + res = f64_le(vs2, rs1); +}, +false) diff --git a/riscv/insns/vmfle_vv.h b/riscv/insns/vmfle_vv.h index 6ccdfec..067f1a9 100644 --- a/riscv/insns/vmfle_vv.h +++ b/riscv/insns/vmfle_vv.h @@ -1,5 +1,9 @@ -// vfle.vv vd, vs2, rs1 +// vmfle.vv vd, vs2, rs1 VI_VFP_LOOP_CMP ({ res = f32_le(vs2, vs1); -}, true) +}, +{ + res = f64_le(vs2, vs1); +}, +true) diff --git a/riscv/insns/vmflt_vf.h b/riscv/insns/vmflt_vf.h index 6b71a4a..248071d 100644 --- a/riscv/insns/vmflt_vf.h +++ b/riscv/insns/vmflt_vf.h @@ -1,5 +1,9 @@ -// vflt.vf vd, vs2, rs1 +// vmflt.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ res = f32_lt(vs2, rs1); -}, false) +}, +{ + res = f64_lt(vs2, rs1); +}, +false) diff --git a/riscv/insns/vmflt_vv.h b/riscv/insns/vmflt_vv.h index a2ed8e3..71895df 100644 --- a/riscv/insns/vmflt_vv.h +++ b/riscv/insns/vmflt_vv.h @@ -1,5 +1,9 @@ -// vflt.vv vd, vs2, vs1 +// vmflt.vv vd, vs2, vs1 VI_VFP_LOOP_CMP ({ res = f32_lt(vs2, vs1); -}, true) +}, +{ + res = f64_lt(vs2, vs1); +}, +true) diff --git a/riscv/insns/vmfne_vf.h b/riscv/insns/vmfne_vf.h index ef63678..afccbcb 100644 --- a/riscv/insns/vmfne_vf.h +++ b/riscv/insns/vmfne_vf.h @@ -1,5 +1,9 @@ -// vfne.vf vd, vs2, rs1 +// vmfne.vf vd, vs2, rs1 VI_VFP_LOOP_CMP ({ res = !f32_eq(vs2, rs1); -}, false) +}, +{ + res = !f64_eq(vs2, rs1); +}, +false) diff --git a/riscv/insns/vmfne_vv.h b/riscv/insns/vmfne_vv.h index 8378a23..d5df60c 100644 --- a/riscv/insns/vmfne_vv.h +++ b/riscv/insns/vmfne_vv.h @@ -1,5 +1,9 @@ -// vfne.vv vd, vs2, rs1 +// vmfne.vv vd, vs2, rs1 VI_VFP_LOOP_CMP ({ res = !f32_eq(vs2, vs1); -}, true) +}, +{ + res = !f64_eq(vs2, vs1); +}, +true) -- cgit v1.1 From 4436424174070e0e84aa2fe1b1d1450771253f36 Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Fri, 29 Nov 2019 02:16:36 -0800 Subject: rvv: add vfredxxx.vs and vfwred[o]sum.vs float64 support 
Signed-off-by: Chih-Min Chao --- riscv/decode.h | 40 ++++++++++++++++++++++++++++------------ riscv/insns/vfredmax_vs.h | 3 +++ riscv/insns/vfredmin_vs.h | 3 +++ riscv/insns/vfredosum_vs.h | 3 +++ riscv/insns/vfredsum_vs.h | 3 +++ riscv/insns/vfwredosum_vs.h | 4 ++++ riscv/insns/vfwredsum_vs.h | 4 ++++ 7 files changed, 48 insertions(+), 12 deletions(-) diff --git a/riscv/decode.h b/riscv/decode.h index acb0c4c..256aca3 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -1621,14 +1621,14 @@ for (reg_t i = 0; i < vlmax; ++i) { \ uint64_t &vdi = P.VU.elt(rd_num, midx); \ uint64_t res = 0; -#define VI_VFP_LOOP_REDUCTION_BASE \ - VI_VFP_COMMON \ - float32_t vd_0 = P.VU.elt(rd_num, 0); \ - float32_t vs1_0 = P.VU.elt(rs1_num, 0); \ +#define VI_VFP_LOOP_REDUCTION_BASE(width) \ + float##width##_t vd_0 = P.VU.elt(rd_num, 0); \ + float##width##_t vs1_0 = P.VU.elt(rs1_num, 0); \ vd_0 = vs1_0;\ for (reg_t i=P.VU.vstart; i(rd_num, i); \ + int##width##_t &vd = P.VU.elt(rd_num, i); \ + float##width##_t vs2 = P.VU.elt(rs2_num, i); \ #define VI_VFP_LOOP_WIDE_REDUCTION_BASE \ VI_VFP_COMMON \ @@ -1648,7 +1648,6 @@ for (reg_t i = 0; i < vlmax; ++i) { \ #define VI_VFP_LOOP_REDUCTION_END(x) \ } \ P.VU.vstart = 0; \ - set_fp_exceptions; \ if (vl > 0) { \ P.VU.elt::type>(rd_num, 0) = vd_0.v; \ } @@ -1697,18 +1696,35 @@ for (reg_t i = 0; i < vlmax; ++i) { \ DEBUG_RVV_FP_VV; \ VI_VFP_LOOP_END -#define VI_VFP_VV_LOOP_REDUCTION(BODY) \ +#define VI_VFP_VV_LOOP_REDUCTION(BODY32, BODY64) \ VI_CHECK_REDUCTION(false) \ - VI_VFP_LOOP_REDUCTION_BASE \ - float32_t vs2 = P.VU.elt(rs2_num, i); \ - BODY; \ - DEBUG_RVV_FP_VV; \ - VI_VFP_LOOP_REDUCTION_END(e32) + VI_VFP_COMMON \ + switch(P.VU.vsew) { \ + case e32: {\ + VI_VFP_LOOP_REDUCTION_BASE(32) \ + BODY32; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e32) \ + break; \ + }\ + case e64: {\ + VI_VFP_LOOP_REDUCTION_BASE(64) \ + BODY64; \ + set_fp_exceptions; \ + VI_VFP_LOOP_REDUCTION_END(e64) \ + break; \ + }\ + case e16: \ + default: \ + require(0); \ + break; \ + }; \ #define VI_VFP_VV_LOOP_WIDE_REDUCTION(BODY) \ VI_VFP_LOOP_WIDE_REDUCTION_BASE \ float64_t vs2 = f32_to_f64(P.VU.elt(rs2_num, i)); \ BODY; \ + set_fp_exceptions; \ DEBUG_RVV_FP_VV; \ VI_VFP_LOOP_REDUCTION_END(e64) diff --git a/riscv/insns/vfredmax_vs.h b/riscv/insns/vfredmax_vs.h index dca10bf..cb03dbb 100644 --- a/riscv/insns/vfredmax_vs.h +++ b/riscv/insns/vfredmax_vs.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP_REDUCTION ({ vd_0 = f32_max(vd_0, vs2); +}, +{ + vd_0 = f64_max(vd_0, vs2); }) diff --git a/riscv/insns/vfredmin_vs.h b/riscv/insns/vfredmin_vs.h index b4556bc..51c0bcb 100644 --- a/riscv/insns/vfredmin_vs.h +++ b/riscv/insns/vfredmin_vs.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP_REDUCTION ({ vd_0 = f32_min(vd_0, vs2); +}, +{ + vd_0 = f64_min(vd_0, vs2); }) diff --git a/riscv/insns/vfredosum_vs.h b/riscv/insns/vfredosum_vs.h index 87422ee..7de6dbb 100644 --- a/riscv/insns/vfredosum_vs.h +++ b/riscv/insns/vfredosum_vs.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP_REDUCTION ({ vd_0 = f32_add(vd_0, vs2); +}, +{ + vd_0 = f64_add(vd_0, vs2); }) diff --git a/riscv/insns/vfredsum_vs.h b/riscv/insns/vfredsum_vs.h index b50b45f..7b5cccc 100644 --- a/riscv/insns/vfredsum_vs.h +++ b/riscv/insns/vfredsum_vs.h @@ -2,4 +2,7 @@ VI_VFP_VV_LOOP_REDUCTION ({ vd_0 = f32_add(vd_0, vs2); +}, +{ + vd_0 = f64_add(vd_0, vs2); }) diff --git a/riscv/insns/vfwredosum_vs.h b/riscv/insns/vfwredosum_vs.h index b47e2c7..d6da222 100644 --- a/riscv/insns/vfwredosum_vs.h +++ b/riscv/insns/vfwredosum_vs.h @@ -1,4 +1,8 @@ // vfwredosum.vs vd, vs2, vs1 
+require_vector;
+require(P.VU.vsew * 2 <= P.VU.ELEN);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(P.VU.vlmul <= 4);
 VI_VFP_VV_LOOP_WIDE_REDUCTION
 ({
 vd_0 = f64_add(vd_0, vs2);
diff --git a/riscv/insns/vfwredsum_vs.h b/riscv/insns/vfwredsum_vs.h
index 3ce591b..13bd1ab 100644
--- a/riscv/insns/vfwredsum_vs.h
+++ b/riscv/insns/vfwredsum_vs.h
@@ -1,4 +1,8 @@
 // vfwredsum.vs vd, vs2, vs1
+require_vector;
+require(P.VU.vsew * 2 <= P.VU.ELEN);
+require((insn.rs2() & (P.VU.vlmul - 1)) == 0);
+require(P.VU.vlmul <= 4);
 VI_VFP_VV_LOOP_WIDE_REDUCTION
 ({
 vd_0 = f64_add(vd_0, vs2);
-- cgit v1.1

From 4ac95a8c99d19c4db3be648e88f853ddf4f66d53 Mon Sep 17 00:00:00 2001
From: Chih-Min Chao
Date: Mon, 2 Dec 2019 06:53:21 -0800
Subject: rvv: refine vfmv to support float64

Signed-off-by: Chih-Min Chao
---
 riscv/insns/vfmerge_vfm.h | 37 +++++++++++++++++++++++++++++--------
 riscv/insns/vfmv_f_s.h | 8 +-------
 riscv/insns/vfmv_s_f.h | 25 ++++++++++++++++---------
 riscv/insns/vfmv_v_f.h | 21 ++++++++++++++++-----
 4 files changed, 62 insertions(+), 29 deletions(-)

diff --git a/riscv/insns/vfmerge_vfm.h b/riscv/insns/vfmerge_vfm.h
index 0ffa49b..639809d 100644
--- a/riscv/insns/vfmerge_vfm.h
+++ b/riscv/insns/vfmerge_vfm.h
@@ -3,16 +3,37 @@
 require(insn.rd() != 0);
 VI_CHECK_SSS(false);
 VI_VFP_COMMON;
 reg_t sew = P.VU.vsew;
-for (reg_t i=P.VU.vstart; i(rd_num, i);
- auto rs1 = f32(READ_FREG(rs1_num));
- auto vs2 = P.VU.elt(rs2_num, i);
- int midx = (P.VU.vmlen * i) / 64;
- int mpos = (P.VU.vmlen * i) % 64;
- bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1;
+switch(P.VU.vsew) {
+ case 32:
+ for (reg_t i=P.VU.vstart; i(rd_num, i);
+ auto rs1 = f32(READ_FREG(rs1_num));
+ auto vs2 = P.VU.elt(rs2_num, i);
- vd = use_first ? rs1 : vs2;
+ int midx = (P.VU.vmlen * i) / 64;
+ int mpos = (P.VU.vmlen * i) % 64;
+ bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1;
+
+ vd = use_first ? rs1 : vs2;
+ }
+ break;
+ case 64:
+ for (reg_t i=P.VU.vstart; i(rd_num, i);
+ auto rs1 = f64(READ_FREG(rs1_num));
+ auto vs2 = P.VU.elt(rs2_num, i);
+
+ int midx = (P.VU.vmlen * i) / 64;
+ int mpos = (P.VU.vmlen * i) % 64;
+ bool use_first = (P.VU.elt(0, midx) >> mpos) & 0x1;
+
+ vd = use_first ? rs1 : vs2;
+ }
+ break;
+ default:
+ require(0);
+ break;
 }
 P.VU.vstart = 0;
diff --git a/riscv/insns/vfmv_f_s.h b/riscv/insns/vfmv_f_s.h
index 066db80..dbfe8f9 100644
--- a/riscv/insns/vfmv_f_s.h
+++ b/riscv/insns/vfmv_f_s.h
@@ -2,18 +2,12 @@
 require_vector;
 require_fp;
 require_extension('F');
-require(P.VU.vsew == e8 || P.VU.vsew == e16 || P.VU.vsew == e32 || P.VU.vsew == e64);
+require(P.VU.vsew == e32 || P.VU.vsew == e64);
 reg_t rs2_num = insn.rs2();
 uint64_t vs2_0 = 0;
 const reg_t sew = P.VU.vsew;
 switch(sew) {
-case e8:
- vs2_0 = P.VU.elt(rs2_num, 0);
- break;
-case e16:
- vs2_0 = P.VU.elt(rs2_num, 0);
- break;
 case e32:
 vs2_0 = P.VU.elt(rs2_num, 0);
 break;
diff --git a/riscv/insns/vfmv_s_f.h b/riscv/insns/vfmv_s_f.h
index 8ff6094..44e9e2e 100644
--- a/riscv/insns/vfmv_s_f.h
+++ b/riscv/insns/vfmv_s_f.h
@@ -1,19 +1,26 @@
 // vfmv_s_f: vd[0] = rs1 (vs2=0)
 require_vector;
-require(insn.v_vm() == 1);
 require_fp;
 require_extension('F');
-require(P.VU.vsew == e32);
+require(P.VU.vsew >= e32 && P.VU.vsew <= 64);
 reg_t vl = P.VU.vl;
 if (vl > 0) {
 reg_t rd_num = insn.rd();
- reg_t sew = P.VU.vsew;
- if (FLEN == 64)
- P.VU.elt(rd_num, 0) = f64(FRS1).v;
- else
- P.VU.elt(rd_num, 0) = f32(FRS1).v;
-
- vl = 0;
+ switch(P.VU.vsew) {
+ case 32:
+ if (FLEN == 64)
+ P.VU.elt(rd_num, 0) = f64(FRS1).v;
+ else
+ P.VU.elt(rd_num, 0) = f32(FRS1).v;
+ break;
+ case 64:
+ if (FLEN == 64)
+ P.VU.elt(rd_num, 0) = f64(FRS1).v;
+ else
+ P.VU.elt(rd_num, 0) = f32(FRS1).v;
+ break;
+ }
 }
+P.VU.vstart = 0;
diff --git a/riscv/insns/vfmv_v_f.h b/riscv/insns/vfmv_v_f.h
index f323263..75832f9 100644
--- a/riscv/insns/vfmv_v_f.h
+++ b/riscv/insns/vfmv_v_f.h
@@ -1,12 +1,23 @@
 // vfmv_vf vd, vs1
 require((insn.rd() & (P.VU.vlmul - 1)) == 0);
 VI_VFP_COMMON
 reg_t sew = P.VU.vsew;
-for (reg_t i=P.VU.vstart; i(rd_num, i);
- auto rs1 = f32(READ_FREG(rs1_num));
+switch(P.VU.vsew) {
+ case e32:
+ for (reg_t i=P.VU.vstart; i(rd_num, i);
+ auto rs1 = f32(READ_FREG(rs1_num));
- vd = rs1;
+ vd = rs1;
+ }
+ break;
+ case e64:
+ for (reg_t i=P.VU.vstart; i(rd_num, i);
+ auto rs1 = f64(READ_FREG(rs1_num));
+
+ vd = rs1;
+ }
+ break;
 }
 P.VU.vstart = 0;
-- cgit v1.1

From ca648e6e24a8968f4e33ca1859d37a760004e953 Mon Sep 17 00:00:00 2001
From: Chih-Min Chao
Date: Wed, 11 Dec 2019 00:26:01 -0800
Subject: rvv: fix vfwcvt/vfncvt for f32 -> f64 and f64 -> f32

1. fix disasm
2. refine checking rule and move them out of loop
3.
add missing exception keeping for each element Signed-off-by: Chih-Min Chao --- riscv/decode.h | 4 +--- riscv/insns/vfncvt_f_f_w.h | 6 +++++- riscv/insns/vfncvt_f_x_w.h | 6 +++++- riscv/insns/vfncvt_f_xu_w.h | 6 +++++- riscv/insns/vfncvt_rod_f_f_w.h | 6 +++++- riscv/insns/vfncvt_x_f_w.h | 6 +++++- riscv/insns/vfncvt_xu_f_w.h | 6 +++++- riscv/insns/vfwcvt_f_f_v.h | 5 ++++- riscv/insns/vfwcvt_f_x_v.h | 5 ++++- riscv/insns/vfwcvt_f_xu_v.h | 5 ++++- riscv/insns/vfwcvt_x_f_v.h | 5 ++++- riscv/insns/vfwcvt_xu_f_v.h | 5 ++++- spike_main/disasm.cc | 20 +++++++++----------- 13 files changed, 60 insertions(+), 25 deletions(-) diff --git a/riscv/decode.h b/riscv/decode.h index 256aca3..0076145 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -441,12 +441,10 @@ static inline bool is_overlapped(const int astart, const int asize, require(insn.rd() != 0); \ } -#define VI_CHECK_SD \ - require(!is_overlapped(insn.rd(), P.VU.vlmul, insn.rs2(), P.VU.vlmul * 2)); - #define VI_CHECK_DSS(is_vs1) \ VI_WIDE_CHECK_COMMON; \ require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs2(), P.VU.vlmul)); \ + require((insn.rd() & (P.VU.vlmul * 2 - 1)) == 0); \ require((insn.rs2() & (P.VU.vlmul - 1)) == 0); \ if (is_vs1) {\ require(!is_overlapped(insn.rd(), P.VU.vlmul * 2, insn.rs1(), P.VU.vlmul)); \ diff --git a/riscv/insns/vfncvt_f_f_w.h b/riscv/insns/vfncvt_f_f_w.h index 42c18c7..55a8eac 100644 --- a/riscv/insns/vfncvt_f_f_w.h +++ b/riscv/insns/vfncvt_f_f_w.h @@ -1,6 +1,10 @@ // vfncvt.f.f.v vd, vs2, vm -VI_CHECK_SD; +VI_CHECK_SDS(false); +if (P.VU.vsew == e32) + require(p->supports_extension('D')); + VI_VFP_LOOP_BASE auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i) = f64_to_f32(vs2); + set_fp_exceptions; VI_VFP_LOOP_END diff --git a/riscv/insns/vfncvt_f_x_w.h b/riscv/insns/vfncvt_f_x_w.h index 80ebe00..daf2274 100644 --- a/riscv/insns/vfncvt_f_x_w.h +++ b/riscv/insns/vfncvt_f_x_w.h @@ -1,6 +1,10 @@ // vfncvt.f.x.v vd, vs2, vm -VI_CHECK_SD; +VI_CHECK_SDS(false); +if (P.VU.vsew == e32) + require(p->supports_extension('D')); + VI_VFP_LOOP_BASE auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i) = i64_to_f32(vs2); + set_fp_exceptions; VI_VFP_LOOP_END diff --git a/riscv/insns/vfncvt_f_xu_w.h b/riscv/insns/vfncvt_f_xu_w.h index 013f57c..7f57ec5 100644 --- a/riscv/insns/vfncvt_f_xu_w.h +++ b/riscv/insns/vfncvt_f_xu_w.h @@ -1,6 +1,10 @@ // vfncvt.f.xu.v vd, vs2, vm -VI_CHECK_SD; +VI_CHECK_SDS(false); +if (P.VU.vsew == e32) + require(p->supports_extension('D')); + VI_VFP_LOOP_BASE auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i) = ui64_to_f32(vs2); + set_fp_exceptions; VI_VFP_LOOP_END diff --git a/riscv/insns/vfncvt_rod_f_f_w.h b/riscv/insns/vfncvt_rod_f_f_w.h index 77a3873..130c5b5 100644 --- a/riscv/insns/vfncvt_rod_f_f_w.h +++ b/riscv/insns/vfncvt_rod_f_f_w.h @@ -1,7 +1,11 @@ // vfncvt.f.f.v vd, vs2, vm -VI_CHECK_SD; +VI_CHECK_SDS(false); +if (P.VU.vsew == e32) + require(p->supports_extension('D')); + VI_VFP_LOOP_BASE softfloat_roundingMode = softfloat_round_odd; auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i) = f64_to_f32(vs2); + set_fp_exceptions; VI_VFP_LOOP_END diff --git a/riscv/insns/vfncvt_x_f_w.h b/riscv/insns/vfncvt_x_f_w.h index 8985f1b..cda2fe2 100644 --- a/riscv/insns/vfncvt_x_f_w.h +++ b/riscv/insns/vfncvt_x_f_w.h @@ -1,6 +1,10 @@ // vfncvt.x.f.v vd, vs2, vm +VI_CHECK_SDS(false); +if (P.VU.vsew == e32) + require(p->supports_extension('D')); + VI_VFP_LOOP_BASE - VI_CHECK_SD; auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i) = f64_to_i32(vs2, STATE.frm, true); + set_fp_exceptions; 
VI_VFP_LOOP_END diff --git a/riscv/insns/vfncvt_xu_f_w.h b/riscv/insns/vfncvt_xu_f_w.h index 2db8d82..a009105 100644 --- a/riscv/insns/vfncvt_xu_f_w.h +++ b/riscv/insns/vfncvt_xu_f_w.h @@ -1,6 +1,10 @@ // vfncvt.xu.f.v vd, vs2, vm +VI_CHECK_SDS(false); +if (P.VU.vsew == e32) + require(p->supports_extension('D')); + VI_VFP_LOOP_BASE - VI_CHECK_SD; auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i) = f64_to_ui32(vs2, STATE.frm, true); + set_fp_exceptions; VI_VFP_LOOP_END diff --git a/riscv/insns/vfwcvt_f_f_v.h b/riscv/insns/vfwcvt_f_f_v.h index 4d6b4fc..4bda2bc 100644 --- a/riscv/insns/vfwcvt_f_f_v.h +++ b/riscv/insns/vfwcvt_f_f_v.h @@ -1,6 +1,9 @@ // vfwcvt.f.f.v vd, vs2, vm +VI_CHECK_DSS(false); +if (P.VU.vsew == e32) + require(p->supports_extension('D')); + VI_VFP_LOOP_BASE - VI_CHECK_DSS(false); auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i) = f32_to_f64(vs2); set_fp_exceptions; diff --git a/riscv/insns/vfwcvt_f_x_v.h b/riscv/insns/vfwcvt_f_x_v.h index ab5d825..346db32 100644 --- a/riscv/insns/vfwcvt_f_x_v.h +++ b/riscv/insns/vfwcvt_f_x_v.h @@ -1,6 +1,9 @@ // vfwcvt.f.x.v vd, vs2, vm +VI_CHECK_DSS(false); +if (P.VU.vsew == e32) + require(p->supports_extension('D')); + VI_VFP_LOOP_BASE - VI_CHECK_DSS(false); auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i) = i32_to_f64(vs2); set_fp_exceptions; diff --git a/riscv/insns/vfwcvt_f_xu_v.h b/riscv/insns/vfwcvt_f_xu_v.h index 8af8d7c..c963abb 100644 --- a/riscv/insns/vfwcvt_f_xu_v.h +++ b/riscv/insns/vfwcvt_f_xu_v.h @@ -1,6 +1,9 @@ // vfwcvt.f.xu.v vd, vs2, vm +VI_CHECK_DSS(false); +if (P.VU.vsew == e32) + require(p->supports_extension('D')); + VI_VFP_LOOP_BASE - VI_CHECK_DSS(false); auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i) = ui32_to_f64(vs2); set_fp_exceptions; diff --git a/riscv/insns/vfwcvt_x_f_v.h b/riscv/insns/vfwcvt_x_f_v.h index 06e81d4..9088a79 100644 --- a/riscv/insns/vfwcvt_x_f_v.h +++ b/riscv/insns/vfwcvt_x_f_v.h @@ -1,6 +1,9 @@ // vfwcvt.x.f.v vd, vs2, vm +VI_CHECK_DSS(false); +if (P.VU.vsew == e32) + require(p->supports_extension('D')); + VI_VFP_LOOP_BASE - VI_CHECK_DSS(false); auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i) = f32_to_i64(vs2, STATE.frm, true); set_fp_exceptions; diff --git a/riscv/insns/vfwcvt_xu_f_v.h b/riscv/insns/vfwcvt_xu_f_v.h index cc82481..266cbca 100644 --- a/riscv/insns/vfwcvt_xu_f_v.h +++ b/riscv/insns/vfwcvt_xu_f_v.h @@ -1,6 +1,9 @@ // vfwcvt.xu.f.v vd, vs2, vm +VI_CHECK_DSS(false); +if (P.VU.vsew == e32) + require(p->supports_extension('D')); + VI_VFP_LOOP_BASE - VI_CHECK_DSS(false); auto vs2 = P.VU.elt(rs2_num, i); P.VU.elt(rd_num, i) = f32_to_ui64(vs2, STATE.frm, true); set_fp_exceptions; diff --git a/spike_main/disasm.cc b/spike_main/disasm.cc index 373c6bb..5ecad58 100644 --- a/spike_main/disasm.cc +++ b/spike_main/disasm.cc @@ -1037,23 +1037,19 @@ disassembler_t::disassembler_t(int xlen) add_insn(new disasm_insn_t(#name ".vf", match_##name##_vf, mask_##name##_vf, \ {&vd, &vs2, &frs1, &opt, &vm})); \ - #define DISASM_VFUNARY0_INSN(name, extra, suf) \ + #define DISASM_VFUNARY0_INSN(name, suf) \ add_insn(new disasm_insn_t(#name "cvt.xu.f." #suf, \ match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \ {&vd, &vs2, &opt, &vm})); \ add_insn(new disasm_insn_t(#name "cvt.x.f." #suf, \ - match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \ + match_##name##cvt_x_f_##suf, mask_##name##cvt_x_f_##suf, \ {&vd, &vs2, &opt, &vm})); \ add_insn(new disasm_insn_t(#name "cvt.f.xu." 
#suf, \ - match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \ + match_##name##cvt_f_xu_##suf, mask_##name##cvt_f_xu_##suf, \ {&vd, &vs2, &opt, &vm})); \ add_insn(new disasm_insn_t(#name "cvt.f.x." #suf, \ - match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \ + match_##name##cvt_f_x_##suf, mask_##name##cvt_f_x_##suf, \ {&vd, &vs2, &opt, &vm})); \ - if (extra) \ - add_insn(new disasm_insn_t(#name "cvt.f.f." #suf, \ - match_##name##cvt_xu_f_##suf, mask_##name##cvt_xu_f_##suf, \ - {&vd, &vs2, &opt, &vm})); \ //OPFVV/OPFVF //0b01_0000 @@ -1086,11 +1082,13 @@ disassembler_t::disassembler_t(int xlen) DISASM_OPIV__F_INSN(vfrdiv); //vfunary0 - DISASM_VFUNARY0_INSN(vf, 0, v); + DISASM_VFUNARY0_INSN(vf, v); - DISASM_VFUNARY0_INSN(vfw, 1, v); + DISASM_VFUNARY0_INSN(vfw, v); + DISASM_INSN("vfwcvt.f.f.v", vfwcvt_f_f_v, 0, {&vd, &vs2, &opt, &vm}); - DISASM_VFUNARY0_INSN(vfn, 1, w); + DISASM_VFUNARY0_INSN(vfn, w); + DISASM_INSN("vfncvt.f.f.w", vfncvt_rod_f_f_w, 0, {&vd, &vs2, &opt, &vm}); DISASM_INSN("vfncvt.rod.f.f.w", vfncvt_rod_f_f_w, 0, {&vd, &vs2, &opt, &vm}); //vfunary1 -- cgit v1.1 From a1ed3764b06907ab36e1a495285f54c093b85b79 Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Mon, 6 Jan 2020 00:09:46 -0800 Subject: rvv: add vmv[1248]r.v simple register copy instructions Signed-off-by: Chih-Min Chao --- riscv/encoding.h | 12 ++++++++++++ riscv/insns/vl1r_v.h | 2 -- riscv/insns/vmv1r_v.h | 2 ++ riscv/insns/vmv2r_v.h | 2 ++ riscv/insns/vmv4r_v.h | 2 ++ riscv/insns/vmv8r_v.h | 2 ++ riscv/insns/vmvnfr_v.h | 12 ++++++++++++ riscv/riscv.mk.in | 4 ++++ spike_main/disasm.cc | 13 +++++++++---- 9 files changed, 45 insertions(+), 6 deletions(-) create mode 100644 riscv/insns/vmv1r_v.h create mode 100644 riscv/insns/vmv2r_v.h create mode 100644 riscv/insns/vmv4r_v.h create mode 100644 riscv/insns/vmv8r_v.h create mode 100644 riscv/insns/vmvnfr_v.h diff --git a/riscv/encoding.h b/riscv/encoding.h index 6d691e7..26142db 100644 --- a/riscv/encoding.h +++ b/riscv/encoding.h @@ -1266,6 +1266,14 @@ #define MASK_VSADD_VI 0xfc00707f #define MATCH_VSLL_VI 0x94003057 #define MASK_VSLL_VI 0xfc00707f +#define MATCH_VMV1R_V 0x9e003057 +#define MASK_VMV1R_V 0xfe0ff07f +#define MATCH_VMV2R_V 0x9e00b057 +#define MASK_VMV2R_V 0xfe0ff07f +#define MATCH_VMV4R_V 0x9e01b057 +#define MASK_VMV4R_V 0xfe0ff07f +#define MATCH_VMV8R_V 0x9e03b057 +#define MASK_VMV8R_V 0xfe0ff07f #define MATCH_VSRL_VI 0xa0003057 #define MASK_VSRL_VI 0xfc00707f #define MATCH_VSRA_VI 0xa4003057 @@ -2305,6 +2313,10 @@ DECLARE_INSN(vmsgt_vi, MATCH_VMSGT_VI, MASK_VMSGT_VI) DECLARE_INSN(vsaddu_vi, MATCH_VSADDU_VI, MASK_VSADDU_VI) DECLARE_INSN(vsadd_vi, MATCH_VSADD_VI, MASK_VSADD_VI) DECLARE_INSN(vsll_vi, MATCH_VSLL_VI, MASK_VSLL_VI) +DECLARE_INSN(vmv1r_v, MATCH_VMV1R_V, MASK_VMV1R_V) +DECLARE_INSN(vmv2r_v, MATCH_VMV2R_V, MASK_VMV2R_V) +DECLARE_INSN(vmv4r_v, MATCH_VMV4R_V, MASK_VMV4R_V) +DECLARE_INSN(vmv8r_v, MATCH_VMV8R_V, MASK_VMV8R_V) DECLARE_INSN(vsrl_vi, MATCH_VSRL_VI, MASK_VSRL_VI) DECLARE_INSN(vsra_vi, MATCH_VSRA_VI, MASK_VSRA_VI) DECLARE_INSN(vssrl_vi, MATCH_VSSRL_VI, MASK_VSSRL_VI) diff --git a/riscv/insns/vl1r_v.h b/riscv/insns/vl1r_v.h index 8dabcb6..eded573 100644 --- a/riscv/insns/vl1r_v.h +++ b/riscv/insns/vl1r_v.h @@ -3,9 +3,7 @@ require_vector; const reg_t baseAddr = RS1; const reg_t vd = insn.rd(); for (reg_t i = 0; i < P.VU.vlenb; ++i) { - auto val = MMU.load_uint8(baseAddr + i); - fprintf(stderr, "here: %ld: %x\n", i, val); P.VU.elt(vd, i) = val; } P.VU.vstart = 0; diff --git a/riscv/insns/vmv1r_v.h b/riscv/insns/vmv1r_v.h 
new file mode 100644 index 0000000..bbdeab9 --- /dev/null +++ b/riscv/insns/vmv1r_v.h @@ -0,0 +1,2 @@ +// vmv1r.v vd, vs2 +#include "vmvnfr_v.h" diff --git a/riscv/insns/vmv2r_v.h b/riscv/insns/vmv2r_v.h new file mode 100644 index 0000000..1ac8e09 --- /dev/null +++ b/riscv/insns/vmv2r_v.h @@ -0,0 +1,2 @@ +// vmv2r.v vd, vs2 +#include "vmvnfr_v.h" diff --git a/riscv/insns/vmv4r_v.h b/riscv/insns/vmv4r_v.h new file mode 100644 index 0000000..2068731 --- /dev/null +++ b/riscv/insns/vmv4r_v.h @@ -0,0 +1,2 @@ +// vmv4r.v vd, vs2 +#include "vmvnfr_v.h" diff --git a/riscv/insns/vmv8r_v.h b/riscv/insns/vmv8r_v.h new file mode 100644 index 0000000..2b205fc --- /dev/null +++ b/riscv/insns/vmv8r_v.h @@ -0,0 +1,2 @@ +// vmv8r.v vd, vs2 +#include "vmvnfr_v.h" diff --git a/riscv/insns/vmvnfr_v.h b/riscv/insns/vmvnfr_v.h new file mode 100644 index 0000000..6ae66d5 --- /dev/null +++ b/riscv/insns/vmvnfr_v.h @@ -0,0 +1,12 @@ +// vmv1r.v vd, vs2 +require_vector; +const reg_t baseAddr = RS1; +const reg_t vd = insn.rd(); +const reg_t vs2 = insn.rs2(); +const reg_t len = insn.rs1() + 1; +require((vd & (len - 1)) == 0); +require((vs2 & (len - 1)) == 0); +if (vd != vs2) + memcpy(&P.VU.elt(vd, 0), + &P.VU.elt(vs2, 0), P.VU.vlenb * len); +P.VU.vstart = 0; diff --git a/riscv/riscv.mk.in b/riscv/riscv.mk.in index db1bdcc..252b196 100644 --- a/riscv/riscv.mk.in +++ b/riscv/riscv.mk.in @@ -384,6 +384,10 @@ riscv_insn_ext_v_alu_int = \ vmv_v_v \ vmv_v_x \ vmv_x_s \ + vmv1r_v \ + vmv2r_v \ + vmv4r_v \ + vmv8r_v \ vmxnor_mm \ vmxor_mm \ vnclip_wi \ diff --git a/spike_main/disasm.cc b/spike_main/disasm.cc index 5ecad58..addc223 100644 --- a/spike_main/disasm.cc +++ b/spike_main/disasm.cc @@ -911,10 +911,10 @@ disassembler_t::disassembler_t(int xlen) DISASM_OPIV_VX__INSN(vssubu, 0); DISASM_OPIV_VX__INSN(vssub, 1); DISASM_OPIV_VXI_INSN(vsll, 1, v); - DISASM_OPIV_VX__INSN(vaaddu, 0); - DISASM_OPIV_VX__INSN(vaadd, 0); - DISASM_OPIV_VX__INSN(vasubu, 0); - DISASM_OPIV_VX__INSN(vasub, 0); + DISASM_INSN("vmv1r.v", vmv1r_v, 0, {&vd, &vs2}); + DISASM_INSN("vmv2r.v", vmv2r_v, 0, {&vd, &vs2}); + DISASM_INSN("vmv4r.v", vmv4r_v, 0, {&vd, &vs2}); + DISASM_INSN("vmv8r.v", vmv8r_v, 0, {&vd, &vs2}); DISASM_OPIV_VX__INSN(vsmul, 1); DISASM_OPIV_VXI_INSN(vsrl, 0, v); DISASM_OPIV_VXI_INSN(vsra, 0, v); @@ -937,6 +937,11 @@ disassembler_t::disassembler_t(int xlen) //OPMVV/OPMVX //0b00_0000 + DISASM_OPIV_VX__INSN(vaaddu, 0); + DISASM_OPIV_VX__INSN(vaadd, 0); + DISASM_OPIV_VX__INSN(vasubu, 0); + DISASM_OPIV_VX__INSN(vasub, 0); + DISASM_OPIV_S___INSN(vredsum, 1); DISASM_OPIV_S___INSN(vredand, 1); DISASM_OPIV_S___INSN(vredor, 1); -- cgit v1.1 From fa2f63818aff194f96a0b81da103d6a4170173b2 Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Mon, 13 Jan 2020 00:52:40 -0800 Subject: rvv: segment load/store needs to check destination range Signed-off-by: Chih-Min Chao --- riscv/decode.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/riscv/decode.h b/riscv/decode.h index 0076145..21bb92b 100644 --- a/riscv/decode.h +++ b/riscv/decode.h @@ -1464,7 +1464,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \ const reg_t vl = P.VU.vl; \ const reg_t baseAddr = RS1; \ const reg_t vs3 = insn.rd(); \ - require(vs3 + nf <= NVPR); \ + require(vs3 + nf * P.VU.vlmul <= NVPR); \ const reg_t vlmul = P.VU.vlmul; \ for (reg_t i = 0; i < vl; ++i) { \ VI_STRIP(i) \ @@ -1497,7 +1497,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \ const reg_t vl = P.VU.vl; \ const reg_t baseAddr = RS1; \ const reg_t vd = insn.rd(); \ - require(vd + nf <= NVPR); \ + require(vd + nf * 
P.VU.vlmul <= NVPR); \ const reg_t vlmul = P.VU.vlmul; \ for (reg_t i = 0; i < vl; ++i) { \ VI_ELEMENT_SKIP(i); \ @@ -1549,6 +1549,7 @@ for (reg_t i = 0; i < vlmax; ++i) { \ const reg_t rd_num = insn.rd(); \ bool early_stop = false; \ const reg_t vlmul = P.VU.vlmul; \ + require(rd_num + nf * P.VU.vlmul <= NVPR); \ p->VU.vstart = 0; \ for (reg_t i = 0; i < vl; ++i) { \ VI_STRIP(i); \ -- cgit v1.1 From e75ba052d42b1af954c09adc815b541124c2ccce Mon Sep 17 00:00:00 2001 From: Chih-Min Chao Date: Thu, 9 Jan 2020 06:50:04 -0800 Subject: doc: update vector extension version 0.8 is officially released. Signed-off-by: Chih-Min Chao --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 3db8cf7..2e9e0ac 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ Spike supports the following RISC-V ISA features: - D extension, v2.2 - Q extension, v2.2 - C extension, v2.0 - - V extension, v0.8-draft-20191118, w/ Zvlsseg, w/o Zvamo/Zvediv, (_requires a 64-bit host_) + - V extension, v0.8, w/ Zvlsseg, w/o Zvamo/Zvediv, (_requires a 64-bit host_) - Conformance to both RVWMO and RVTSO (Spike is sequentially consistent) - Machine, Supervisor, and User modes, v1.11 - Debug v0.14 -- cgit v1.1
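
A note on the common pattern in this series: the pre-existing vector FP helper macros assumed SEW=32 and took a single statement body, and these patches rework them to take one body per element width and pick it with a switch on P.VU.vsew (e32 or e64), gated by the F/D extension checks added to VI_VFP_COMMON. The sketch below is only an illustrative, self-contained rendering of that dispatch idea, not Spike source: native float/double stand in for the softfloat float32_t/float64_t types, and VReg, Sew, and vfp_vv_loop are names invented here for the example.

// Illustrative sketch only (not Spike code): shows the per-SEW two-body
// dispatch used by the reworked VI_VFP_VV_LOOP(BODY32, BODY64) macro.
#include <cstddef>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <vector>

enum class Sew { e32, e64 };             // selected element width (subset)

struct VReg {                            // vector register modeled as raw bytes
  std::vector<std::uint8_t> bytes;
  explicit VReg(std::size_t vlenb) : bytes(vlenb, 0) {}
  template <typename T> T get(std::size_t i) const {
    T v;
    std::memcpy(&v, bytes.data() + i * sizeof(T), sizeof(T));
    return v;
  }
  template <typename T> void set(std::size_t i, T v) {
    std::memcpy(bytes.data() + i * sizeof(T), &v, sizeof(T));
  }
};

// Analogue of the two-body loop macro: one callable per element width,
// chosen per element by a switch on the current SEW.
template <typename Body32, typename Body64>
void vfp_vv_loop(Sew sew, std::size_t vl, VReg& vd, const VReg& vs1,
                 const VReg& vs2, Body32 body32, Body64 body64) {
  for (std::size_t i = 0; i < vl; ++i) {
    switch (sew) {
      case Sew::e32:
        vd.set<float>(i, body32(vs1.get<float>(i), vs2.get<float>(i)));
        break;
      case Sew::e64:
        vd.set<double>(i, body64(vs1.get<double>(i), vs2.get<double>(i)));
        break;
    }
  }
}

int main() {
  const std::size_t vlenb = 32, vl = 4;
  VReg vd(vlenb), vs1(vlenb), vs2(vlenb);
  for (std::size_t i = 0; i < vl; ++i) {
    vs1.set<double>(i, 1.5 * i);
    vs2.set<double>(i, 0.25);
  }
  // A vfadd.vv-style operation with SEW=64: only the float64 body is used.
  vfp_vv_loop(Sew::e64, vl, vd, vs1, vs2,
              [](float a, float b) { return a + b; },
              [](double a, double b) { return a + b; });
  for (std::size_t i = 0; i < vl; ++i)
    std::cout << vd.get<double>(i) << (i + 1 < vl ? ' ' : '\n');
  return 0;
}

Built as an ordinary C++ program, the example performs a SEW=64 element-wise add and prints 0.25 1.75 3.25 4.75; the same per-width body selection shows up in the patches as BODY32/BODY64 pairs for the arithmetic loops, a width parameter for the reduction base macro, and explicit e32/e64 switch arms in the merge and move instructions.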