diff options
author | Taylor Simpson <tsimpson@quicinc.com> | 2021-04-08 20:07:52 -0500 |
---|---|---|
committer | Richard Henderson <richard.henderson@linaro.org> | 2021-05-01 16:06:09 -0700 |
commit | 0d0b91a8049967f5e3bfd86e9d372d61b4019b77 (patch) | |
tree | 835927b9ef4a4e302d2499504825ecdbca5c9d50 /target | |
parent | af7f1821273c45a6101735023736882ec0399e86 (diff) | |
download | qemu-0d0b91a8049967f5e3bfd86e9d372d61b4019b77.zip qemu-0d0b91a8049967f5e3bfd86e9d372d61b4019b77.tar.gz qemu-0d0b91a8049967f5e3bfd86e9d372d61b4019b77.tar.bz2 |
Hexagon (target/hexagon) load and unpack bytes instructions
The following instructions are added
L2_loadbzw2_io Rd32 = memubh(Rs32+#s11:1)
L2_loadbzw4_io Rdd32 = memubh(Rs32+#s11:1)
L2_loadbsw2_io Rd32 = membh(Rs32+#s11:1)
L2_loadbsw4_io Rdd32 = membh(Rs32+#s11:1)
L4_loadbzw2_ur Rd32 = memubh(Rt32<<#u2+#U6)
L4_loadbzw4_ur Rdd32 = memubh(Rt32<<#u2+#U6)
L4_loadbsw2_ur Rd32 = membh(Rt32<<#u2+#U6)
L4_loadbsw4_ur Rdd32 = membh(Rt32<<#u2+#U6)
L4_loadbzw2_ap Rd32 = memubh(Re32=#U6)
L4_loadbzw4_ap Rdd32 = memubh(Re32=#U6)
L4_loadbsw2_ap Rd32 = membh(Re32=#U6)
L4_loadbsw4_ap Rdd32 = membh(Re32=#U6)
L2_loadbzw2_pr Rd32 = memubh(Rx32++Mu2)
L2_loadbzw4_pr Rdd32 = memubh(Rx32++Mu2)
L2_loadbsw2_pr Rd32 = membh(Rx32++Mu2)
L2_loadbsw4_pr Rdd32 = membh(Rx32++Mu2)
L2_loadbzw2_pbr Rd32 = memubh(Rx32++Mu2:brev)
L2_loadbzw4_pbr Rdd32 = memubh(Rx32++Mu2:brev)
L2_loadbsw2_pbr Rd32 = membh(Rx32++Mu2:brev)
L2_loadbsw4_pbr Rdd32 = membh(Rx32++Mu2:brev)
L2_loadbzw2_pi Rd32 = memubh(Rx32++#s4:1)
L2_loadbzw4_pi Rdd32 = memubh(Rx32++#s4:1)
L2_loadbsw2_pi Rd32 = membh(Rx32++#s4:1)
L2_loadbsw4_pi Rdd32 = membh(Rx32++#s4:1)
L2_loadbzw2_pci Rd32 = memubh(Rx32++#s4:1:circ(Mu2))
L2_loadbzw4_pci Rdd32 = memubh(Rx32++#s4:1:circ(Mu2))
L2_loadbsw2_pci Rd32 = membh(Rx32++#s4:1:circ(Mu2))
L2_loadbsw4_pci Rdd32 = membh(Rx32++#s4:1:circ(Mu2))
L2_loadbzw2_pcr Rd32 = memubh(Rx32++I:circ(Mu2))
L2_loadbzw4_pcr Rdd32 = memubh(Rx32++I:circ(Mu2))
L2_loadbsw2_pcr Rd32 = membh(Rx32++I:circ(Mu2))
L2_loadbsw4_pcr Rdd32 = membh(Rx32++I:circ(Mu2))
Test cases in tests/tcg/hexagon/load_unpack.c
Signed-off-by: Taylor Simpson <tsimpson@quicinc.com>
Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Message-Id: <1617930474-31979-25-git-send-email-tsimpson@quicinc.com>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
Diffstat (limited to 'target')
-rw-r--r-- | target/hexagon/gen_tcg.h | 108 | ||||
-rw-r--r-- | target/hexagon/genptr.c | 13 | ||||
-rw-r--r-- | target/hexagon/imported/encode_pp.def | 6 | ||||
-rw-r--r-- | target/hexagon/imported/ldst.idef | 43 | ||||
-rw-r--r-- | target/hexagon/macros.h | 16 |
5 files changed, 186 insertions, 0 deletions
diff --git a/target/hexagon/gen_tcg.h b/target/hexagon/gen_tcg.h index 8f0ec01..1120aae 100644 --- a/target/hexagon/gen_tcg.h +++ b/target/hexagon/gen_tcg.h @@ -153,6 +153,114 @@ #define fGEN_TCG_L2_loadrd_pi(SHORTCODE) SHORTCODE /* + * These instructions load 2 bytes and places them in + * two halves of the destination register. + * The GET_EA macro determines the addressing mode. + * The SIGN argument determines whether to zero-extend or + * sign-extend. + */ +#define fGEN_TCG_loadbXw2(GET_EA, SIGN) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + TCGv byte = tcg_temp_new(); \ + GET_EA; \ + fLOAD(1, 2, u, EA, tmp); \ + tcg_gen_movi_tl(RdV, 0); \ + for (int i = 0; i < 2; i++) { \ + gen_set_half(i, RdV, gen_get_byte(byte, i, tmp, (SIGN))); \ + } \ + tcg_temp_free(tmp); \ + tcg_temp_free(byte); \ + } while (0) + +#define fGEN_TCG_L2_loadbzw2_io(SHORTCODE) \ + fGEN_TCG_loadbXw2(fEA_RI(RsV, siV), false) +#define fGEN_TCG_L4_loadbzw2_ur(SHORTCODE) \ + fGEN_TCG_loadbXw2(fEA_IRs(UiV, RtV, uiV), false) +#define fGEN_TCG_L2_loadbsw2_io(SHORTCODE) \ + fGEN_TCG_loadbXw2(fEA_RI(RsV, siV), true) +#define fGEN_TCG_L4_loadbsw2_ur(SHORTCODE) \ + fGEN_TCG_loadbXw2(fEA_IRs(UiV, RtV, uiV), true) +#define fGEN_TCG_L4_loadbzw2_ap(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_ap, false) +#define fGEN_TCG_L2_loadbzw2_pr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pr, false) +#define fGEN_TCG_L2_loadbzw2_pbr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pbr, false) +#define fGEN_TCG_L2_loadbzw2_pi(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pi, false) +#define fGEN_TCG_L4_loadbsw2_ap(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_ap, true) +#define fGEN_TCG_L2_loadbsw2_pr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pr, true) +#define fGEN_TCG_L2_loadbsw2_pbr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pbr, true) +#define fGEN_TCG_L2_loadbsw2_pi(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pi, true) +#define fGEN_TCG_L2_loadbzw2_pci(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pci, false) +#define fGEN_TCG_L2_loadbsw2_pci(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pci, true) +#define fGEN_TCG_L2_loadbzw2_pcr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pcr(1), false) +#define fGEN_TCG_L2_loadbsw2_pcr(SHORTCODE) \ + fGEN_TCG_loadbXw2(GET_EA_pcr(1), true) + +/* + * These instructions load 4 bytes and places them in + * four halves of the destination register pair. + * The GET_EA macro determines the addressing mode. + * The SIGN argument determines whether to zero-extend or + * sign-extend. + */ +#define fGEN_TCG_loadbXw4(GET_EA, SIGN) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + TCGv byte = tcg_temp_new(); \ + GET_EA; \ + fLOAD(1, 4, u, EA, tmp); \ + tcg_gen_movi_i64(RddV, 0); \ + for (int i = 0; i < 4; i++) { \ + gen_set_half_i64(i, RddV, gen_get_byte(byte, i, tmp, (SIGN))); \ + } \ + tcg_temp_free(tmp); \ + tcg_temp_free(byte); \ + } while (0) + +#define fGEN_TCG_L2_loadbzw4_io(SHORTCODE) \ + fGEN_TCG_loadbXw4(fEA_RI(RsV, siV), false) +#define fGEN_TCG_L4_loadbzw4_ur(SHORTCODE) \ + fGEN_TCG_loadbXw4(fEA_IRs(UiV, RtV, uiV), false) +#define fGEN_TCG_L2_loadbsw4_io(SHORTCODE) \ + fGEN_TCG_loadbXw4(fEA_RI(RsV, siV), true) +#define fGEN_TCG_L4_loadbsw4_ur(SHORTCODE) \ + fGEN_TCG_loadbXw4(fEA_IRs(UiV, RtV, uiV), true) +#define fGEN_TCG_L2_loadbzw4_pci(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pci, false) +#define fGEN_TCG_L2_loadbsw4_pci(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pci, true) +#define fGEN_TCG_L2_loadbzw4_pcr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pcr(2), false) +#define fGEN_TCG_L2_loadbsw4_pcr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pcr(2), true) +#define fGEN_TCG_L4_loadbzw4_ap(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_ap, false) +#define fGEN_TCG_L2_loadbzw4_pr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pr, false) +#define fGEN_TCG_L2_loadbzw4_pbr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pbr, false) +#define fGEN_TCG_L2_loadbzw4_pi(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pi, false) +#define fGEN_TCG_L4_loadbsw4_ap(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_ap, true) +#define fGEN_TCG_L2_loadbsw4_pr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pr, true) +#define fGEN_TCG_L2_loadbsw4_pbr(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pbr, true) +#define fGEN_TCG_L2_loadbsw4_pi(SHORTCODE) \ + fGEN_TCG_loadbXw4(GET_EA_pi, true) + +/* * Predicated loads * Here is a primer to understand the tag names * diff --git a/target/hexagon/genptr.c b/target/hexagon/genptr.c index c6928d6..f93f895 100644 --- a/target/hexagon/genptr.c +++ b/target/hexagon/genptr.c @@ -300,6 +300,19 @@ static inline TCGv gen_get_half(TCGv result, int N, TCGv src, bool sign) return result; } +static inline void gen_set_half(int N, TCGv result, TCGv src) +{ + tcg_gen_deposit_tl(result, result, src, N * 16, 16); +} + +static inline void gen_set_half_i64(int N, TCGv_i64 result, TCGv src) +{ + TCGv_i64 src64 = tcg_temp_new_i64(); + tcg_gen_extu_i32_i64(src64, src); + tcg_gen_deposit_i64(result, result, src64, N * 16, 16); + tcg_temp_free_i64(src64); +} + static void gen_set_byte_i64(int N, TCGv_i64 result, TCGv src) { TCGv_i64 src64 = tcg_temp_new_i64(); diff --git a/target/hexagon/imported/encode_pp.def b/target/hexagon/imported/encode_pp.def index 4464926..e3582eb 100644 --- a/target/hexagon/imported/encode_pp.def +++ b/target/hexagon/imported/encode_pp.def @@ -342,6 +342,12 @@ DEF_ENC32(L4_pload##TAG##fnew_abs,ICLASS_LD" 1 11 "OPC" iiiii PP111tti 1--ddd /* 0 000 misc: dealloc,loadw_locked,dcfetch */ +STD_LD_ENC(bzw4,"0 101") +STD_LD_ENC(bzw2,"0 011") + +STD_LD_ENC(bsw4,"0 111") +STD_LD_ENC(bsw2,"0 001") + STD_LD_ENC(rb, "1 000") STD_LD_ENC(rub, "1 001") STD_LD_ENC(rh, "1 010") diff --git a/target/hexagon/imported/ldst.idef b/target/hexagon/imported/ldst.idef index fe7e018..95c0470 100644 --- a/target/hexagon/imported/ldst.idef +++ b/target/hexagon/imported/ldst.idef @@ -38,6 +38,49 @@ STD_LD_AMODES(loadrh, "Rd32=memh", "Load signed Half integer",ATTRIBS(A_LOAD),"1 STD_LD_AMODES(loadri, "Rd32=memw", "Load Word",ATTRIBS(A_LOAD),"2",fLOAD(1,4,u,EA,RdV),2) STD_LD_AMODES(loadrd, "Rdd32=memd","Load Double integer",ATTRIBS(A_LOAD),"3",fLOAD(1,8,u,EA,RddV),3) +/* These instructions do a load an unpack */ +STD_LD_AMODES(loadbzw2, "Rd32=memubh", "Load Bytes and Vector Zero-Extend (unpack)", +ATTRIBS(A_LOAD),"1", +{fHIDE(size2u_t tmpV; int i;) + fLOAD(1,2,u,EA,tmpV); + for (i=0;i<2;i++) { + fSETHALF(i,RdV,fGETUBYTE(i,tmpV)); + } +},1) + +STD_LD_AMODES(loadbzw4, "Rdd32=memubh", "Load Bytes and Vector Zero-Extend (unpack)", +ATTRIBS(A_LOAD),"2", +{fHIDE(size4u_t tmpV; int i;) + fLOAD(1,4,u,EA,tmpV); + for (i=0;i<4;i++) { + fSETHALF(i,RddV,fGETUBYTE(i,tmpV)); + } +},2) + + + +/* These instructions do a load an unpack */ +STD_LD_AMODES(loadbsw2, "Rd32=membh", "Load Bytes and Vector Sign-Extend (unpack)", +ATTRIBS(A_LOAD),"1", +{fHIDE(size2u_t tmpV; int i;) + fLOAD(1,2,u,EA,tmpV); + for (i=0;i<2;i++) { + fSETHALF(i,RdV,fGETBYTE(i,tmpV)); + } +},1) + +STD_LD_AMODES(loadbsw4, "Rdd32=membh", "Load Bytes and Vector Sign-Extend (unpack)", +ATTRIBS(A_LOAD),"2", +{fHIDE(size4u_t tmpV; int i;) + fLOAD(1,4,u,EA,tmpV); + for (i=0;i<4;i++) { + fSETHALF(i,RddV,fGETBYTE(i,tmpV)); + } +},2) + + + + /* The set of addressing modes standard to all Store instructions */ #define STD_ST_AMODES(TAG,DEST,OPER,DESCR,ATTRIB,SHFT,SEMANTICS,SCALE)\ Q6INSN(S2_##TAG##_io, OPER"(Rs32+#s11:"SHFT")="DEST, ATTRIB,DESCR,{fIMMEXT(siV); fEA_RI(RsV,siV); SEMANTICS; })\ diff --git a/target/hexagon/macros.h b/target/hexagon/macros.h index 30c8951..ec5bf60 100644 --- a/target/hexagon/macros.h +++ b/target/hexagon/macros.h @@ -465,6 +465,21 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) #define fCAST8S_16S(A) (int128_exts64(A)) #define fCAST16S_8S(A) (int128_getlo(A)) +#ifdef QEMU_GENERATE +#define fEA_RI(REG, IMM) tcg_gen_addi_tl(EA, REG, IMM) +#define fEA_RRs(REG, REG2, SCALE) \ + do { \ + TCGv tmp = tcg_temp_new(); \ + tcg_gen_shli_tl(tmp, REG2, SCALE); \ + tcg_gen_add_tl(EA, REG, tmp); \ + tcg_temp_free(tmp); \ + } while (0) +#define fEA_IRs(IMM, REG, SCALE) \ + do { \ + tcg_gen_shli_tl(EA, REG, SCALE); \ + tcg_gen_addi_tl(EA, EA, IMM); \ + } while (0) +#else #define fEA_RI(REG, IMM) \ do { \ EA = REG + IMM; \ @@ -477,6 +492,7 @@ static inline TCGv gen_read_ireg(TCGv result, TCGv val, int shift) do { \ EA = IMM + (REG << SCALE); \ } while (0) +#endif #ifdef QEMU_GENERATE #define fEA_IMM(IMM) tcg_gen_movi_tl(EA, IMM) |