diff options
author | Stefan Hajnoczi <stefanha@redhat.com> | 2023-09-25 10:09:04 -0400 |
---|---|---|
committer | Stefan Hajnoczi <stefanha@redhat.com> | 2023-09-25 10:09:04 -0400 |
commit | bf94b63d76bafd452d536c3f45cdfdefb98045dc (patch) | |
tree | df87846b49689cd4b68ecb456e63d37a5d7b50ed | |
parent | b55e4b9c0525560577384adfc6d30eb0daa8d7be (diff) | |
parent | 231f6a7d66254a58bedbee458591b780e0a507b1 (diff) | |
download | qemu-bf94b63d76bafd452d536c3f45cdfdefb98045dc.zip qemu-bf94b63d76bafd452d536c3f45cdfdefb98045dc.tar.gz qemu-bf94b63d76bafd452d536c3f45cdfdefb98045dc.tar.bz2 |
Merge tag 'pull-target-arm-20230921' of https://git.linaro.org/people/pmaydell/qemu-arm into staging
target-arm queue:
* target/m68k: Add URL to semihosting spec
* docs/devel/loads-stores: Fix git grep regexes
* hw/arm/boot: Set SCR_EL3.FGTEn when booting kernel
* linux-user: Correct SME feature names reported in cpuinfo
* linux-user: Add missing arm32 hwcaps
* Don't skip MTE checks for LDRT/STRT at EL0
* Implement FEAT_HBC
* Implement FEAT_MOPS
* audio/jackaudio: Avoid dynamic stack allocation
* sbsa-ref: add non-secure EL2 virtual timer
* elf2dmp: improve Win2022, Win11 and large dumps
# -----BEGIN PGP SIGNATURE-----
#
# iQJNBAABCAA3FiEE4aXFk81BneKOgxXPPCUl7RQ2DN4FAmUMfwAZHHBldGVyLm1h
# eWRlbGxAbGluYXJvLm9yZwAKCRA8JSXtFDYM3jvnD/0QE/oOxfr+wkDUkTasSwVc
# UNfhObMj3h8x2XApqXckXnckew97I7hh7OLk35p9Ncea7fb6CvGMZ/DJir7AG4aQ
# Anpd5g2Qo0AMfPIyvoJ5pgtqZ1aS/EpBfYixmjL/zY6+zNzoVzWG/KfL+XamW6ir
# 6U7EqcAUzfX0+Splcxs5WgCDI5nGtn0B42EwOMpmwsH4opfr6HTn8Rzbn9gIwKU7
# u82PaKAqWPYD0ev9NQra+VVTrrFS4SCcqkV+SoYu0Cg5vvBlgAVcx0Zz2objp9LC
# 96fOtFH4Rch611j87WiGvN+fxQawqYzAYdy2y+j0wwuonTH9G3PpdZZT0557NjeS
# rFpW2UQebDqZ3ZTDwhzefsVKc3emLZtEd+RFa/YcDtao0afKfbSHv5A2/pGHxzlv
# 8psKOOH82WXTOHwFKA2o0lXDAauzirY+1Avy0vozNzPCdErXPgMHY4tABU77PpER
# Pz17jJO9C1AGyQVF+o09ieJR2Du5Wb2LLcZP3+5Ctm0SNVmREKKNcMkhJiEM9snm
# PQBR7FNEbAuQAO2MDK70dWUcTNtOv4Q1jgTR+aYd2MrArxCmAA5Zd9gjeYDwv6XH
# n242ONDAhlG1fY5f5giE3vCrcV1FDbvHEn6GDVilgMrF3a3Iw30xUaATiO09hIfi
# XAwGwLtMsp21WDa5PsfZVw==
# =dalQ
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 21 Sep 2023 13:36:00 EDT
# gpg: using RSA key E1A5C593CD419DE28E8315CF3C2525ED14360CDE
# gpg: issuer "peter.maydell@linaro.org"
# gpg: Good signature from "Peter Maydell <peter.maydell@linaro.org>" [full]
# gpg: aka "Peter Maydell <pmaydell@gmail.com>" [full]
# gpg: aka "Peter Maydell <pmaydell@chiark.greenend.org.uk>" [full]
# gpg: aka "Peter Maydell <peter@archaic.org.uk>" [unknown]
# Primary key fingerprint: E1A5 C593 CD41 9DE2 8E83 15CF 3C25 25ED 1436 0CDE
* tag 'pull-target-arm-20230921' of https://git.linaro.org/people/pmaydell/qemu-arm: (30 commits)
elf2dmp: rework PDB_STREAM_INDEXES::segments obtaining
elf2dmp: use Linux mmap with MAP_NORESERVE when possible
elf2dmp: introduce merging of physical memory runs
elf2dmp: introduce physical block alignment
elf2dmp: replace PE export name check with PDB name check
sbsa-ref: add non-secure EL2 virtual timer
audio/jackaudio: Avoid dynamic stack allocation in qjack_process()
audio/jackaudio: Avoid dynamic stack allocation in qjack_client_init
target/arm: Enable FEAT_MOPS for CPU 'max'
target/arm: Implement the CPY* instructions
target/arm: Implement MTE tag-checking functions for FEAT_MOPS copies
target/arm: Implement the SETG* instructions
target/arm: Define new TB flag for ATA0
target/arm: Implement the SET* instructions
target/arm: Implement MTE tag-checking functions for FEAT_MOPS
target/arm: New function allocation_tag_mem_probe()
target/arm: Define syndrome function for MOPS exceptions
target/arm: Pass unpriv bool to get_a64_user_mem_index()
target/arm: Implement FEAT_MOPS enable bits
target/arm: Don't skip MTE checks for LDRT/STRT at EL0
...
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
-rw-r--r-- | audio/jackaudio.c | 21 | ||||
-rw-r--r-- | contrib/elf2dmp/addrspace.c | 31 | ||||
-rw-r--r-- | contrib/elf2dmp/addrspace.h | 1 | ||||
-rw-r--r-- | contrib/elf2dmp/main.c | 154 | ||||
-rw-r--r-- | contrib/elf2dmp/pdb.c | 15 | ||||
-rw-r--r-- | contrib/elf2dmp/pdb.h | 2 | ||||
-rw-r--r-- | contrib/elf2dmp/qemu_elf.c | 68 | ||||
-rw-r--r-- | contrib/elf2dmp/qemu_elf.h | 2 | ||||
-rw-r--r-- | docs/devel/loads-stores.rst | 40 | ||||
-rw-r--r-- | docs/system/arm/emulation.rst | 2 | ||||
-rw-r--r-- | hw/arm/boot.c | 4 | ||||
-rw-r--r-- | hw/arm/sbsa-ref.c | 2 | ||||
-rw-r--r-- | linux-user/elfload.c | 72 | ||||
-rw-r--r-- | target/arm/cpu.h | 35 | ||||
-rw-r--r-- | target/arm/helper.c | 41 | ||||
-rw-r--r-- | target/arm/internals.h | 55 | ||||
-rw-r--r-- | target/arm/syndrome.h | 12 | ||||
-rw-r--r-- | target/arm/tcg/a64.decode | 38 | ||||
-rw-r--r-- | target/arm/tcg/cpu64.c | 5 | ||||
-rw-r--r-- | target/arm/tcg/helper-a64.c | 878 | ||||
-rw-r--r-- | target/arm/tcg/helper-a64.h | 14 | ||||
-rw-r--r-- | target/arm/tcg/hflags.c | 21 | ||||
-rw-r--r-- | target/arm/tcg/mte_helper.c | 281 | ||||
-rw-r--r-- | target/arm/tcg/translate-a64.c | 164 | ||||
-rw-r--r-- | target/arm/tcg/translate.h | 4 | ||||
-rw-r--r-- | target/m68k/m68k-semi.c | 4 | ||||
-rw-r--r-- | tests/tcg/aarch64/sysregs.c | 4 |
27 files changed, 1769 insertions, 201 deletions
diff --git a/audio/jackaudio.c b/audio/jackaudio.c index 5bdf3d7..e1eaa34 100644 --- a/audio/jackaudio.c +++ b/audio/jackaudio.c @@ -70,6 +70,9 @@ typedef struct QJackClient { int buffersize; jack_port_t **port; QJackBuffer fifo; + + /* Used as workspace by qjack_process() */ + float **process_buffers; } QJackClient; @@ -267,22 +270,21 @@ static int qjack_process(jack_nframes_t nframes, void *arg) } /* get the buffers for the ports */ - float *buffers[c->nchannels]; for (int i = 0; i < c->nchannels; ++i) { - buffers[i] = jack_port_get_buffer(c->port[i], nframes); + c->process_buffers[i] = jack_port_get_buffer(c->port[i], nframes); } if (c->out) { if (likely(c->enabled)) { - qjack_buffer_read_l(&c->fifo, buffers, nframes); + qjack_buffer_read_l(&c->fifo, c->process_buffers, nframes); } else { for (int i = 0; i < c->nchannels; ++i) { - memset(buffers[i], 0, nframes * sizeof(float)); + memset(c->process_buffers[i], 0, nframes * sizeof(float)); } } } else { if (likely(c->enabled)) { - qjack_buffer_write_l(&c->fifo, buffers, nframes); + qjack_buffer_write_l(&c->fifo, c->process_buffers, nframes); } } @@ -400,7 +402,8 @@ static void qjack_client_connect_ports(QJackClient *c) static int qjack_client_init(QJackClient *c) { jack_status_t status; - char client_name[jack_client_name_size()]; + int client_name_len = jack_client_name_size(); /* includes NUL */ + g_autofree char *client_name = g_new(char, client_name_len); jack_options_t options = JackNullOption; if (c->state == QJACK_STATE_RUNNING) { @@ -409,7 +412,7 @@ static int qjack_client_init(QJackClient *c) c->connect_ports = true; - snprintf(client_name, sizeof(client_name), "%s-%s", + snprintf(client_name, client_name_len, "%s-%s", c->out ? "out" : "in", c->opt->client_name ? c->opt->client_name : audio_application_name()); @@ -447,6 +450,9 @@ static int qjack_client_init(QJackClient *c) jack_get_client_name(c->client)); } + /* Allocate working buffer for process callback */ + c->process_buffers = g_new(float *, c->nchannels); + jack_set_process_callback(c->client, qjack_process , c); jack_set_port_registration_callback(c->client, qjack_port_registration, c); jack_set_xrun_callback(c->client, qjack_xrun, c); @@ -578,6 +584,7 @@ static void qjack_client_fini_locked(QJackClient *c) qjack_buffer_free(&c->fifo); g_free(c->port); + g_free(c->process_buffers); c->state = QJACK_STATE_DISCONNECTED; /* fallthrough */ diff --git a/contrib/elf2dmp/addrspace.c b/contrib/elf2dmp/addrspace.c index 0b04cba..64b5d68 100644 --- a/contrib/elf2dmp/addrspace.c +++ b/contrib/elf2dmp/addrspace.c @@ -14,7 +14,7 @@ static struct pa_block *pa_space_find_block(struct pa_space *ps, uint64_t pa) for (i = 0; i < ps->block_nr; i++) { if (ps->block[i].paddr <= pa && - pa <= ps->block[i].paddr + ps->block[i].size) { + pa < ps->block[i].paddr + ps->block[i].size) { return ps->block + i; } } @@ -33,6 +33,30 @@ static uint8_t *pa_space_resolve(struct pa_space *ps, uint64_t pa) return block->addr + (pa - block->paddr); } +static void pa_block_align(struct pa_block *b) +{ + uint64_t low_align = ((b->paddr - 1) | ELF2DMP_PAGE_MASK) + 1 - b->paddr; + uint64_t high_align = (b->paddr + b->size) & ELF2DMP_PAGE_MASK; + + if (low_align == 0 && high_align == 0) { + return; + } + + if (low_align + high_align < b->size) { + printf("Block 0x%"PRIx64"+:0x%"PRIx64" will be aligned to " + "0x%"PRIx64"+:0x%"PRIx64"\n", b->paddr, b->size, + b->paddr + low_align, b->size - low_align - high_align); + b->size -= low_align + high_align; + } else { + printf("Block 0x%"PRIx64"+:0x%"PRIx64" is too small to align\n", + b->paddr, b->size); + b->size = 0; + } + + b->addr += low_align; + b->paddr += low_align; +} + int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf) { Elf64_Half phdr_nr = elf_getphdrnum(qemu_elf->map); @@ -60,10 +84,13 @@ int pa_space_create(struct pa_space *ps, QEMU_Elf *qemu_elf) .paddr = phdr[i].p_paddr, .size = phdr[i].p_filesz, }; - block_i++; + pa_block_align(&ps->block[block_i]); + block_i = ps->block[block_i].size ? (block_i + 1) : block_i; } } + ps->block_nr = block_i; + return 0; } diff --git a/contrib/elf2dmp/addrspace.h b/contrib/elf2dmp/addrspace.h index 00b44c1..039c70c 100644 --- a/contrib/elf2dmp/addrspace.h +++ b/contrib/elf2dmp/addrspace.h @@ -12,6 +12,7 @@ #define ELF2DMP_PAGE_BITS 12 #define ELF2DMP_PAGE_SIZE (1ULL << ELF2DMP_PAGE_BITS) +#define ELF2DMP_PAGE_MASK (ELF2DMP_PAGE_SIZE - 1) #define ELF2DMP_PFN_MASK (~(ELF2DMP_PAGE_SIZE - 1)) #define INVALID_PA UINT64_MAX diff --git a/contrib/elf2dmp/main.c b/contrib/elf2dmp/main.c index 6d4d185..5db163b 100644 --- a/contrib/elf2dmp/main.c +++ b/contrib/elf2dmp/main.c @@ -20,6 +20,7 @@ #define PE_NAME "ntoskrnl.exe" #define INITIAL_MXCSR 0x1f80 +#define MAX_NUMBER_OF_RUNS 42 typedef struct idt_desc { uint16_t offset1; /* offset bits 0..15 */ @@ -234,6 +235,42 @@ static int fix_dtb(struct va_space *vs, QEMU_Elf *qe) return 1; } +static void try_merge_runs(struct pa_space *ps, + WinDumpPhyMemDesc64 *PhysicalMemoryBlock) +{ + unsigned int merge_cnt = 0, run_idx = 0; + + PhysicalMemoryBlock->NumberOfRuns = 0; + + for (size_t idx = 0; idx < ps->block_nr; idx++) { + struct pa_block *blk = ps->block + idx; + struct pa_block *next = blk + 1; + + PhysicalMemoryBlock->NumberOfPages += blk->size / ELF2DMP_PAGE_SIZE; + + if (idx + 1 != ps->block_nr && blk->paddr + blk->size == next->paddr) { + printf("Block #%zu 0x%"PRIx64"+:0x%"PRIx64" and %u previous will be" + " merged\n", idx, blk->paddr, blk->size, merge_cnt); + merge_cnt++; + } else { + struct pa_block *first_merged = blk - merge_cnt; + + printf("Block #%zu 0x%"PRIx64"+:0x%"PRIx64" and %u previous will be" + " merged to 0x%"PRIx64"+:0x%"PRIx64" (run #%u)\n", + idx, blk->paddr, blk->size, merge_cnt, first_merged->paddr, + blk->paddr + blk->size - first_merged->paddr, run_idx); + PhysicalMemoryBlock->Run[run_idx] = (WinDumpPhyMemRun64) { + .BasePage = first_merged->paddr / ELF2DMP_PAGE_SIZE, + .PageCount = (blk->paddr + blk->size - first_merged->paddr) / + ELF2DMP_PAGE_SIZE, + }; + PhysicalMemoryBlock->NumberOfRuns++; + run_idx++; + merge_cnt = 0; + } + } +} + static int fill_header(WinDumpHeader64 *hdr, struct pa_space *ps, struct va_space *vs, uint64_t KdDebuggerDataBlock, KDDEBUGGER_DATA64 *kdbg, uint64_t KdVersionBlock, int nr_cpus) @@ -244,7 +281,6 @@ static int fill_header(WinDumpHeader64 *hdr, struct pa_space *ps, KUSD_OFFSET_PRODUCT_TYPE); DBGKD_GET_VERSION64 kvb; WinDumpHeader64 h; - size_t i; QEMU_BUILD_BUG_ON(KUSD_OFFSET_SUITE_MASK >= ELF2DMP_PAGE_SIZE); QEMU_BUILD_BUG_ON(KUSD_OFFSET_PRODUCT_TYPE >= ELF2DMP_PAGE_SIZE); @@ -282,13 +318,17 @@ static int fill_header(WinDumpHeader64 *hdr, struct pa_space *ps, .RequiredDumpSpace = sizeof(h), }; - for (i = 0; i < ps->block_nr; i++) { - h.PhysicalMemoryBlock.NumberOfPages += - ps->block[i].size / ELF2DMP_PAGE_SIZE; - h.PhysicalMemoryBlock.Run[i] = (WinDumpPhyMemRun64) { - .BasePage = ps->block[i].paddr / ELF2DMP_PAGE_SIZE, - .PageCount = ps->block[i].size / ELF2DMP_PAGE_SIZE, - }; + if (h.PhysicalMemoryBlock.NumberOfRuns <= MAX_NUMBER_OF_RUNS) { + for (size_t idx = 0; idx < ps->block_nr; idx++) { + h.PhysicalMemoryBlock.NumberOfPages += + ps->block[idx].size / ELF2DMP_PAGE_SIZE; + h.PhysicalMemoryBlock.Run[idx] = (WinDumpPhyMemRun64) { + .BasePage = ps->block[idx].paddr / ELF2DMP_PAGE_SIZE, + .PageCount = ps->block[idx].size / ELF2DMP_PAGE_SIZE, + }; + } + } else { + try_merge_runs(ps, &h.PhysicalMemoryBlock); } h.RequiredDumpSpace += @@ -400,9 +440,10 @@ static int write_dump(struct pa_space *ps, for (i = 0; i < ps->block_nr; i++) { struct pa_block *b = &ps->block[i]; - printf("Writing block #%zu/%zu to file...\n", i, ps->block_nr); + printf("Writing block #%zu/%zu of %"PRIu64" bytes to file...\n", i, + ps->block_nr, b->size); if (fwrite(b->addr, b->size, 1, dmp_file) != 1) { - eprintf("Failed to write dump header\n"); + eprintf("Failed to write block\n"); fclose(dmp_file); return 1; } @@ -411,89 +452,64 @@ static int write_dump(struct pa_space *ps, return fclose(dmp_file); } -static bool pe_check_export_name(uint64_t base, void *start_addr, - struct va_space *vs) -{ - IMAGE_EXPORT_DIRECTORY export_dir; - const char *pe_name; - - if (pe_get_data_dir_entry(base, start_addr, IMAGE_FILE_EXPORT_DIRECTORY, - &export_dir, sizeof(export_dir), vs)) { - return false; - } - - pe_name = va_space_resolve(vs, base + export_dir.Name); - if (!pe_name) { - return false; - } - - return !strcmp(pe_name, PE_NAME); -} - -static int pe_get_pdb_symstore_hash(uint64_t base, void *start_addr, - char *hash, struct va_space *vs) +static bool pe_check_pdb_name(uint64_t base, void *start_addr, + struct va_space *vs, OMFSignatureRSDS *rsds) { const char sign_rsds[4] = "RSDS"; IMAGE_DEBUG_DIRECTORY debug_dir; - OMFSignatureRSDS rsds; - char *pdb_name; - size_t pdb_name_sz; - size_t i; + char pdb_name[sizeof(PDB_NAME)]; if (pe_get_data_dir_entry(base, start_addr, IMAGE_FILE_DEBUG_DIRECTORY, &debug_dir, sizeof(debug_dir), vs)) { eprintf("Failed to get Debug Directory\n"); - return 1; + return false; } if (debug_dir.Type != IMAGE_DEBUG_TYPE_CODEVIEW) { - return 1; + eprintf("Debug Directory type is not CodeView\n"); + return false; } if (va_space_rw(vs, base + debug_dir.AddressOfRawData, - &rsds, sizeof(rsds), 0)) { - return 1; + rsds, sizeof(*rsds), 0)) { + eprintf("Failed to resolve OMFSignatureRSDS\n"); + return false; } - printf("CodeView signature is \'%.4s\'\n", rsds.Signature); - - if (memcmp(&rsds.Signature, sign_rsds, sizeof(sign_rsds))) { - return 1; + if (memcmp(&rsds->Signature, sign_rsds, sizeof(sign_rsds))) { + eprintf("CodeView signature is \'%.4s\', \'%s\' expected\n", + rsds->Signature, sign_rsds); + return false; } - pdb_name_sz = debug_dir.SizeOfData - sizeof(rsds); - pdb_name = malloc(pdb_name_sz); - if (!pdb_name) { - return 1; + if (debug_dir.SizeOfData - sizeof(*rsds) != sizeof(PDB_NAME)) { + eprintf("PDB name size doesn't match\n"); + return false; } if (va_space_rw(vs, base + debug_dir.AddressOfRawData + - offsetof(OMFSignatureRSDS, name), pdb_name, pdb_name_sz, 0)) { - free(pdb_name); - return 1; + offsetof(OMFSignatureRSDS, name), pdb_name, sizeof(PDB_NAME), + 0)) { + eprintf("Failed to resolve PDB name\n"); + return false; } printf("PDB name is \'%s\', \'%s\' expected\n", pdb_name, PDB_NAME); - if (strcmp(pdb_name, PDB_NAME)) { - eprintf("Unexpected PDB name, it seems the kernel isn't found\n"); - free(pdb_name); - return 1; - } - - free(pdb_name); + return !strcmp(pdb_name, PDB_NAME); +} - sprintf(hash, "%.08x%.04x%.04x%.02x%.02x", rsds.guid.a, rsds.guid.b, - rsds.guid.c, rsds.guid.d[0], rsds.guid.d[1]); +static void pe_get_pdb_symstore_hash(OMFSignatureRSDS *rsds, char *hash) +{ + sprintf(hash, "%.08x%.04x%.04x%.02x%.02x", rsds->guid.a, rsds->guid.b, + rsds->guid.c, rsds->guid.d[0], rsds->guid.d[1]); hash += 20; - for (i = 0; i < 6; i++, hash += 2) { - sprintf(hash, "%.02x", rsds.guid.e[i]); + for (unsigned int i = 0; i < 6; i++, hash += 2) { + sprintf(hash, "%.02x", rsds->guid.e[i]); } - sprintf(hash, "%.01x", rsds.age); - - return 0; + sprintf(hash, "%.01x", rsds->age); } int main(int argc, char *argv[]) @@ -515,6 +531,7 @@ int main(int argc, char *argv[]) KDDEBUGGER_DATA64 *kdbg; uint64_t KdVersionBlock; bool kernel_found = false; + OMFSignatureRSDS rsds; if (argc != 3) { eprintf("usage:\n\t%s elf_file dmp_file\n", argv[0]); @@ -562,7 +579,8 @@ int main(int argc, char *argv[]) } if (*(uint16_t *)nt_start_addr == 0x5a4d) { /* MZ */ - if (pe_check_export_name(KernBase, nt_start_addr, &vs)) { + printf("Checking candidate KernBase = 0x%016"PRIx64"\n", KernBase); + if (pe_check_pdb_name(KernBase, nt_start_addr, &vs, &rsds)) { kernel_found = true; break; } @@ -578,11 +596,7 @@ int main(int argc, char *argv[]) printf("KernBase = 0x%016"PRIx64", signature is \'%.2s\'\n", KernBase, (char *)nt_start_addr); - if (pe_get_pdb_symstore_hash(KernBase, nt_start_addr, pdb_hash, &vs)) { - eprintf("Failed to get PDB symbol store hash\n"); - err = 1; - goto out_ps; - } + pe_get_pdb_symstore_hash(&rsds, pdb_hash); sprintf(pdb_url, "%s%s/%s/%s", SYM_URL_BASE, PDB_NAME, pdb_hash, PDB_NAME); printf("PDB URL is %s\n", pdb_url); diff --git a/contrib/elf2dmp/pdb.c b/contrib/elf2dmp/pdb.c index adcfa7e..6ca5086 100644 --- a/contrib/elf2dmp/pdb.c +++ b/contrib/elf2dmp/pdb.c @@ -160,7 +160,7 @@ static void *pdb_ds_read_file(struct pdb_reader* r, uint32_t file_number) static int pdb_init_segments(struct pdb_reader *r) { char *segs; - unsigned stream_idx = r->sidx.segments; + unsigned stream_idx = r->segments; segs = pdb_ds_read_file(r, stream_idx); if (!segs) { @@ -177,9 +177,6 @@ static int pdb_init_symbols(struct pdb_reader *r) { int err = 0; PDB_SYMBOLS *symbols; - PDB_STREAM_INDEXES *sidx = &r->sidx; - - memset(sidx, -1, sizeof(*sidx)); symbols = pdb_ds_read_file(r, 3); if (!symbols) { @@ -188,15 +185,11 @@ static int pdb_init_symbols(struct pdb_reader *r) r->symbols = symbols; - if (symbols->stream_index_size != sizeof(PDB_STREAM_INDEXES)) { - err = 1; - goto out_symbols; - } - - memcpy(sidx, (const char *)symbols + sizeof(PDB_SYMBOLS) + + r->segments = *(uint16_t *)((const char *)symbols + sizeof(PDB_SYMBOLS) + symbols->module_size + symbols->offset_size + symbols->hash_size + symbols->srcmodule_size + - symbols->pdbimport_size + symbols->unknown2_size, sizeof(*sidx)); + symbols->pdbimport_size + symbols->unknown2_size + + offsetof(PDB_STREAM_INDEXES, segments)); /* Read global symbol table */ r->modimage = pdb_ds_read_file(r, symbols->gsym_file); diff --git a/contrib/elf2dmp/pdb.h b/contrib/elf2dmp/pdb.h index 4ea8925..2a50da5 100644 --- a/contrib/elf2dmp/pdb.h +++ b/contrib/elf2dmp/pdb.h @@ -227,7 +227,7 @@ struct pdb_reader { } ds; uint32_t file_used[1024]; PDB_SYMBOLS *symbols; - PDB_STREAM_INDEXES sidx; + uint16_t segments; uint8_t *modimage; char *segs; size_t segs_size; diff --git a/contrib/elf2dmp/qemu_elf.c b/contrib/elf2dmp/qemu_elf.c index ebda60d..de6ad74 100644 --- a/contrib/elf2dmp/qemu_elf.c +++ b/contrib/elf2dmp/qemu_elf.c @@ -165,10 +165,40 @@ static bool check_ehdr(QEMU_Elf *qe) return true; } -int QEMU_Elf_init(QEMU_Elf *qe, const char *filename) +static int QEMU_Elf_map(QEMU_Elf *qe, const char *filename) { +#ifdef CONFIG_LINUX + struct stat st; + int fd; + + printf("Using Linux mmap\n"); + + fd = open(filename, O_RDONLY, 0); + if (fd == -1) { + eprintf("Failed to open ELF dump file \'%s\'\n", filename); + return 1; + } + + if (fstat(fd, &st)) { + eprintf("Failed to get size of ELF dump file\n"); + close(fd); + return 1; + } + qe->size = st.st_size; + + qe->map = mmap(NULL, qe->size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_NORESERVE, fd, 0); + if (qe->map == MAP_FAILED) { + eprintf("Failed to map ELF file\n"); + close(fd); + return 1; + } + + close(fd); +#else GError *gerr = NULL; - int err = 0; + + printf("Using GLib mmap\n"); qe->gmf = g_mapped_file_new(filename, TRUE, &gerr); if (gerr) { @@ -179,29 +209,43 @@ int QEMU_Elf_init(QEMU_Elf *qe, const char *filename) qe->map = g_mapped_file_get_contents(qe->gmf); qe->size = g_mapped_file_get_length(qe->gmf); +#endif + + return 0; +} + +static void QEMU_Elf_unmap(QEMU_Elf *qe) +{ +#ifdef CONFIG_LINUX + munmap(qe->map, qe->size); +#else + g_mapped_file_unref(qe->gmf); +#endif +} + +int QEMU_Elf_init(QEMU_Elf *qe, const char *filename) +{ + if (QEMU_Elf_map(qe, filename)) { + return 1; + } if (!check_ehdr(qe)) { eprintf("Input file has the wrong format\n"); - err = 1; - goto out_unmap; + QEMU_Elf_unmap(qe); + return 1; } if (init_states(qe)) { eprintf("Failed to extract QEMU CPU states\n"); - err = 1; - goto out_unmap; + QEMU_Elf_unmap(qe); + return 1; } return 0; - -out_unmap: - g_mapped_file_unref(qe->gmf); - - return err; } void QEMU_Elf_exit(QEMU_Elf *qe) { exit_states(qe); - g_mapped_file_unref(qe->gmf); + QEMU_Elf_unmap(qe); } diff --git a/contrib/elf2dmp/qemu_elf.h b/contrib/elf2dmp/qemu_elf.h index b2f0d9c..afa75f1 100644 --- a/contrib/elf2dmp/qemu_elf.h +++ b/contrib/elf2dmp/qemu_elf.h @@ -32,7 +32,9 @@ typedef struct QEMUCPUState { int is_system(QEMUCPUState *s); typedef struct QEMU_Elf { +#ifndef CONFIG_LINUX GMappedFile *gmf; +#endif size_t size; void *map; QEMUCPUState **state; diff --git a/docs/devel/loads-stores.rst b/docs/devel/loads-stores.rst index dab6dfa..ec627aa 100644 --- a/docs/devel/loads-stores.rst +++ b/docs/devel/loads-stores.rst @@ -63,12 +63,12 @@ which stores ``val`` to ``ptr`` as an ``{endian}`` order value of size ``sz`` bytes. -Regexes for git grep +Regexes for git grep: - ``\<ld[us]\?[bwlq]\(_[hbl]e\)\?_p\>`` - ``\<st[bwlq]\(_[hbl]e\)\?_p\>`` - ``\<st24\(_[hbl]e\)\?_p\>`` - - ``\<ldn_\([hbl]e\)?_p\>`` - - ``\<stn_\([hbl]e\)?_p\>`` + - ``\<ldn_\([hbl]e\)\?_p\>`` + - ``\<stn_\([hbl]e\)\?_p\>`` ``cpu_{ld,st}*_mmu`` ~~~~~~~~~~~~~~~~~~~~ @@ -121,8 +121,8 @@ store: ``cpu_st{size}{end}_mmu(env, ptr, val, oi, retaddr)`` - ``_le`` : little endian Regexes for git grep: - - ``\<cpu_ld[bwlq](_[bl]e)\?_mmu\>`` - - ``\<cpu_st[bwlq](_[bl]e)\?_mmu\>`` + - ``\<cpu_ld[bwlq]\(_[bl]e\)\?_mmu\>`` + - ``\<cpu_st[bwlq]\(_[bl]e\)\?_mmu\>`` ``cpu_{ld,st}*_mmuidx_ra`` @@ -155,8 +155,8 @@ store: ``cpu_st{size}{end}_mmuidx_ra(env, ptr, val, mmuidx, retaddr)`` - ``_le`` : little endian Regexes for git grep: - - ``\<cpu_ld[us]\?[bwlq](_[bl]e)\?_mmuidx_ra\>`` - - ``\<cpu_st[bwlq](_[bl]e)\?_mmuidx_ra\>`` + - ``\<cpu_ld[us]\?[bwlq]\(_[bl]e\)\?_mmuidx_ra\>`` + - ``\<cpu_st[bwlq]\(_[bl]e\)\?_mmuidx_ra\>`` ``cpu_{ld,st}*_data_ra`` ~~~~~~~~~~~~~~~~~~~~~~~~ @@ -193,8 +193,8 @@ store: ``cpu_st{size}{end}_data_ra(env, ptr, val, ra)`` - ``_le`` : little endian Regexes for git grep: - - ``\<cpu_ld[us]\?[bwlq](_[bl]e)\?_data_ra\>`` - - ``\<cpu_st[bwlq](_[bl]e)\?_data_ra\>`` + - ``\<cpu_ld[us]\?[bwlq]\(_[bl]e\)\?_data_ra\>`` + - ``\<cpu_st[bwlq]\(_[bl]e\)\?_data_ra\>`` ``cpu_{ld,st}*_data`` ~~~~~~~~~~~~~~~~~~~~~ @@ -231,9 +231,9 @@ store: ``cpu_st{size}{end}_data(env, ptr, val)`` - ``_be`` : big endian - ``_le`` : little endian -Regexes for git grep - - ``\<cpu_ld[us]\?[bwlq](_[bl]e)\?_data\>`` - - ``\<cpu_st[bwlq](_[bl]e)\?_data\+\>`` +Regexes for git grep: + - ``\<cpu_ld[us]\?[bwlq]\(_[bl]e\)\?_data\>`` + - ``\<cpu_st[bwlq]\(_[bl]e\)\?_data\+\>`` ``cpu_ld*_code`` ~~~~~~~~~~~~~~~~ @@ -296,7 +296,7 @@ swap: ``translator_ld{sign}{size}_swap(env, ptr, swap)`` - ``l`` : 32 bits - ``q`` : 64 bits -Regexes for git grep +Regexes for git grep: - ``\<translator_ld[us]\?[bwlq]\(_swap\)\?\>`` ``helper_{ld,st}*_mmu`` @@ -325,7 +325,7 @@ store: ``helper_{size}_mmu(env, addr, val, opindex, retaddr)`` - ``l`` : 32 bits - ``q`` : 64 bits -Regexes for git grep +Regexes for git grep: - ``\<helper_ld[us]\?[bwlq]_mmu\>`` - ``\<helper_st[bwlq]_mmu\>`` @@ -382,7 +382,7 @@ succeeded using a MemTxResult return code. The ``_{endian}`` suffix is omitted for byte accesses. -Regexes for git grep +Regexes for git grep: - ``\<address_space_\(read\|write\|rw\)\>`` - ``\<address_space_ldu\?[bwql]\(_[lb]e\)\?\>`` - ``\<address_space_st[bwql]\(_[lb]e\)\?\>`` @@ -400,7 +400,7 @@ Note that portions of the write which attempt to write data to a device will be silently ignored -- only real RAM and ROM will be written to. -Regexes for git grep +Regexes for git grep: - ``address_space_write_rom`` ``{ld,st}*_phys`` @@ -438,7 +438,7 @@ device doing the access has no way to report such an error. The ``_{endian}_`` infix is omitted for byte accesses. -Regexes for git grep +Regexes for git grep: - ``\<ldu\?[bwlq]\(_[bl]e\)\?_phys\>`` - ``\<st[bwlq]\(_[bl]e\)\?_phys\>`` @@ -462,7 +462,7 @@ For new code they are better avoided: ``cpu_physical_memory_rw`` -Regexes for git grep +Regexes for git grep: - ``\<cpu_physical_memory_\(read\|write\|rw\)\>`` ``cpu_memory_rw_debug`` @@ -497,7 +497,7 @@ make sure our existing code is doing things correctly. ``dma_memory_rw`` -Regexes for git grep +Regexes for git grep: - ``\<dma_memory_\(read\|write\|rw\)\>`` - ``\<ldu\?[bwlq]\(_[bl]e\)\?_dma\>`` - ``\<st[bwlq]\(_[bl]e\)\?_dma\>`` @@ -538,7 +538,7 @@ correct address space for that device. The ``_{endian}_`` infix is omitted for byte accesses. -Regexes for git grep +Regexes for git grep: - ``\<pci_dma_\(read\|write\|rw\)\>`` - ``\<ldu\?[bwlq]\(_[bl]e\)\?_pci_dma\>`` - ``\<st[bwlq]\(_[bl]e\)\?_pci_dma\>`` diff --git a/docs/system/arm/emulation.rst b/docs/system/arm/emulation.rst index 3df936f..965cbf8 100644 --- a/docs/system/arm/emulation.rst +++ b/docs/system/arm/emulation.rst @@ -42,6 +42,7 @@ the following architecture extensions: - FEAT_FlagM2 (Enhancements to flag manipulation instructions) - FEAT_GTG (Guest translation granule size) - FEAT_HAFDBS (Hardware management of the access flag and dirty bit state) +- FEAT_HBC (Hinted conditional branches) - FEAT_HCX (Support for the HCRX_EL2 register) - FEAT_HPDS (Hierarchical permission disables) - FEAT_HPDS2 (Translation table page-based hardware attributes) @@ -57,6 +58,7 @@ the following architecture extensions: - FEAT_LSE (Large System Extensions) - FEAT_LSE2 (Large System Extensions v2) - FEAT_LVA (Large Virtual Address space) +- FEAT_MOPS (Standardization of memory operations) - FEAT_MTE (Memory Tagging Extension) - FEAT_MTE2 (Memory Tagging Extension) - FEAT_MTE3 (MTE Asymmetric Fault Handling) diff --git a/hw/arm/boot.c b/hw/arm/boot.c index 720f2253..24fa169 100644 --- a/hw/arm/boot.c +++ b/hw/arm/boot.c @@ -761,6 +761,10 @@ static void do_cpu_reset(void *opaque) if (cpu_isar_feature(aa64_hcx, cpu)) { env->cp15.scr_el3 |= SCR_HXEN; } + if (cpu_isar_feature(aa64_fgt, cpu)) { + env->cp15.scr_el3 |= SCR_FGTEN; + } + /* AArch64 kernels never boot in secure mode */ assert(!info->secure_boot); /* This hook is only supported for AArch32 currently: diff --git a/hw/arm/sbsa-ref.c b/hw/arm/sbsa-ref.c index bc89eb4..3c7dfcd 100644 --- a/hw/arm/sbsa-ref.c +++ b/hw/arm/sbsa-ref.c @@ -61,6 +61,7 @@ #define ARCH_TIMER_S_EL1_IRQ 13 #define ARCH_TIMER_NS_EL1_IRQ 14 #define ARCH_TIMER_NS_EL2_IRQ 10 +#define ARCH_TIMER_NS_EL2_VIRT_IRQ 12 enum { SBSA_FLASH, @@ -489,6 +490,7 @@ static void create_gic(SBSAMachineState *sms, MemoryRegion *mem) [GTIMER_VIRT] = ARCH_TIMER_VIRT_IRQ, [GTIMER_HYP] = ARCH_TIMER_NS_EL2_IRQ, [GTIMER_SEC] = ARCH_TIMER_S_EL1_IRQ, + [GTIMER_HYPVIRT] = ARCH_TIMER_NS_EL2_VIRT_IRQ, }; for (irq = 0; irq < ARRAY_SIZE(timer_irq); irq++) { diff --git a/linux-user/elfload.c b/linux-user/elfload.c index a5b28fa..db75cd4 100644 --- a/linux-user/elfload.c +++ b/linux-user/elfload.c @@ -402,6 +402,12 @@ enum ARM_HWCAP_ARM_VFPD32 = 1 << 19, ARM_HWCAP_ARM_LPAE = 1 << 20, ARM_HWCAP_ARM_EVTSTRM = 1 << 21, + ARM_HWCAP_ARM_FPHP = 1 << 22, + ARM_HWCAP_ARM_ASIMDHP = 1 << 23, + ARM_HWCAP_ARM_ASIMDDP = 1 << 24, + ARM_HWCAP_ARM_ASIMDFHM = 1 << 25, + ARM_HWCAP_ARM_ASIMDBF16 = 1 << 26, + ARM_HWCAP_ARM_I8MM = 1 << 27, }; enum { @@ -410,6 +416,8 @@ enum { ARM_HWCAP2_ARM_SHA1 = 1 << 2, ARM_HWCAP2_ARM_SHA2 = 1 << 3, ARM_HWCAP2_ARM_CRC32 = 1 << 4, + ARM_HWCAP2_ARM_SB = 1 << 5, + ARM_HWCAP2_ARM_SSBS = 1 << 6, }; /* The commpage only exists for 32 bit kernels */ @@ -498,6 +506,16 @@ uint32_t get_elf_hwcap(void) } } GET_FEATURE_ID(aa32_simdfmac, ARM_HWCAP_ARM_VFPv4); + /* + * MVFR1.FPHP and .SIMDHP must be in sync, and QEMU uses the same + * isar_feature function for both. The kernel reports them as two hwcaps. + */ + GET_FEATURE_ID(aa32_fp16_arith, ARM_HWCAP_ARM_FPHP); + GET_FEATURE_ID(aa32_fp16_arith, ARM_HWCAP_ARM_ASIMDHP); + GET_FEATURE_ID(aa32_dp, ARM_HWCAP_ARM_ASIMDDP); + GET_FEATURE_ID(aa32_fhm, ARM_HWCAP_ARM_ASIMDFHM); + GET_FEATURE_ID(aa32_bf16, ARM_HWCAP_ARM_ASIMDBF16); + GET_FEATURE_ID(aa32_i8mm, ARM_HWCAP_ARM_I8MM); return hwcaps; } @@ -512,6 +530,8 @@ uint32_t get_elf_hwcap2(void) GET_FEATURE_ID(aa32_sha1, ARM_HWCAP2_ARM_SHA1); GET_FEATURE_ID(aa32_sha2, ARM_HWCAP2_ARM_SHA2); GET_FEATURE_ID(aa32_crc32, ARM_HWCAP2_ARM_CRC32); + GET_FEATURE_ID(aa32_sb, ARM_HWCAP2_ARM_SB); + GET_FEATURE_ID(aa32_ssbs, ARM_HWCAP2_ARM_SSBS); return hwcaps; } @@ -540,6 +560,12 @@ const char *elf_hwcap_str(uint32_t bit) [__builtin_ctz(ARM_HWCAP_ARM_VFPD32 )] = "vfpd32", [__builtin_ctz(ARM_HWCAP_ARM_LPAE )] = "lpae", [__builtin_ctz(ARM_HWCAP_ARM_EVTSTRM )] = "evtstrm", + [__builtin_ctz(ARM_HWCAP_ARM_FPHP )] = "fphp", + [__builtin_ctz(ARM_HWCAP_ARM_ASIMDHP )] = "asimdhp", + [__builtin_ctz(ARM_HWCAP_ARM_ASIMDDP )] = "asimddp", + [__builtin_ctz(ARM_HWCAP_ARM_ASIMDFHM )] = "asimdfhm", + [__builtin_ctz(ARM_HWCAP_ARM_ASIMDBF16)] = "asimdbf16", + [__builtin_ctz(ARM_HWCAP_ARM_I8MM )] = "i8mm", }; return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL; @@ -553,6 +579,8 @@ const char *elf_hwcap2_str(uint32_t bit) [__builtin_ctz(ARM_HWCAP2_ARM_SHA1 )] = "sha1", [__builtin_ctz(ARM_HWCAP2_ARM_SHA2 )] = "sha2", [__builtin_ctz(ARM_HWCAP2_ARM_CRC32)] = "crc32", + [__builtin_ctz(ARM_HWCAP2_ARM_SB )] = "sb", + [__builtin_ctz(ARM_HWCAP2_ARM_SSBS )] = "ssbs", }; return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL; @@ -696,6 +724,20 @@ enum { ARM_HWCAP2_A64_SME_B16F32 = 1 << 28, ARM_HWCAP2_A64_SME_F32F32 = 1 << 29, ARM_HWCAP2_A64_SME_FA64 = 1 << 30, + ARM_HWCAP2_A64_WFXT = 1ULL << 31, + ARM_HWCAP2_A64_EBF16 = 1ULL << 32, + ARM_HWCAP2_A64_SVE_EBF16 = 1ULL << 33, + ARM_HWCAP2_A64_CSSC = 1ULL << 34, + ARM_HWCAP2_A64_RPRFM = 1ULL << 35, + ARM_HWCAP2_A64_SVE2P1 = 1ULL << 36, + ARM_HWCAP2_A64_SME2 = 1ULL << 37, + ARM_HWCAP2_A64_SME2P1 = 1ULL << 38, + ARM_HWCAP2_A64_SME_I16I32 = 1ULL << 39, + ARM_HWCAP2_A64_SME_BI32I32 = 1ULL << 40, + ARM_HWCAP2_A64_SME_B16B16 = 1ULL << 41, + ARM_HWCAP2_A64_SME_F16F16 = 1ULL << 42, + ARM_HWCAP2_A64_MOPS = 1ULL << 43, + ARM_HWCAP2_A64_HBC = 1ULL << 44, }; #define ELF_HWCAP get_elf_hwcap() @@ -773,6 +815,8 @@ uint32_t get_elf_hwcap2(void) GET_FEATURE_ID(aa64_sme_f64f64, ARM_HWCAP2_A64_SME_F64F64); GET_FEATURE_ID(aa64_sme_i16i64, ARM_HWCAP2_A64_SME_I16I64); GET_FEATURE_ID(aa64_sme_fa64, ARM_HWCAP2_A64_SME_FA64); + GET_FEATURE_ID(aa64_hbc, ARM_HWCAP2_A64_HBC); + GET_FEATURE_ID(aa64_mops, ARM_HWCAP2_A64_MOPS); return hwcaps; } @@ -844,13 +888,27 @@ const char *elf_hwcap2_str(uint32_t bit) [__builtin_ctz(ARM_HWCAP2_A64_RPRES )] = "rpres", [__builtin_ctz(ARM_HWCAP2_A64_MTE3 )] = "mte3", [__builtin_ctz(ARM_HWCAP2_A64_SME )] = "sme", - [__builtin_ctz(ARM_HWCAP2_A64_SME_I16I64 )] = "sme_i16i64", - [__builtin_ctz(ARM_HWCAP2_A64_SME_F64F64 )] = "sme_f64f64", - [__builtin_ctz(ARM_HWCAP2_A64_SME_I8I32 )] = "sme_i8i32", - [__builtin_ctz(ARM_HWCAP2_A64_SME_F16F32 )] = "sme_f16f32", - [__builtin_ctz(ARM_HWCAP2_A64_SME_B16F32 )] = "sme_b16f32", - [__builtin_ctz(ARM_HWCAP2_A64_SME_F32F32 )] = "sme_f32f32", - [__builtin_ctz(ARM_HWCAP2_A64_SME_FA64 )] = "sme_fa64", + [__builtin_ctz(ARM_HWCAP2_A64_SME_I16I64 )] = "smei16i64", + [__builtin_ctz(ARM_HWCAP2_A64_SME_F64F64 )] = "smef64f64", + [__builtin_ctz(ARM_HWCAP2_A64_SME_I8I32 )] = "smei8i32", + [__builtin_ctz(ARM_HWCAP2_A64_SME_F16F32 )] = "smef16f32", + [__builtin_ctz(ARM_HWCAP2_A64_SME_B16F32 )] = "smeb16f32", + [__builtin_ctz(ARM_HWCAP2_A64_SME_F32F32 )] = "smef32f32", + [__builtin_ctz(ARM_HWCAP2_A64_SME_FA64 )] = "smefa64", + [__builtin_ctz(ARM_HWCAP2_A64_WFXT )] = "wfxt", + [__builtin_ctzll(ARM_HWCAP2_A64_EBF16 )] = "ebf16", + [__builtin_ctzll(ARM_HWCAP2_A64_SVE_EBF16 )] = "sveebf16", + [__builtin_ctzll(ARM_HWCAP2_A64_CSSC )] = "cssc", + [__builtin_ctzll(ARM_HWCAP2_A64_RPRFM )] = "rprfm", + [__builtin_ctzll(ARM_HWCAP2_A64_SVE2P1 )] = "sve2p1", + [__builtin_ctzll(ARM_HWCAP2_A64_SME2 )] = "sme2", + [__builtin_ctzll(ARM_HWCAP2_A64_SME2P1 )] = "sme2p1", + [__builtin_ctzll(ARM_HWCAP2_A64_SME_I16I32 )] = "smei16i32", + [__builtin_ctzll(ARM_HWCAP2_A64_SME_BI32I32)] = "smebi32i32", + [__builtin_ctzll(ARM_HWCAP2_A64_SME_B16B16 )] = "smeb16b16", + [__builtin_ctzll(ARM_HWCAP2_A64_SME_F16F16 )] = "smef16f16", + [__builtin_ctzll(ARM_HWCAP2_A64_MOPS )] = "mops", + [__builtin_ctzll(ARM_HWCAP2_A64_HBC )] = "hbc", }; return bit < ARRAY_SIZE(hwcap_str) ? hwcap_str[bit] : NULL; diff --git a/target/arm/cpu.h b/target/arm/cpu.h index f2e3dc4..bd55c5d 100644 --- a/target/arm/cpu.h +++ b/target/arm/cpu.h @@ -1315,6 +1315,7 @@ void pmu_init(ARMCPU *cpu); #define SCTLR_EnIB (1U << 30) /* v8.3, AArch64 only */ #define SCTLR_EnIA (1U << 31) /* v8.3, AArch64 only */ #define SCTLR_DSSBS_32 (1U << 31) /* v8.5, AArch32 only */ +#define SCTLR_MSCEN (1ULL << 33) /* FEAT_MOPS */ #define SCTLR_BT0 (1ULL << 35) /* v8.5-BTI */ #define SCTLR_BT1 (1ULL << 36) /* v8.5-BTI */ #define SCTLR_ITFSB (1ULL << 37) /* v8.5-MemTag */ @@ -2166,6 +2167,7 @@ FIELD(ID_AA64ISAR0, SHA1, 8, 4) FIELD(ID_AA64ISAR0, SHA2, 12, 4) FIELD(ID_AA64ISAR0, CRC32, 16, 4) FIELD(ID_AA64ISAR0, ATOMIC, 20, 4) +FIELD(ID_AA64ISAR0, TME, 24, 4) FIELD(ID_AA64ISAR0, RDM, 28, 4) FIELD(ID_AA64ISAR0, SHA3, 32, 4) FIELD(ID_AA64ISAR0, SM3, 36, 4) @@ -2200,6 +2202,13 @@ FIELD(ID_AA64ISAR2, APA3, 12, 4) FIELD(ID_AA64ISAR2, MOPS, 16, 4) FIELD(ID_AA64ISAR2, BC, 20, 4) FIELD(ID_AA64ISAR2, PAC_FRAC, 24, 4) +FIELD(ID_AA64ISAR2, CLRBHB, 28, 4) +FIELD(ID_AA64ISAR2, SYSREG_128, 32, 4) +FIELD(ID_AA64ISAR2, SYSINSTR_128, 36, 4) +FIELD(ID_AA64ISAR2, PRFMSLC, 40, 4) +FIELD(ID_AA64ISAR2, RPRFM, 48, 4) +FIELD(ID_AA64ISAR2, CSSC, 52, 4) +FIELD(ID_AA64ISAR2, ATS1A, 60, 4) FIELD(ID_AA64PFR0, EL0, 0, 4) FIELD(ID_AA64PFR0, EL1, 4, 4) @@ -2227,6 +2236,12 @@ FIELD(ID_AA64PFR1, SME, 24, 4) FIELD(ID_AA64PFR1, RNDR_TRAP, 28, 4) FIELD(ID_AA64PFR1, CSV2_FRAC, 32, 4) FIELD(ID_AA64PFR1, NMI, 36, 4) +FIELD(ID_AA64PFR1, MTE_FRAC, 40, 4) +FIELD(ID_AA64PFR1, GCS, 44, 4) +FIELD(ID_AA64PFR1, THE, 48, 4) +FIELD(ID_AA64PFR1, MTEX, 52, 4) +FIELD(ID_AA64PFR1, DF2, 56, 4) +FIELD(ID_AA64PFR1, PFAR, 60, 4) FIELD(ID_AA64MMFR0, PARANGE, 0, 4) FIELD(ID_AA64MMFR0, ASIDBITS, 4, 4) @@ -2258,6 +2273,7 @@ FIELD(ID_AA64MMFR1, AFP, 44, 4) FIELD(ID_AA64MMFR1, NTLBPA, 48, 4) FIELD(ID_AA64MMFR1, TIDCP1, 52, 4) FIELD(ID_AA64MMFR1, CMOW, 56, 4) +FIELD(ID_AA64MMFR1, ECBHB, 60, 4) FIELD(ID_AA64MMFR2, CNP, 0, 4) FIELD(ID_AA64MMFR2, UAO, 4, 4) @@ -2279,7 +2295,9 @@ FIELD(ID_AA64DFR0, DEBUGVER, 0, 4) FIELD(ID_AA64DFR0, TRACEVER, 4, 4) FIELD(ID_AA64DFR0, PMUVER, 8, 4) FIELD(ID_AA64DFR0, BRPS, 12, 4) +FIELD(ID_AA64DFR0, PMSS, 16, 4) FIELD(ID_AA64DFR0, WRPS, 20, 4) +FIELD(ID_AA64DFR0, SEBEP, 24, 4) FIELD(ID_AA64DFR0, CTX_CMPS, 28, 4) FIELD(ID_AA64DFR0, PMSVER, 32, 4) FIELD(ID_AA64DFR0, DOUBLELOCK, 36, 4) @@ -2287,12 +2305,14 @@ FIELD(ID_AA64DFR0, TRACEFILT, 40, 4) FIELD(ID_AA64DFR0, TRACEBUFFER, 44, 4) FIELD(ID_AA64DFR0, MTPMU, 48, 4) FIELD(ID_AA64DFR0, BRBE, 52, 4) +FIELD(ID_AA64DFR0, EXTTRCBUFF, 56, 4) FIELD(ID_AA64DFR0, HPMN0, 60, 4) FIELD(ID_AA64ZFR0, SVEVER, 0, 4) FIELD(ID_AA64ZFR0, AES, 4, 4) FIELD(ID_AA64ZFR0, BITPERM, 16, 4) FIELD(ID_AA64ZFR0, BFLOAT16, 20, 4) +FIELD(ID_AA64ZFR0, B16B16, 24, 4) FIELD(ID_AA64ZFR0, SHA3, 32, 4) FIELD(ID_AA64ZFR0, SM4, 40, 4) FIELD(ID_AA64ZFR0, I8MM, 44, 4) @@ -2300,9 +2320,13 @@ FIELD(ID_AA64ZFR0, F32MM, 52, 4) FIELD(ID_AA64ZFR0, F64MM, 56, 4) FIELD(ID_AA64SMFR0, F32F32, 32, 1) +FIELD(ID_AA64SMFR0, BI32I32, 33, 1) FIELD(ID_AA64SMFR0, B16F32, 34, 1) FIELD(ID_AA64SMFR0, F16F32, 35, 1) FIELD(ID_AA64SMFR0, I8I32, 36, 4) +FIELD(ID_AA64SMFR0, F16F16, 42, 1) +FIELD(ID_AA64SMFR0, B16B16, 43, 1) +FIELD(ID_AA64SMFR0, I16I32, 44, 4) FIELD(ID_AA64SMFR0, F64F64, 48, 1) FIELD(ID_AA64SMFR0, I16I64, 52, 4) FIELD(ID_AA64SMFR0, SMEVER, 56, 4) @@ -3147,6 +3171,7 @@ FIELD(TBFLAG_A64, SVL, 24, 4) FIELD(TBFLAG_A64, SME_TRAP_NONSTREAMING, 28, 1) FIELD(TBFLAG_A64, FGT_ERET, 29, 1) FIELD(TBFLAG_A64, NAA, 30, 1) +FIELD(TBFLAG_A64, ATA0, 31, 1) /* * Helpers for using the above. @@ -4065,6 +4090,11 @@ static inline bool isar_feature_aa64_i8mm(const ARMISARegisters *id) return FIELD_EX64(id->id_aa64isar1, ID_AA64ISAR1, I8MM) != 0; } +static inline bool isar_feature_aa64_hbc(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, BC) != 0; +} + static inline bool isar_feature_aa64_tgran4_lpa2(const ARMISARegisters *id) { return FIELD_SEX64(id->id_aa64mmfr0, ID_AA64MMFR0, TGRAN4) >= 1; @@ -4253,6 +4283,11 @@ static inline bool isar_feature_aa64_doublelock(const ARMISARegisters *id) return FIELD_SEX64(id->id_aa64dfr0, ID_AA64DFR0, DOUBLELOCK) >= 0; } +static inline bool isar_feature_aa64_mops(const ARMISARegisters *id) +{ + return FIELD_EX64(id->id_aa64isar2, ID_AA64ISAR2, MOPS); +} + /* * Feature tests for "does this exist in either 32-bit or 64-bit?" */ diff --git a/target/arm/helper.c b/target/arm/helper.c index 3b22596..8362078 100644 --- a/target/arm/helper.c +++ b/target/arm/helper.c @@ -5980,7 +5980,10 @@ static void hcrx_write(CPUARMState *env, const ARMCPRegInfo *ri, { uint64_t valid_mask = 0; - /* No features adding bits to HCRX are implemented. */ + /* FEAT_MOPS adds MSCEn and MCE2 */ + if (cpu_isar_feature(aa64_mops, env_archcpu(env))) { + valid_mask |= HCRX_MSCEN | HCRX_MCE2; + } /* Clear RES0 bits. */ env->cp15.hcrx_el2 = value & valid_mask; @@ -6009,13 +6012,24 @@ uint64_t arm_hcrx_el2_eff(CPUARMState *env) { /* * The bits in this register behave as 0 for all purposes other than - * direct reads of the register if: - * - EL2 is not enabled in the current security state, - * - SCR_EL3.HXEn is 0. - */ - if (!arm_is_el2_enabled(env) - || (arm_feature(env, ARM_FEATURE_EL3) - && !(env->cp15.scr_el3 & SCR_HXEN))) { + * direct reads of the register if SCR_EL3.HXEn is 0. + * If EL2 is not enabled in the current security state, then the + * bit may behave as if 0, or as if 1, depending on the bit. + * For the moment, we treat the EL2-disabled case as taking + * priority over the HXEn-disabled case. This is true for the only + * bit for a feature which we implement where the answer is different + * for the two cases (MSCEn for FEAT_MOPS). + * This may need to be revisited for future bits. + */ + if (!arm_is_el2_enabled(env)) { + uint64_t hcrx = 0; + if (cpu_isar_feature(aa64_mops, env_archcpu(env))) { + /* MSCEn behaves as 1 if EL2 is not enabled */ + hcrx |= HCRX_MSCEN; + } + return hcrx; + } + if (arm_feature(env, ARM_FEATURE_EL3) && !(env->cp15.scr_el3 & SCR_HXEN)) { return 0; } return env->cp15.hcrx_el2; @@ -8621,11 +8635,16 @@ void register_cp_regs_for_features(ARMCPU *cpu) R_ID_AA64ZFR0_F64MM_MASK }, { .name = "ID_AA64SMFR0_EL1", .exported_bits = R_ID_AA64SMFR0_F32F32_MASK | + R_ID_AA64SMFR0_BI32I32_MASK | R_ID_AA64SMFR0_B16F32_MASK | R_ID_AA64SMFR0_F16F32_MASK | R_ID_AA64SMFR0_I8I32_MASK | + R_ID_AA64SMFR0_F16F16_MASK | + R_ID_AA64SMFR0_B16B16_MASK | + R_ID_AA64SMFR0_I16I32_MASK | R_ID_AA64SMFR0_F64F64_MASK | R_ID_AA64SMFR0_I16I64_MASK | + R_ID_AA64SMFR0_SMEVER_MASK | R_ID_AA64SMFR0_FA64_MASK }, { .name = "ID_AA64MMFR0_EL1", .exported_bits = R_ID_AA64MMFR0_ECV_MASK, @@ -8676,7 +8695,11 @@ void register_cp_regs_for_features(ARMCPU *cpu) .exported_bits = R_ID_AA64ISAR2_WFXT_MASK | R_ID_AA64ISAR2_RPRES_MASK | R_ID_AA64ISAR2_GPA3_MASK | - R_ID_AA64ISAR2_APA3_MASK }, + R_ID_AA64ISAR2_APA3_MASK | + R_ID_AA64ISAR2_MOPS_MASK | + R_ID_AA64ISAR2_BC_MASK | + R_ID_AA64ISAR2_RPRFM_MASK | + R_ID_AA64ISAR2_CSSC_MASK }, { .name = "ID_AA64ISAR*_EL1_RESERVED", .is_glob = true }, }; diff --git a/target/arm/internals.h b/target/arm/internals.h index 5f5393b..1dd9182 100644 --- a/target/arm/internals.h +++ b/target/arm/internals.h @@ -1272,6 +1272,61 @@ FIELD(MTEDESC, SIZEM1, 12, SIMD_DATA_BITS - 12) /* size - 1 */ bool mte_probe(CPUARMState *env, uint32_t desc, uint64_t ptr); uint64_t mte_check(CPUARMState *env, uint32_t desc, uint64_t ptr, uintptr_t ra); +/** + * mte_mops_probe: Check where the next MTE failure is for a FEAT_MOPS operation + * @env: CPU env + * @ptr: start address of memory region (dirty pointer) + * @size: length of region (guaranteed not to cross a page boundary) + * @desc: MTEDESC descriptor word (0 means no MTE checks) + * Returns: the size of the region that can be copied without hitting + * an MTE tag failure + * + * Note that we assume that the caller has already checked the TBI + * and TCMA bits with mte_checks_needed() and an MTE check is definitely + * required. + */ +uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t desc); + +/** + * mte_mops_probe_rev: Check where the next MTE failure is for a FEAT_MOPS + * operation going in the reverse direction + * @env: CPU env + * @ptr: *end* address of memory region (dirty pointer) + * @size: length of region (guaranteed not to cross a page boundary) + * @desc: MTEDESC descriptor word (0 means no MTE checks) + * Returns: the size of the region that can be copied without hitting + * an MTE tag failure + * + * Note that we assume that the caller has already checked the TBI + * and TCMA bits with mte_checks_needed() and an MTE check is definitely + * required. + */ +uint64_t mte_mops_probe_rev(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t desc); + +/** + * mte_check_fail: Record an MTE tag check failure + * @env: CPU env + * @desc: MTEDESC descriptor word + * @dirty_ptr: Failing dirty address + * @ra: TCG retaddr + * + * This may never return (if the MTE tag checks are configured to fault). + */ +void mte_check_fail(CPUARMState *env, uint32_t desc, + uint64_t dirty_ptr, uintptr_t ra); + +/** + * mte_mops_set_tags: Set MTE tags for a portion of a FEAT_MOPS operation + * @env: CPU env + * @dirty_ptr: Start address of memory region (dirty pointer) + * @size: length of region (guaranteed not to cross page boundary) + * @desc: MTEDESC descriptor word + */ +void mte_mops_set_tags(CPUARMState *env, uint64_t dirty_ptr, uint64_t size, + uint32_t desc); + static inline int allocation_tag_from_addr(uint64_t ptr) { return extract64(ptr, 56, 4); diff --git a/target/arm/syndrome.h b/target/arm/syndrome.h index 8a6b8f8..5d34755 100644 --- a/target/arm/syndrome.h +++ b/target/arm/syndrome.h @@ -58,6 +58,7 @@ enum arm_exception_class { EC_DATAABORT = 0x24, EC_DATAABORT_SAME_EL = 0x25, EC_SPALIGNMENT = 0x26, + EC_MOP = 0x27, EC_AA32_FPTRAP = 0x28, EC_AA64_FPTRAP = 0x2c, EC_SERROR = 0x2f, @@ -334,4 +335,15 @@ static inline uint32_t syn_serror(uint32_t extra) return (EC_SERROR << ARM_EL_EC_SHIFT) | ARM_EL_IL | extra; } +static inline uint32_t syn_mop(bool is_set, bool is_setg, int options, + bool epilogue, bool wrong_option, bool option_a, + int destreg, int srcreg, int sizereg) +{ + return (EC_MOP << ARM_EL_EC_SHIFT) | ARM_EL_IL | + (is_set << 24) | (is_setg << 23) | (options << 19) | + (epilogue << 18) | (wrong_option << 17) | (option_a << 16) | + (destreg << 10) | (srcreg << 5) | sizereg; +} + + #endif /* TARGET_ARM_SYNDROME_H */ diff --git a/target/arm/tcg/a64.decode b/target/arm/tcg/a64.decode index ef64a3f..0cf1147 100644 --- a/target/arm/tcg/a64.decode +++ b/target/arm/tcg/a64.decode @@ -126,7 +126,8 @@ CBZ sf:1 011010 nz:1 ................... rt:5 &cbz imm=%imm19 TBZ . 011011 nz:1 ..... .............. rt:5 &tbz imm=%imm14 bitpos=%imm31_19 -B_cond 0101010 0 ................... 0 cond:4 imm=%imm19 +# B.cond and BC.cond +B_cond 0101010 0 ................... c:1 cond:4 imm=%imm19 BR 1101011 0000 11111 000000 rn:5 00000 &r BLR 1101011 0001 11111 000000 rn:5 00000 &r @@ -553,3 +554,38 @@ LDGM 11011001 11 1 ......... 00 ..... ..... @ldst_tag_mult p=0 w=0 STZ2G 11011001 11 1 ......... 01 ..... ..... @ldst_tag p=1 w=1 STZ2G 11011001 11 1 ......... 10 ..... ..... @ldst_tag p=0 w=0 STZ2G 11011001 11 1 ......... 11 ..... ..... @ldst_tag p=0 w=1 + +# Memory operations (memset, memcpy, memmove) +# Each of these comes in a set of three, eg SETP (prologue), SETM (main), +# SETE (epilogue), and each of those has different flavours to +# indicate whether memory accesses should be unpriv or non-temporal. +# We don't distinguish temporal and non-temporal accesses, but we +# do need to report it in syndrome register values. + +# Memset +&set rs rn rd unpriv nontemp +# op2 bit 1 is nontemporal bit +@set .. ......... rs:5 .. nontemp:1 unpriv:1 .. rn:5 rd:5 &set + +SETP 00 011001110 ..... 00 . . 01 ..... ..... @set +SETM 00 011001110 ..... 01 . . 01 ..... ..... @set +SETE 00 011001110 ..... 10 . . 01 ..... ..... @set + +# Like SET, but also setting MTE tags +SETGP 00 011101110 ..... 00 . . 01 ..... ..... @set +SETGM 00 011101110 ..... 01 . . 01 ..... ..... @set +SETGE 00 011101110 ..... 10 . . 01 ..... ..... @set + +# Memmove/Memcopy: the CPY insns allow overlapping src/dest and +# copy in the correct direction; the CPYF insns always copy forwards. +# +# options has the nontemporal and unpriv bits for src and dest +&cpy rs rn rd options +@cpy .. ... . ..... rs:5 options:4 .. rn:5 rd:5 &cpy + +CPYFP 00 011 0 01000 ..... .... 01 ..... ..... @cpy +CPYFM 00 011 0 01010 ..... .... 01 ..... ..... @cpy +CPYFE 00 011 0 01100 ..... .... 01 ..... ..... @cpy +CPYP 00 011 1 01000 ..... .... 01 ..... ..... @cpy +CPYM 00 011 1 01010 ..... .... 01 ..... ..... @cpy +CPYE 00 011 1 01100 ..... .... 01 ..... ..... @cpy diff --git a/target/arm/tcg/cpu64.c b/target/arm/tcg/cpu64.c index 7264ab5..68928e5 100644 --- a/target/arm/tcg/cpu64.c +++ b/target/arm/tcg/cpu64.c @@ -1027,6 +1027,11 @@ void aarch64_max_tcg_initfn(Object *obj) t = FIELD_DP64(t, ID_AA64ISAR1, I8MM, 1); /* FEAT_I8MM */ cpu->isar.id_aa64isar1 = t; + t = cpu->isar.id_aa64isar2; + t = FIELD_DP64(t, ID_AA64ISAR2, MOPS, 1); /* FEAT_MOPS */ + t = FIELD_DP64(t, ID_AA64ISAR2, BC, 1); /* FEAT_HBC */ + cpu->isar.id_aa64isar2 = t; + t = cpu->isar.id_aa64pfr0; t = FIELD_DP64(t, ID_AA64PFR0, FP, 1); /* FEAT_FP16 */ t = FIELD_DP64(t, ID_AA64PFR0, ADVSIMD, 1); /* FEAT_FP16 */ diff --git a/target/arm/tcg/helper-a64.c b/target/arm/tcg/helper-a64.c index 0cf56f6..84f5475 100644 --- a/target/arm/tcg/helper-a64.c +++ b/target/arm/tcg/helper-a64.c @@ -968,3 +968,881 @@ void HELPER(unaligned_access)(CPUARMState *env, uint64_t addr, arm_cpu_do_unaligned_access(env_cpu(env), addr, access_type, mmu_idx, GETPC()); } + +/* Memory operations (memset, memmove, memcpy) */ + +/* + * Return true if the CPY* and SET* insns can execute; compare + * pseudocode CheckMOPSEnabled(), though we refactor it a little. + */ +static bool mops_enabled(CPUARMState *env) +{ + int el = arm_current_el(env); + + if (el < 2 && + (arm_hcr_el2_eff(env) & (HCR_E2H | HCR_TGE)) != (HCR_E2H | HCR_TGE) && + !(arm_hcrx_el2_eff(env) & HCRX_MSCEN)) { + return false; + } + + if (el == 0) { + if (!el_is_in_host(env, 0)) { + return env->cp15.sctlr_el[1] & SCTLR_MSCEN; + } else { + return env->cp15.sctlr_el[2] & SCTLR_MSCEN; + } + } + return true; +} + +static void check_mops_enabled(CPUARMState *env, uintptr_t ra) +{ + if (!mops_enabled(env)) { + raise_exception_ra(env, EXCP_UDEF, syn_uncategorized(), + exception_target_el(env), ra); + } +} + +/* + * Return the target exception level for an exception due + * to mismatched arguments in a FEAT_MOPS copy or set. + * Compare pseudocode MismatchedCpySetTargetEL() + */ +static int mops_mismatch_exception_target_el(CPUARMState *env) +{ + int el = arm_current_el(env); + + if (el > 1) { + return el; + } + if (el == 0 && (arm_hcr_el2_eff(env) & HCR_TGE)) { + return 2; + } + if (el == 1 && (arm_hcrx_el2_eff(env) & HCRX_MCE2)) { + return 2; + } + return 1; +} + +/* + * Check whether an M or E instruction was executed with a CF value + * indicating the wrong option for this implementation. + * Assumes we are always Option A. + */ +static void check_mops_wrong_option(CPUARMState *env, uint32_t syndrome, + uintptr_t ra) +{ + if (env->CF != 0) { + syndrome |= 1 << 17; /* Set the wrong-option bit */ + raise_exception_ra(env, EXCP_UDEF, syndrome, + mops_mismatch_exception_target_el(env), ra); + } +} + +/* + * Return the maximum number of bytes we can transfer starting at addr + * without crossing a page boundary. + */ +static uint64_t page_limit(uint64_t addr) +{ + return TARGET_PAGE_ALIGN(addr + 1) - addr; +} + +/* + * Return the number of bytes we can copy starting from addr and working + * backwards without crossing a page boundary. + */ +static uint64_t page_limit_rev(uint64_t addr) +{ + return (addr & ~TARGET_PAGE_MASK) + 1; +} + +/* + * Perform part of a memory set on an area of guest memory starting at + * toaddr (a dirty address) and extending for setsize bytes. + * + * Returns the number of bytes actually set, which might be less than + * setsize; the caller should loop until the whole set has been done. + * The caller should ensure that the guest registers are correct + * for the possibility that the first byte of the set encounters + * an exception or watchpoint. We guarantee not to take any faults + * for bytes other than the first. + */ +static uint64_t set_step(CPUARMState *env, uint64_t toaddr, + uint64_t setsize, uint32_t data, int memidx, + uint32_t *mtedesc, uintptr_t ra) +{ + void *mem; + + setsize = MIN(setsize, page_limit(toaddr)); + if (*mtedesc) { + uint64_t mtesize = mte_mops_probe(env, toaddr, setsize, *mtedesc); + if (mtesize == 0) { + /* Trap, or not. All CPU state is up to date */ + mte_check_fail(env, *mtedesc, toaddr, ra); + /* Continue, with no further MTE checks required */ + *mtedesc = 0; + } else { + /* Advance to the end, or to the tag mismatch */ + setsize = MIN(setsize, mtesize); + } + } + + toaddr = useronly_clean_ptr(toaddr); + /* + * Trapless lookup: returns NULL for invalid page, I/O, + * watchpoints, clean pages, etc. + */ + mem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, memidx); + +#ifndef CONFIG_USER_ONLY + if (unlikely(!mem)) { + /* + * Slow-path: just do one byte write. This will handle the + * watchpoint, invalid page, etc handling correctly. + * For clean code pages, the next iteration will see + * the page dirty and will use the fast path. + */ + cpu_stb_mmuidx_ra(env, toaddr, data, memidx, ra); + return 1; + } +#endif + /* Easy case: just memset the host memory */ + memset(mem, data, setsize); + return setsize; +} + +/* + * Similar, but setting tags. The architecture requires us to do this + * in 16-byte chunks. SETP accesses are not tag checked; they set + * the tags. + */ +static uint64_t set_step_tags(CPUARMState *env, uint64_t toaddr, + uint64_t setsize, uint32_t data, int memidx, + uint32_t *mtedesc, uintptr_t ra) +{ + void *mem; + uint64_t cleanaddr; + + setsize = MIN(setsize, page_limit(toaddr)); + + cleanaddr = useronly_clean_ptr(toaddr); + /* + * Trapless lookup: returns NULL for invalid page, I/O, + * watchpoints, clean pages, etc. + */ + mem = tlb_vaddr_to_host(env, cleanaddr, MMU_DATA_STORE, memidx); + +#ifndef CONFIG_USER_ONLY + if (unlikely(!mem)) { + /* + * Slow-path: just do one write. This will handle the + * watchpoint, invalid page, etc handling correctly. + * The architecture requires that we do 16 bytes at a time, + * and we know both ptr and size are 16 byte aligned. + * For clean code pages, the next iteration will see + * the page dirty and will use the fast path. + */ + uint64_t repldata = data * 0x0101010101010101ULL; + MemOpIdx oi16 = make_memop_idx(MO_TE | MO_128, memidx); + cpu_st16_mmu(env, toaddr, int128_make128(repldata, repldata), oi16, ra); + mte_mops_set_tags(env, toaddr, 16, *mtedesc); + return 16; + } +#endif + /* Easy case: just memset the host memory */ + memset(mem, data, setsize); + mte_mops_set_tags(env, toaddr, setsize, *mtedesc); + return setsize; +} + +typedef uint64_t StepFn(CPUARMState *env, uint64_t toaddr, + uint64_t setsize, uint32_t data, + int memidx, uint32_t *mtedesc, uintptr_t ra); + +/* Extract register numbers from a MOPS exception syndrome value */ +static int mops_destreg(uint32_t syndrome) +{ + return extract32(syndrome, 10, 5); +} + +static int mops_srcreg(uint32_t syndrome) +{ + return extract32(syndrome, 5, 5); +} + +static int mops_sizereg(uint32_t syndrome) +{ + return extract32(syndrome, 0, 5); +} + +/* + * Return true if TCMA and TBI bits mean we need to do MTE checks. + * We only need to do this once per MOPS insn, not for every page. + */ +static bool mte_checks_needed(uint64_t ptr, uint32_t desc) +{ + int bit55 = extract64(ptr, 55, 1); + + /* + * Note that tbi_check() returns true for "access checked" but + * tcma_check() returns true for "access unchecked". + */ + if (!tbi_check(desc, bit55)) { + return false; + } + return !tcma_check(desc, bit55, allocation_tag_from_addr(ptr)); +} + +/* Take an exception if the SETG addr/size are not granule aligned */ +static void check_setg_alignment(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t memidx, uintptr_t ra) +{ + if ((size != 0 && !QEMU_IS_ALIGNED(ptr, TAG_GRANULE)) || + !QEMU_IS_ALIGNED(size, TAG_GRANULE)) { + arm_cpu_do_unaligned_access(env_cpu(env), ptr, MMU_DATA_STORE, + memidx, ra); + + } +} + +/* + * For the Memory Set operation, our implementation chooses + * always to use "option A", where we update Xd to the final + * address in the SETP insn, and set Xn to be -(bytes remaining). + * On SETM and SETE insns we only need update Xn. + * + * @env: CPU + * @syndrome: syndrome value for mismatch exceptions + * (also contains the register numbers we need to use) + * @mtedesc: MTE descriptor word + * @stepfn: function which does a single part of the set operation + * @is_setg: true if this is the tag-setting SETG variant + */ +static void do_setp(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, + StepFn *stepfn, bool is_setg, uintptr_t ra) +{ + /* Prologue: we choose to do up to the next page boundary */ + int rd = mops_destreg(syndrome); + int rs = mops_srcreg(syndrome); + int rn = mops_sizereg(syndrome); + uint8_t data = env->xregs[rs]; + uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); + uint64_t toaddr = env->xregs[rd]; + uint64_t setsize = env->xregs[rn]; + uint64_t stagesetsize, step; + + check_mops_enabled(env, ra); + + if (setsize > INT64_MAX) { + setsize = INT64_MAX; + if (is_setg) { + setsize &= ~0xf; + } + } + + if (unlikely(is_setg)) { + check_setg_alignment(env, toaddr, setsize, memidx, ra); + } else if (!mte_checks_needed(toaddr, mtedesc)) { + mtedesc = 0; + } + + stagesetsize = MIN(setsize, page_limit(toaddr)); + while (stagesetsize) { + env->xregs[rd] = toaddr; + env->xregs[rn] = setsize; + step = stepfn(env, toaddr, stagesetsize, data, memidx, &mtedesc, ra); + toaddr += step; + setsize -= step; + stagesetsize -= step; + } + /* Insn completed, so update registers to the Option A format */ + env->xregs[rd] = toaddr + setsize; + env->xregs[rn] = -setsize; + + /* Set NZCV = 0000 to indicate we are an Option A implementation */ + env->NF = 0; + env->ZF = 1; /* our env->ZF encoding is inverted */ + env->CF = 0; + env->VF = 0; + return; +} + +void HELPER(setp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) +{ + do_setp(env, syndrome, mtedesc, set_step, false, GETPC()); +} + +void HELPER(setgp)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) +{ + do_setp(env, syndrome, mtedesc, set_step_tags, true, GETPC()); +} + +static void do_setm(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, + StepFn *stepfn, bool is_setg, uintptr_t ra) +{ + /* Main: we choose to do all the full-page chunks */ + CPUState *cs = env_cpu(env); + int rd = mops_destreg(syndrome); + int rs = mops_srcreg(syndrome); + int rn = mops_sizereg(syndrome); + uint8_t data = env->xregs[rs]; + uint64_t toaddr = env->xregs[rd] + env->xregs[rn]; + uint64_t setsize = -env->xregs[rn]; + uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); + uint64_t step, stagesetsize; + + check_mops_enabled(env, ra); + + /* + * We're allowed to NOP out "no data to copy" before the consistency + * checks; we choose to do so. + */ + if (env->xregs[rn] == 0) { + return; + } + + check_mops_wrong_option(env, syndrome, ra); + + /* + * Our implementation will work fine even if we have an unaligned + * destination address, and because we update Xn every time around + * the loop below and the return value from stepfn() may be less + * than requested, we might find toaddr is unaligned. So we don't + * have an IMPDEF check for alignment here. + */ + + if (unlikely(is_setg)) { + check_setg_alignment(env, toaddr, setsize, memidx, ra); + } else if (!mte_checks_needed(toaddr, mtedesc)) { + mtedesc = 0; + } + + /* Do the actual memset: we leave the last partial page to SETE */ + stagesetsize = setsize & TARGET_PAGE_MASK; + while (stagesetsize > 0) { + step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra); + toaddr += step; + setsize -= step; + stagesetsize -= step; + env->xregs[rn] = -setsize; + if (stagesetsize > 0 && unlikely(cpu_loop_exit_requested(cs))) { + cpu_loop_exit_restore(cs, ra); + } + } +} + +void HELPER(setm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) +{ + do_setm(env, syndrome, mtedesc, set_step, false, GETPC()); +} + +void HELPER(setgm)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) +{ + do_setm(env, syndrome, mtedesc, set_step_tags, true, GETPC()); +} + +static void do_sete(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc, + StepFn *stepfn, bool is_setg, uintptr_t ra) +{ + /* Epilogue: do the last partial page */ + int rd = mops_destreg(syndrome); + int rs = mops_srcreg(syndrome); + int rn = mops_sizereg(syndrome); + uint8_t data = env->xregs[rs]; + uint64_t toaddr = env->xregs[rd] + env->xregs[rn]; + uint64_t setsize = -env->xregs[rn]; + uint32_t memidx = FIELD_EX32(mtedesc, MTEDESC, MIDX); + uint64_t step; + + check_mops_enabled(env, ra); + + /* + * We're allowed to NOP out "no data to copy" before the consistency + * checks; we choose to do so. + */ + if (setsize == 0) { + return; + } + + check_mops_wrong_option(env, syndrome, ra); + + /* + * Our implementation has no address alignment requirements, but + * we do want to enforce the "less than a page" size requirement, + * so we don't need to have the "check for interrupts" here. + */ + if (setsize >= TARGET_PAGE_SIZE) { + raise_exception_ra(env, EXCP_UDEF, syndrome, + mops_mismatch_exception_target_el(env), ra); + } + + if (unlikely(is_setg)) { + check_setg_alignment(env, toaddr, setsize, memidx, ra); + } else if (!mte_checks_needed(toaddr, mtedesc)) { + mtedesc = 0; + } + + /* Do the actual memset */ + while (setsize > 0) { + step = stepfn(env, toaddr, setsize, data, memidx, &mtedesc, ra); + toaddr += step; + setsize -= step; + env->xregs[rn] = -setsize; + } +} + +void HELPER(sete)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) +{ + do_sete(env, syndrome, mtedesc, set_step, false, GETPC()); +} + +void HELPER(setge)(CPUARMState *env, uint32_t syndrome, uint32_t mtedesc) +{ + do_sete(env, syndrome, mtedesc, set_step_tags, true, GETPC()); +} + +/* + * Perform part of a memory copy from the guest memory at fromaddr + * and extending for copysize bytes, to the guest memory at + * toaddr. Both addreses are dirty. + * + * Returns the number of bytes actually set, which might be less than + * copysize; the caller should loop until the whole copy has been done. + * The caller should ensure that the guest registers are correct + * for the possibility that the first byte of the copy encounters + * an exception or watchpoint. We guarantee not to take any faults + * for bytes other than the first. + */ +static uint64_t copy_step(CPUARMState *env, uint64_t toaddr, uint64_t fromaddr, + uint64_t copysize, int wmemidx, int rmemidx, + uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra) +{ + void *rmem; + void *wmem; + + /* Don't cross a page boundary on either source or destination */ + copysize = MIN(copysize, page_limit(toaddr)); + copysize = MIN(copysize, page_limit(fromaddr)); + /* + * Handle MTE tag checks: either handle the tag mismatch for byte 0, + * or else copy up to but not including the byte with the mismatch. + */ + if (*rdesc) { + uint64_t mtesize = mte_mops_probe(env, fromaddr, copysize, *rdesc); + if (mtesize == 0) { + mte_check_fail(env, *rdesc, fromaddr, ra); + *rdesc = 0; + } else { + copysize = MIN(copysize, mtesize); + } + } + if (*wdesc) { + uint64_t mtesize = mte_mops_probe(env, toaddr, copysize, *wdesc); + if (mtesize == 0) { + mte_check_fail(env, *wdesc, toaddr, ra); + *wdesc = 0; + } else { + copysize = MIN(copysize, mtesize); + } + } + + toaddr = useronly_clean_ptr(toaddr); + fromaddr = useronly_clean_ptr(fromaddr); + /* Trapless lookup of whether we can get a host memory pointer */ + wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx); + rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx); + +#ifndef CONFIG_USER_ONLY + /* + * If we don't have host memory for both source and dest then just + * do a single byte copy. This will handle watchpoints, invalid pages, + * etc correctly. For clean code pages, the next iteration will see + * the page dirty and will use the fast path. + */ + if (unlikely(!rmem || !wmem)) { + uint8_t byte; + if (rmem) { + byte = *(uint8_t *)rmem; + } else { + byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra); + } + if (wmem) { + *(uint8_t *)wmem = byte; + } else { + cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra); + } + return 1; + } +#endif + /* Easy case: just memmove the host memory */ + memmove(wmem, rmem, copysize); + return copysize; +} + +/* + * Do part of a backwards memory copy. Here toaddr and fromaddr point + * to the *last* byte to be copied. + */ +static uint64_t copy_step_rev(CPUARMState *env, uint64_t toaddr, + uint64_t fromaddr, + uint64_t copysize, int wmemidx, int rmemidx, + uint32_t *wdesc, uint32_t *rdesc, uintptr_t ra) +{ + void *rmem; + void *wmem; + + /* Don't cross a page boundary on either source or destination */ + copysize = MIN(copysize, page_limit_rev(toaddr)); + copysize = MIN(copysize, page_limit_rev(fromaddr)); + + /* + * Handle MTE tag checks: either handle the tag mismatch for byte 0, + * or else copy up to but not including the byte with the mismatch. + */ + if (*rdesc) { + uint64_t mtesize = mte_mops_probe_rev(env, fromaddr, copysize, *rdesc); + if (mtesize == 0) { + mte_check_fail(env, *rdesc, fromaddr, ra); + *rdesc = 0; + } else { + copysize = MIN(copysize, mtesize); + } + } + if (*wdesc) { + uint64_t mtesize = mte_mops_probe_rev(env, toaddr, copysize, *wdesc); + if (mtesize == 0) { + mte_check_fail(env, *wdesc, toaddr, ra); + *wdesc = 0; + } else { + copysize = MIN(copysize, mtesize); + } + } + + toaddr = useronly_clean_ptr(toaddr); + fromaddr = useronly_clean_ptr(fromaddr); + /* Trapless lookup of whether we can get a host memory pointer */ + wmem = tlb_vaddr_to_host(env, toaddr, MMU_DATA_STORE, wmemidx); + rmem = tlb_vaddr_to_host(env, fromaddr, MMU_DATA_LOAD, rmemidx); + +#ifndef CONFIG_USER_ONLY + /* + * If we don't have host memory for both source and dest then just + * do a single byte copy. This will handle watchpoints, invalid pages, + * etc correctly. For clean code pages, the next iteration will see + * the page dirty and will use the fast path. + */ + if (unlikely(!rmem || !wmem)) { + uint8_t byte; + if (rmem) { + byte = *(uint8_t *)rmem; + } else { + byte = cpu_ldub_mmuidx_ra(env, fromaddr, rmemidx, ra); + } + if (wmem) { + *(uint8_t *)wmem = byte; + } else { + cpu_stb_mmuidx_ra(env, toaddr, byte, wmemidx, ra); + } + return 1; + } +#endif + /* + * Easy case: just memmove the host memory. Note that wmem and + * rmem here point to the *last* byte to copy. + */ + memmove(wmem - (copysize - 1), rmem - (copysize - 1), copysize); + return copysize; +} + +/* + * for the Memory Copy operation, our implementation chooses always + * to use "option A", where we update Xd and Xs to the final addresses + * in the CPYP insn, and then in CPYM and CPYE only need to update Xn. + * + * @env: CPU + * @syndrome: syndrome value for mismatch exceptions + * (also contains the register numbers we need to use) + * @wdesc: MTE descriptor for the writes (destination) + * @rdesc: MTE descriptor for the reads (source) + * @move: true if this is CPY (memmove), false for CPYF (memcpy forwards) + */ +static void do_cpyp(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc, uint32_t move, uintptr_t ra) +{ + int rd = mops_destreg(syndrome); + int rs = mops_srcreg(syndrome); + int rn = mops_sizereg(syndrome); + uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); + uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); + bool forwards = true; + uint64_t toaddr = env->xregs[rd]; + uint64_t fromaddr = env->xregs[rs]; + uint64_t copysize = env->xregs[rn]; + uint64_t stagecopysize, step; + + check_mops_enabled(env, ra); + + + if (move) { + /* + * Copy backwards if necessary. The direction for a non-overlapping + * copy is IMPDEF; we choose forwards. + */ + if (copysize > 0x007FFFFFFFFFFFFFULL) { + copysize = 0x007FFFFFFFFFFFFFULL; + } + uint64_t fs = extract64(fromaddr, 0, 56); + uint64_t ts = extract64(toaddr, 0, 56); + uint64_t fe = extract64(fromaddr + copysize, 0, 56); + + if (fs < ts && fe > ts) { + forwards = false; + } + } else { + if (copysize > INT64_MAX) { + copysize = INT64_MAX; + } + } + + if (!mte_checks_needed(fromaddr, rdesc)) { + rdesc = 0; + } + if (!mte_checks_needed(toaddr, wdesc)) { + wdesc = 0; + } + + if (forwards) { + stagecopysize = MIN(copysize, page_limit(toaddr)); + stagecopysize = MIN(stagecopysize, page_limit(fromaddr)); + while (stagecopysize) { + env->xregs[rd] = toaddr; + env->xregs[rs] = fromaddr; + env->xregs[rn] = copysize; + step = copy_step(env, toaddr, fromaddr, stagecopysize, + wmemidx, rmemidx, &wdesc, &rdesc, ra); + toaddr += step; + fromaddr += step; + copysize -= step; + stagecopysize -= step; + } + /* Insn completed, so update registers to the Option A format */ + env->xregs[rd] = toaddr + copysize; + env->xregs[rs] = fromaddr + copysize; + env->xregs[rn] = -copysize; + } else { + /* + * In a reverse copy the to and from addrs in Xs and Xd are the start + * of the range, but it's more convenient for us to work with pointers + * to the last byte being copied. + */ + toaddr += copysize - 1; + fromaddr += copysize - 1; + stagecopysize = MIN(copysize, page_limit_rev(toaddr)); + stagecopysize = MIN(stagecopysize, page_limit_rev(fromaddr)); + while (stagecopysize) { + env->xregs[rn] = copysize; + step = copy_step_rev(env, toaddr, fromaddr, stagecopysize, + wmemidx, rmemidx, &wdesc, &rdesc, ra); + copysize -= step; + stagecopysize -= step; + toaddr -= step; + fromaddr -= step; + } + /* + * Insn completed, so update registers to the Option A format. + * For a reverse copy this is no different to the CPYP input format. + */ + env->xregs[rn] = copysize; + } + + /* Set NZCV = 0000 to indicate we are an Option A implementation */ + env->NF = 0; + env->ZF = 1; /* our env->ZF encoding is inverted */ + env->CF = 0; + env->VF = 0; + return; +} + +void HELPER(cpyp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc) +{ + do_cpyp(env, syndrome, wdesc, rdesc, true, GETPC()); +} + +void HELPER(cpyfp)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc) +{ + do_cpyp(env, syndrome, wdesc, rdesc, false, GETPC()); +} + +static void do_cpym(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc, uint32_t move, uintptr_t ra) +{ + /* Main: we choose to copy until less than a page remaining */ + CPUState *cs = env_cpu(env); + int rd = mops_destreg(syndrome); + int rs = mops_srcreg(syndrome); + int rn = mops_sizereg(syndrome); + uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); + uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); + bool forwards = true; + uint64_t toaddr, fromaddr, copysize, step; + + check_mops_enabled(env, ra); + + /* We choose to NOP out "no data to copy" before consistency checks */ + if (env->xregs[rn] == 0) { + return; + } + + check_mops_wrong_option(env, syndrome, ra); + + if (move) { + forwards = (int64_t)env->xregs[rn] < 0; + } + + if (forwards) { + toaddr = env->xregs[rd] + env->xregs[rn]; + fromaddr = env->xregs[rs] + env->xregs[rn]; + copysize = -env->xregs[rn]; + } else { + copysize = env->xregs[rn]; + /* This toaddr and fromaddr point to the *last* byte to copy */ + toaddr = env->xregs[rd] + copysize - 1; + fromaddr = env->xregs[rs] + copysize - 1; + } + + if (!mte_checks_needed(fromaddr, rdesc)) { + rdesc = 0; + } + if (!mte_checks_needed(toaddr, wdesc)) { + wdesc = 0; + } + + /* Our implementation has no particular parameter requirements for CPYM */ + + /* Do the actual memmove */ + if (forwards) { + while (copysize >= TARGET_PAGE_SIZE) { + step = copy_step(env, toaddr, fromaddr, copysize, + wmemidx, rmemidx, &wdesc, &rdesc, ra); + toaddr += step; + fromaddr += step; + copysize -= step; + env->xregs[rn] = -copysize; + if (copysize >= TARGET_PAGE_SIZE && + unlikely(cpu_loop_exit_requested(cs))) { + cpu_loop_exit_restore(cs, ra); + } + } + } else { + while (copysize >= TARGET_PAGE_SIZE) { + step = copy_step_rev(env, toaddr, fromaddr, copysize, + wmemidx, rmemidx, &wdesc, &rdesc, ra); + toaddr -= step; + fromaddr -= step; + copysize -= step; + env->xregs[rn] = copysize; + if (copysize >= TARGET_PAGE_SIZE && + unlikely(cpu_loop_exit_requested(cs))) { + cpu_loop_exit_restore(cs, ra); + } + } + } +} + +void HELPER(cpym)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc) +{ + do_cpym(env, syndrome, wdesc, rdesc, true, GETPC()); +} + +void HELPER(cpyfm)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc) +{ + do_cpym(env, syndrome, wdesc, rdesc, false, GETPC()); +} + +static void do_cpye(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc, uint32_t move, uintptr_t ra) +{ + /* Epilogue: do the last partial page */ + int rd = mops_destreg(syndrome); + int rs = mops_srcreg(syndrome); + int rn = mops_sizereg(syndrome); + uint32_t rmemidx = FIELD_EX32(rdesc, MTEDESC, MIDX); + uint32_t wmemidx = FIELD_EX32(wdesc, MTEDESC, MIDX); + bool forwards = true; + uint64_t toaddr, fromaddr, copysize, step; + + check_mops_enabled(env, ra); + + /* We choose to NOP out "no data to copy" before consistency checks */ + if (env->xregs[rn] == 0) { + return; + } + + check_mops_wrong_option(env, syndrome, ra); + + if (move) { + forwards = (int64_t)env->xregs[rn] < 0; + } + + if (forwards) { + toaddr = env->xregs[rd] + env->xregs[rn]; + fromaddr = env->xregs[rs] + env->xregs[rn]; + copysize = -env->xregs[rn]; + } else { + copysize = env->xregs[rn]; + /* This toaddr and fromaddr point to the *last* byte to copy */ + toaddr = env->xregs[rd] + copysize - 1; + fromaddr = env->xregs[rs] + copysize - 1; + } + + if (!mte_checks_needed(fromaddr, rdesc)) { + rdesc = 0; + } + if (!mte_checks_needed(toaddr, wdesc)) { + wdesc = 0; + } + + /* Check the size; we don't want to have do a check-for-interrupts */ + if (copysize >= TARGET_PAGE_SIZE) { + raise_exception_ra(env, EXCP_UDEF, syndrome, + mops_mismatch_exception_target_el(env), ra); + } + + /* Do the actual memmove */ + if (forwards) { + while (copysize > 0) { + step = copy_step(env, toaddr, fromaddr, copysize, + wmemidx, rmemidx, &wdesc, &rdesc, ra); + toaddr += step; + fromaddr += step; + copysize -= step; + env->xregs[rn] = -copysize; + } + } else { + while (copysize > 0) { + step = copy_step_rev(env, toaddr, fromaddr, copysize, + wmemidx, rmemidx, &wdesc, &rdesc, ra); + toaddr -= step; + fromaddr -= step; + copysize -= step; + env->xregs[rn] = copysize; + } + } +} + +void HELPER(cpye)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc) +{ + do_cpye(env, syndrome, wdesc, rdesc, true, GETPC()); +} + +void HELPER(cpyfe)(CPUARMState *env, uint32_t syndrome, uint32_t wdesc, + uint32_t rdesc) +{ + do_cpye(env, syndrome, wdesc, rdesc, false, GETPC()); +} diff --git a/target/arm/tcg/helper-a64.h b/target/arm/tcg/helper-a64.h index 57cfd68..575a5da 100644 --- a/target/arm/tcg/helper-a64.h +++ b/target/arm/tcg/helper-a64.h @@ -117,3 +117,17 @@ DEF_HELPER_FLAGS_3(stzgm_tags, TCG_CALL_NO_WG, void, env, i64, i64) DEF_HELPER_FLAGS_4(unaligned_access, TCG_CALL_NO_WG, noreturn, env, i64, i32, i32) + +DEF_HELPER_3(setp, void, env, i32, i32) +DEF_HELPER_3(setm, void, env, i32, i32) +DEF_HELPER_3(sete, void, env, i32, i32) +DEF_HELPER_3(setgp, void, env, i32, i32) +DEF_HELPER_3(setgm, void, env, i32, i32) +DEF_HELPER_3(setge, void, env, i32, i32) + +DEF_HELPER_4(cpyp, void, env, i32, i32, i32) +DEF_HELPER_4(cpym, void, env, i32, i32, i32) +DEF_HELPER_4(cpye, void, env, i32, i32, i32) +DEF_HELPER_4(cpyfp, void, env, i32, i32, i32) +DEF_HELPER_4(cpyfm, void, env, i32, i32, i32) +DEF_HELPER_4(cpyfe, void, env, i32, i32, i32) diff --git a/target/arm/tcg/hflags.c b/target/arm/tcg/hflags.c index 616c5fa..cea1adb 100644 --- a/target/arm/tcg/hflags.c +++ b/target/arm/tcg/hflags.c @@ -306,6 +306,15 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, && !(env->pstate & PSTATE_TCO) && (sctlr & (el == 0 ? SCTLR_TCF0 : SCTLR_TCF))) { DP_TBFLAG_A64(flags, MTE_ACTIVE, 1); + if (!EX_TBFLAG_A64(flags, UNPRIV)) { + /* + * In non-unpriv contexts (eg EL0), unpriv load/stores + * act like normal ones; duplicate the MTE info to + * avoid translate-a64.c having to check UNPRIV to see + * whether it is OK to index into MTE_ACTIVE[]. + */ + DP_TBFLAG_A64(flags, MTE0_ACTIVE, 1); + } } } /* And again for unprivileged accesses, if required. */ @@ -316,6 +325,18 @@ static CPUARMTBFlags rebuild_hflags_a64(CPUARMState *env, int el, int fp_el, && allocation_tag_access_enabled(env, 0, sctlr)) { DP_TBFLAG_A64(flags, MTE0_ACTIVE, 1); } + /* + * For unpriv tag-setting accesses we alse need ATA0. Again, in + * contexts where unpriv and normal insns are the same we + * duplicate the ATA bit to save effort for translate-a64.c. + */ + if (EX_TBFLAG_A64(flags, UNPRIV)) { + if (allocation_tag_access_enabled(env, 0, sctlr)) { + DP_TBFLAG_A64(flags, ATA0, 1); + } + } else { + DP_TBFLAG_A64(flags, ATA0, EX_TBFLAG_A64(flags, ATA)); + } /* Cache TCMA as well as TBI. */ DP_TBFLAG_A64(flags, TCMA, aa64_va_parameter_tcma(tcr, mmu_idx)); } diff --git a/target/arm/tcg/mte_helper.c b/target/arm/tcg/mte_helper.c index b23d115..2dd7eb3 100644 --- a/target/arm/tcg/mte_helper.c +++ b/target/arm/tcg/mte_helper.c @@ -50,14 +50,14 @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) } /** - * allocation_tag_mem: + * allocation_tag_mem_probe: * @env: the cpu environment * @ptr_mmu_idx: the addressing regime to use for the virtual address * @ptr: the virtual address for which to look up tag memory * @ptr_access: the access to use for the virtual address * @ptr_size: the number of bytes in the normal memory access * @tag_access: the access to use for the tag memory - * @tag_size: the number of bytes in the tag memory access + * @probe: true to merely probe, never taking an exception * @ra: the return address for exception handling * * Our tag memory is formatted as a sequence of little-endian nibbles. @@ -66,18 +66,25 @@ static int choose_nonexcluded_tag(int tag, int offset, uint16_t exclude) * for the higher addr. * * Here, resolve the physical address from the virtual address, and return - * a pointer to the corresponding tag byte. Exit with exception if the - * virtual address is not accessible for @ptr_access. - * - * The @ptr_size and @tag_size values may not have an obvious relation - * due to the alignment of @ptr, and the number of tag checks required. + * a pointer to the corresponding tag byte. * * If there is no tag storage corresponding to @ptr, return NULL. + * + * If the page is inaccessible for @ptr_access, or has a watchpoint, there are + * three options: + * (1) probe = true, ra = 0 : pure probe -- we return NULL if the page is not + * accessible, and do not take watchpoint traps. The calling code must + * handle those cases in the right priority compared to MTE traps. + * (2) probe = false, ra = 0 : probe, no fault expected -- the caller guarantees + * that the page is going to be accessible. We will take watchpoint traps. + * (3) probe = false, ra != 0 : non-probe -- we will take both memory access + * traps and watchpoint traps. + * (probe = true, ra != 0 is invalid and will assert.) */ -static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, - uint64_t ptr, MMUAccessType ptr_access, - int ptr_size, MMUAccessType tag_access, - int tag_size, uintptr_t ra) +static uint8_t *allocation_tag_mem_probe(CPUARMState *env, int ptr_mmu_idx, + uint64_t ptr, MMUAccessType ptr_access, + int ptr_size, MMUAccessType tag_access, + bool probe, uintptr_t ra) { #ifdef CONFIG_USER_ONLY uint64_t clean_ptr = useronly_clean_ptr(ptr); @@ -85,6 +92,8 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, uint8_t *tags; uintptr_t index; + assert(!(probe && ra)); + if (!(flags & (ptr_access == MMU_DATA_STORE ? PAGE_WRITE_ORG : PAGE_READ))) { cpu_loop_exit_sigsegv(env_cpu(env), ptr, ptr_access, !(flags & PAGE_VALID), ra); @@ -115,12 +124,16 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, * exception for inaccessible pages, and resolves the virtual address * into the softmmu tlb. * - * When RA == 0, this is for mte_probe. The page is expected to be - * valid. Indicate to probe_access_flags no-fault, then assert that - * we received a valid page. + * When RA == 0, this is either a pure probe or a no-fault-expected probe. + * Indicate to probe_access_flags no-fault, then either return NULL + * for the pure probe, or assert that we received a valid page for the + * no-fault-expected probe. */ flags = probe_access_full(env, ptr, 0, ptr_access, ptr_mmu_idx, ra == 0, &host, &full, ra); + if (probe && (flags & TLB_INVALID_MASK)) { + return NULL; + } assert(!(flags & TLB_INVALID_MASK)); /* If the virtual page MemAttr != Tagged, access unchecked. */ @@ -161,7 +174,7 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, } /* Any debug exception has priority over a tag check exception. */ - if (unlikely(flags & TLB_WATCHPOINT)) { + if (!probe && unlikely(flags & TLB_WATCHPOINT)) { int wp = ptr_access == MMU_DATA_LOAD ? BP_MEM_READ : BP_MEM_WRITE; assert(ra != 0); cpu_check_watchpoint(env_cpu(env), ptr, ptr_size, attrs, wp, ra); @@ -203,6 +216,15 @@ static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, #endif } +static uint8_t *allocation_tag_mem(CPUARMState *env, int ptr_mmu_idx, + uint64_t ptr, MMUAccessType ptr_access, + int ptr_size, MMUAccessType tag_access, + uintptr_t ra) +{ + return allocation_tag_mem_probe(env, ptr_mmu_idx, ptr, ptr_access, + ptr_size, tag_access, false, ra); +} + uint64_t HELPER(irg)(CPUARMState *env, uint64_t rn, uint64_t rm) { uint16_t exclude = extract32(rm | env->cp15.gcr_el1, 0, 16); @@ -275,7 +297,7 @@ uint64_t HELPER(ldg)(CPUARMState *env, uint64_t ptr, uint64_t xt) /* Trap if accessing an invalid page. */ mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, 1, - MMU_DATA_LOAD, 1, GETPC()); + MMU_DATA_LOAD, GETPC()); /* Load if page supports tags. */ if (mem) { @@ -329,7 +351,7 @@ static inline void do_stg(CPUARMState *env, uint64_t ptr, uint64_t xt, /* Trap if accessing an invalid page. */ mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, TAG_GRANULE, - MMU_DATA_STORE, 1, ra); + MMU_DATA_STORE, ra); /* Store if page supports tags. */ if (mem) { @@ -372,10 +394,10 @@ static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt, if (ptr & TAG_GRANULE) { /* Two stores unaligned mod TAG_GRANULE*2 -- modify two bytes. */ mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, - TAG_GRANULE, MMU_DATA_STORE, 1, ra); + TAG_GRANULE, MMU_DATA_STORE, ra); mem2 = allocation_tag_mem(env, mmu_idx, ptr + TAG_GRANULE, MMU_DATA_STORE, TAG_GRANULE, - MMU_DATA_STORE, 1, ra); + MMU_DATA_STORE, ra); /* Store if page(s) support tags. */ if (mem1) { @@ -387,7 +409,7 @@ static inline void do_st2g(CPUARMState *env, uint64_t ptr, uint64_t xt, } else { /* Two stores aligned mod TAG_GRANULE*2 -- modify one byte. */ mem1 = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, - 2 * TAG_GRANULE, MMU_DATA_STORE, 1, ra); + 2 * TAG_GRANULE, MMU_DATA_STORE, ra); if (mem1) { tag |= tag << 4; qatomic_set(mem1, tag); @@ -435,8 +457,7 @@ uint64_t HELPER(ldgm)(CPUARMState *env, uint64_t ptr) /* Trap if accessing an invalid page. */ tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_LOAD, - gm_bs_bytes, MMU_DATA_LOAD, - gm_bs_bytes / (2 * TAG_GRANULE), ra); + gm_bs_bytes, MMU_DATA_LOAD, ra); /* The tag is squashed to zero if the page does not support tags. */ if (!tag_mem) { @@ -495,8 +516,7 @@ void HELPER(stgm)(CPUARMState *env, uint64_t ptr, uint64_t val) /* Trap if accessing an invalid page. */ tag_mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, - gm_bs_bytes, MMU_DATA_LOAD, - gm_bs_bytes / (2 * TAG_GRANULE), ra); + gm_bs_bytes, MMU_DATA_LOAD, ra); /* * Tag store only happens if the page support tags, @@ -552,7 +572,7 @@ void HELPER(stzgm_tags)(CPUARMState *env, uint64_t ptr, uint64_t val) ptr &= -dcz_bytes; mem = allocation_tag_mem(env, mmu_idx, ptr, MMU_DATA_STORE, dcz_bytes, - MMU_DATA_STORE, tag_bytes, ra); + MMU_DATA_STORE, ra); if (mem) { int tag_pair = (val & 0xf) * 0x11; memset(mem, tag_pair, tag_bytes); @@ -597,8 +617,8 @@ static void mte_async_check_fail(CPUARMState *env, uint64_t dirty_ptr, } /* Record a tag check failure. */ -static void mte_check_fail(CPUARMState *env, uint32_t desc, - uint64_t dirty_ptr, uintptr_t ra) +void mte_check_fail(CPUARMState *env, uint32_t desc, + uint64_t dirty_ptr, uintptr_t ra) { int mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); ARMMMUIdx arm_mmu_idx = core_to_aa64_mmu_idx(mmu_idx); @@ -715,6 +735,55 @@ static int checkN(uint8_t *mem, int odd, int cmp, int count) } /** + * checkNrev: + * @tag: tag memory to test + * @odd: true to begin testing at tags at odd nibble + * @cmp: the tag to compare against + * @count: number of tags to test + * + * Return the number of successful tests. + * Thus a return value < @count indicates a failure. + * + * This is like checkN, but it runs backwards, checking the + * tags starting with @tag and then the tags preceding it. + * This is needed by the backwards-memory-copying operations. + */ +static int checkNrev(uint8_t *mem, int odd, int cmp, int count) +{ + int n = 0, diff; + + /* Replicate the test tag and compare. */ + cmp *= 0x11; + diff = *mem-- ^ cmp; + + if (!odd) { + goto start_even; + } + + while (1) { + /* Test odd tag. */ + if (unlikely((diff) & 0xf0)) { + break; + } + if (++n == count) { + break; + } + + start_even: + /* Test even tag. */ + if (unlikely((diff) & 0x0f)) { + break; + } + if (++n == count) { + break; + } + + diff = *mem-- ^ cmp; + } + return n; +} + +/** * mte_probe_int() - helper for mte_probe and mte_check * @env: CPU environment * @desc: MTEDESC descriptor @@ -732,8 +801,7 @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr, int mmu_idx, ptr_tag, bit55; uint64_t ptr_last, prev_page, next_page; uint64_t tag_first, tag_last; - uint64_t tag_byte_first, tag_byte_last; - uint32_t sizem1, tag_count, tag_size, n, c; + uint32_t sizem1, tag_count, n, c; uint8_t *mem1, *mem2; MMUAccessType type; @@ -763,19 +831,14 @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr, tag_last = QEMU_ALIGN_DOWN(ptr_last, TAG_GRANULE); tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1; - /* Round the bounds to twice the tag granule, and compute the bytes. */ - tag_byte_first = QEMU_ALIGN_DOWN(ptr, 2 * TAG_GRANULE); - tag_byte_last = QEMU_ALIGN_DOWN(ptr_last, 2 * TAG_GRANULE); - /* Locate the page boundaries. */ prev_page = ptr & TARGET_PAGE_MASK; next_page = prev_page + TARGET_PAGE_SIZE; if (likely(tag_last - prev_page < TARGET_PAGE_SIZE)) { /* Memory access stays on one page. */ - tag_size = ((tag_byte_last - tag_byte_first) / (2 * TAG_GRANULE)) + 1; mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, sizem1 + 1, - MMU_DATA_LOAD, tag_size, ra); + MMU_DATA_LOAD, ra); if (!mem1) { return 1; } @@ -783,14 +846,12 @@ static int mte_probe_int(CPUARMState *env, uint32_t desc, uint64_t ptr, n = checkN(mem1, ptr & TAG_GRANULE, ptr_tag, tag_count); } else { /* Memory access crosses to next page. */ - tag_size = (next_page - tag_byte_first) / (2 * TAG_GRANULE); mem1 = allocation_tag_mem(env, mmu_idx, ptr, type, next_page - ptr, - MMU_DATA_LOAD, tag_size, ra); + MMU_DATA_LOAD, ra); - tag_size = ((tag_byte_last - next_page) / (2 * TAG_GRANULE)) + 1; mem2 = allocation_tag_mem(env, mmu_idx, next_page, type, ptr_last - next_page + 1, - MMU_DATA_LOAD, tag_size, ra); + MMU_DATA_LOAD, ra); /* * Perform all of the comparisons. @@ -918,7 +979,7 @@ uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); (void) probe_write(env, ptr, 1, mmu_idx, ra); mem = allocation_tag_mem(env, mmu_idx, align_ptr, MMU_DATA_STORE, - dcz_bytes, MMU_DATA_LOAD, tag_bytes, ra); + dcz_bytes, MMU_DATA_LOAD, ra); if (!mem) { goto done; } @@ -979,3 +1040,143 @@ uint64_t HELPER(mte_check_zva)(CPUARMState *env, uint32_t desc, uint64_t ptr) done: return useronly_clean_ptr(ptr); } + +uint64_t mte_mops_probe(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t desc) +{ + int mmu_idx, tag_count; + uint64_t ptr_tag, tag_first, tag_last; + void *mem; + bool w = FIELD_EX32(desc, MTEDESC, WRITE); + uint32_t n; + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + /* True probe; this will never fault */ + mem = allocation_tag_mem_probe(env, mmu_idx, ptr, + w ? MMU_DATA_STORE : MMU_DATA_LOAD, + size, MMU_DATA_LOAD, true, 0); + if (!mem) { + return size; + } + + /* + * TODO: checkN() is not designed for checks of the size we expect + * for FEAT_MOPS operations, so we should implement this differently. + * Maybe we should do something like + * if (region start and size are aligned nicely) { + * do direct loads of 64 tag bits at a time; + * } else { + * call checkN() + * } + */ + /* Round the bounds to the tag granule, and compute the number of tags. */ + ptr_tag = allocation_tag_from_addr(ptr); + tag_first = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); + tag_last = QEMU_ALIGN_DOWN(ptr + size - 1, TAG_GRANULE); + tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1; + n = checkN(mem, ptr & TAG_GRANULE, ptr_tag, tag_count); + if (likely(n == tag_count)) { + return size; + } + + /* + * Failure; for the first granule, it's at @ptr. Otherwise + * it's at the first byte of the nth granule. Calculate how + * many bytes we can access without hitting that failure. + */ + if (n == 0) { + return 0; + } else { + return n * TAG_GRANULE - (ptr - tag_first); + } +} + +uint64_t mte_mops_probe_rev(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t desc) +{ + int mmu_idx, tag_count; + uint64_t ptr_tag, tag_first, tag_last; + void *mem; + bool w = FIELD_EX32(desc, MTEDESC, WRITE); + uint32_t n; + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + /* True probe; this will never fault */ + mem = allocation_tag_mem_probe(env, mmu_idx, ptr, + w ? MMU_DATA_STORE : MMU_DATA_LOAD, + size, MMU_DATA_LOAD, true, 0); + if (!mem) { + return size; + } + + /* + * TODO: checkNrev() is not designed for checks of the size we expect + * for FEAT_MOPS operations, so we should implement this differently. + * Maybe we should do something like + * if (region start and size are aligned nicely) { + * do direct loads of 64 tag bits at a time; + * } else { + * call checkN() + * } + */ + /* Round the bounds to the tag granule, and compute the number of tags. */ + ptr_tag = allocation_tag_from_addr(ptr); + tag_first = QEMU_ALIGN_DOWN(ptr - (size - 1), TAG_GRANULE); + tag_last = QEMU_ALIGN_DOWN(ptr, TAG_GRANULE); + tag_count = ((tag_last - tag_first) / TAG_GRANULE) + 1; + n = checkNrev(mem, ptr & TAG_GRANULE, ptr_tag, tag_count); + if (likely(n == tag_count)) { + return size; + } + + /* + * Failure; for the first granule, it's at @ptr. Otherwise + * it's at the last byte of the nth granule. Calculate how + * many bytes we can access without hitting that failure. + */ + if (n == 0) { + return 0; + } else { + return (n - 1) * TAG_GRANULE + ((ptr + 1) - tag_last); + } +} + +void mte_mops_set_tags(CPUARMState *env, uint64_t ptr, uint64_t size, + uint32_t desc) +{ + int mmu_idx, tag_count; + uint64_t ptr_tag; + void *mem; + + if (!desc) { + /* Tags not actually enabled */ + return; + } + + mmu_idx = FIELD_EX32(desc, MTEDESC, MIDX); + /* True probe: this will never fault */ + mem = allocation_tag_mem_probe(env, mmu_idx, ptr, MMU_DATA_STORE, size, + MMU_DATA_STORE, true, 0); + if (!mem) { + return; + } + + /* + * We know that ptr and size are both TAG_GRANULE aligned; store + * the tag from the pointer value into the tag memory. + */ + ptr_tag = allocation_tag_from_addr(ptr); + tag_count = size / TAG_GRANULE; + if (ptr & TAG_GRANULE) { + /* Not 2*TAG_GRANULE-aligned: store tag to first nibble */ + store_tag1_parallel(TAG_GRANULE, mem, ptr_tag); + mem++; + tag_count--; + } + memset(mem, ptr_tag | (ptr_tag << 4), tag_count / 2); + if (tag_count & 1) { + /* Final trailing unaligned nibble */ + mem += tag_count / 2; + store_tag1_parallel(0, mem, ptr_tag); + } +} diff --git a/target/arm/tcg/translate-a64.c b/target/arm/tcg/translate-a64.c index 1b6fbb6..97f25b4 100644 --- a/target/arm/tcg/translate-a64.c +++ b/target/arm/tcg/translate-a64.c @@ -105,9 +105,17 @@ void a64_translate_init(void) } /* - * Return the core mmu_idx to use for A64 "unprivileged load/store" insns + * Return the core mmu_idx to use for A64 load/store insns which + * have a "unprivileged load/store" variant. Those insns access + * EL0 if executed from an EL which has control over EL0 (usually + * EL1) but behave like normal loads and stores if executed from + * elsewhere (eg EL3). + * + * @unpriv : true for the unprivileged encoding; false for the + * normal encoding (in which case we will return the same + * thing as get_mem_index(). */ -static int get_a64_user_mem_index(DisasContext *s) +static int get_a64_user_mem_index(DisasContext *s, bool unpriv) { /* * If AccType_UNPRIV is not used, the insn uses AccType_NORMAL, @@ -115,7 +123,7 @@ static int get_a64_user_mem_index(DisasContext *s) */ ARMMMUIdx useridx = s->mmu_idx; - if (s->unpriv) { + if (unpriv && s->unpriv) { /* * We have pre-computed the condition for AccType_UNPRIV. * Therefore we should never get here with a mmu_idx for @@ -1453,6 +1461,10 @@ static bool trans_TBZ(DisasContext *s, arg_tbz *a) static bool trans_B_cond(DisasContext *s, arg_B_cond *a) { + /* BC.cond is only present with FEAT_HBC */ + if (a->c && !dc_isar_feature(aa64_hbc, s)) { + return false; + } reset_btype(s); if (a->cond < 0x0e) { /* genuinely conditional branches */ @@ -2260,7 +2272,7 @@ static void handle_sys(DisasContext *s, bool isread, clean_addr = clean_data_tbi(s, tcg_rt); gen_probe_access(s, clean_addr, MMU_DATA_STORE, MO_8); - if (s->ata) { + if (s->ata[0]) { /* Extract the tag from the register to match STZGM. */ tag = tcg_temp_new_i64(); tcg_gen_shri_i64(tag, tcg_rt, 56); @@ -2277,7 +2289,7 @@ static void handle_sys(DisasContext *s, bool isread, clean_addr = clean_data_tbi(s, tcg_rt); gen_helper_dc_zva(cpu_env, clean_addr); - if (s->ata) { + if (s->ata[0]) { /* Extract the tag from the register to match STZGM. */ tag = tcg_temp_new_i64(); tcg_gen_shri_i64(tag, tcg_rt, 56); @@ -3058,7 +3070,7 @@ static bool trans_STGP(DisasContext *s, arg_ldstpair *a) tcg_gen_qemu_st_i128(tmp, clean_addr, get_mem_index(s), mop); /* Perform the tag store, if tag access enabled. */ - if (s->ata) { + if (s->ata[0]) { if (tb_cflags(s->base.tb) & CF_PARALLEL) { gen_helper_stg_parallel(cpu_env, dirty_addr, dirty_addr); } else { @@ -3084,7 +3096,7 @@ static void op_addr_ldst_imm_pre(DisasContext *s, arg_ldst_imm *a, if (!a->p) { tcg_gen_addi_i64(*dirty_addr, *dirty_addr, offset); } - memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); + memidx = get_a64_user_mem_index(s, a->unpriv); *clean_addr = gen_mte_check1_mmuidx(s, *dirty_addr, is_store, a->w || a->rn != 31, mop, a->unpriv, memidx); @@ -3105,7 +3117,7 @@ static bool trans_STR_i(DisasContext *s, arg_ldst_imm *a) { bool iss_sf, iss_valid = !a->w; TCGv_i64 clean_addr, dirty_addr, tcg_rt; - int memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); + int memidx = get_a64_user_mem_index(s, a->unpriv); MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, true, mop); @@ -3123,7 +3135,7 @@ static bool trans_LDR_i(DisasContext *s, arg_ldst_imm *a) { bool iss_sf, iss_valid = !a->w; TCGv_i64 clean_addr, dirty_addr, tcg_rt; - int memidx = a->unpriv ? get_a64_user_mem_index(s) : get_mem_index(s); + int memidx = get_a64_user_mem_index(s, a->unpriv); MemOp mop = finalize_memop(s, a->sz + a->sign * MO_SIGN); op_addr_ldst_imm_pre(s, a, &clean_addr, &dirty_addr, a->imm, false, mop); @@ -3756,7 +3768,7 @@ static bool trans_STZGM(DisasContext *s, arg_ldst_tag *a) tcg_gen_addi_i64(addr, addr, a->imm); tcg_rt = cpu_reg(s, a->rt); - if (s->ata) { + if (s->ata[0]) { gen_helper_stzgm_tags(cpu_env, addr, tcg_rt); } /* @@ -3788,7 +3800,7 @@ static bool trans_STGM(DisasContext *s, arg_ldst_tag *a) tcg_gen_addi_i64(addr, addr, a->imm); tcg_rt = cpu_reg(s, a->rt); - if (s->ata) { + if (s->ata[0]) { gen_helper_stgm(cpu_env, addr, tcg_rt); } else { MMUAccessType acc = MMU_DATA_STORE; @@ -3820,7 +3832,7 @@ static bool trans_LDGM(DisasContext *s, arg_ldst_tag *a) tcg_gen_addi_i64(addr, addr, a->imm); tcg_rt = cpu_reg(s, a->rt); - if (s->ata) { + if (s->ata[0]) { gen_helper_ldgm(tcg_rt, cpu_env, addr); } else { MMUAccessType acc = MMU_DATA_LOAD; @@ -3855,7 +3867,7 @@ static bool trans_LDG(DisasContext *s, arg_ldst_tag *a) tcg_gen_andi_i64(addr, addr, -TAG_GRANULE); tcg_rt = cpu_reg(s, a->rt); - if (s->ata) { + if (s->ata[0]) { gen_helper_ldg(tcg_rt, cpu_env, addr, tcg_rt); } else { /* @@ -3892,7 +3904,7 @@ static bool do_STG(DisasContext *s, arg_ldst_tag *a, bool is_zero, bool is_pair) tcg_gen_addi_i64(addr, addr, a->imm); } tcg_rt = cpu_reg_sp(s, a->rt); - if (!s->ata) { + if (!s->ata[0]) { /* * For STG and ST2G, we need to check alignment and probe memory. * TODO: For STZG and STZ2G, we could rely on the stores below, @@ -3950,6 +3962,123 @@ TRANS_FEAT(STZG, aa64_mte_insn_reg, do_STG, a, true, false) TRANS_FEAT(ST2G, aa64_mte_insn_reg, do_STG, a, false, true) TRANS_FEAT(STZ2G, aa64_mte_insn_reg, do_STG, a, true, true) +typedef void SetFn(TCGv_env, TCGv_i32, TCGv_i32); + +static bool do_SET(DisasContext *s, arg_set *a, bool is_epilogue, + bool is_setg, SetFn fn) +{ + int memidx; + uint32_t syndrome, desc = 0; + + if (is_setg && !dc_isar_feature(aa64_mte, s)) { + return false; + } + + /* + * UNPREDICTABLE cases: we choose to UNDEF, which allows + * us to pull this check before the CheckMOPSEnabled() test + * (which we do in the helper function) + */ + if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || + a->rd == 31 || a->rn == 31) { + return false; + } + + memidx = get_a64_user_mem_index(s, a->unpriv); + + /* + * We pass option_a == true, matching our implementation; + * we pass wrong_option == false: helper function may set that bit. + */ + syndrome = syn_mop(true, is_setg, (a->nontemp << 1) | a->unpriv, + is_epilogue, false, true, a->rd, a->rs, a->rn); + + if (is_setg ? s->ata[a->unpriv] : s->mte_active[a->unpriv]) { + /* We may need to do MTE tag checking, so assemble the descriptor */ + desc = FIELD_DP32(desc, MTEDESC, TBI, s->tbid); + desc = FIELD_DP32(desc, MTEDESC, TCMA, s->tcma); + desc = FIELD_DP32(desc, MTEDESC, WRITE, true); + /* SIZEM1 and ALIGN we leave 0 (byte write) */ + } + /* The helper function always needs the memidx even with MTE disabled */ + desc = FIELD_DP32(desc, MTEDESC, MIDX, memidx); + + /* + * The helper needs the register numbers, but since they're in + * the syndrome anyway, we let it extract them from there rather + * than passing in an extra three integer arguments. + */ + fn(cpu_env, tcg_constant_i32(syndrome), tcg_constant_i32(desc)); + return true; +} + +TRANS_FEAT(SETP, aa64_mops, do_SET, a, false, false, gen_helper_setp) +TRANS_FEAT(SETM, aa64_mops, do_SET, a, false, false, gen_helper_setm) +TRANS_FEAT(SETE, aa64_mops, do_SET, a, true, false, gen_helper_sete) +TRANS_FEAT(SETGP, aa64_mops, do_SET, a, false, true, gen_helper_setgp) +TRANS_FEAT(SETGM, aa64_mops, do_SET, a, false, true, gen_helper_setgm) +TRANS_FEAT(SETGE, aa64_mops, do_SET, a, true, true, gen_helper_setge) + +typedef void CpyFn(TCGv_env, TCGv_i32, TCGv_i32, TCGv_i32); + +static bool do_CPY(DisasContext *s, arg_cpy *a, bool is_epilogue, CpyFn fn) +{ + int rmemidx, wmemidx; + uint32_t syndrome, rdesc = 0, wdesc = 0; + bool wunpriv = extract32(a->options, 0, 1); + bool runpriv = extract32(a->options, 1, 1); + + /* + * UNPREDICTABLE cases: we choose to UNDEF, which allows + * us to pull this check before the CheckMOPSEnabled() test + * (which we do in the helper function) + */ + if (a->rs == a->rn || a->rs == a->rd || a->rn == a->rd || + a->rd == 31 || a->rs == 31 || a->rn == 31) { + return false; + } + + rmemidx = get_a64_user_mem_index(s, runpriv); + wmemidx = get_a64_user_mem_index(s, wunpriv); + + /* + * We pass option_a == true, matching our implementation; + * we pass wrong_option == false: helper function may set that bit. + */ + syndrome = syn_mop(false, false, a->options, is_epilogue, + false, true, a->rd, a->rs, a->rn); + + /* If we need to do MTE tag checking, assemble the descriptors */ + if (s->mte_active[runpriv]) { + rdesc = FIELD_DP32(rdesc, MTEDESC, TBI, s->tbid); + rdesc = FIELD_DP32(rdesc, MTEDESC, TCMA, s->tcma); + } + if (s->mte_active[wunpriv]) { + wdesc = FIELD_DP32(wdesc, MTEDESC, TBI, s->tbid); + wdesc = FIELD_DP32(wdesc, MTEDESC, TCMA, s->tcma); + wdesc = FIELD_DP32(wdesc, MTEDESC, WRITE, true); + } + /* The helper function needs these parts of the descriptor regardless */ + rdesc = FIELD_DP32(rdesc, MTEDESC, MIDX, rmemidx); + wdesc = FIELD_DP32(wdesc, MTEDESC, MIDX, wmemidx); + + /* + * The helper needs the register numbers, but since they're in + * the syndrome anyway, we let it extract them from there rather + * than passing in an extra three integer arguments. + */ + fn(cpu_env, tcg_constant_i32(syndrome), tcg_constant_i32(wdesc), + tcg_constant_i32(rdesc)); + return true; +} + +TRANS_FEAT(CPYP, aa64_mops, do_CPY, a, false, gen_helper_cpyp) +TRANS_FEAT(CPYM, aa64_mops, do_CPY, a, false, gen_helper_cpym) +TRANS_FEAT(CPYE, aa64_mops, do_CPY, a, true, gen_helper_cpye) +TRANS_FEAT(CPYFP, aa64_mops, do_CPY, a, false, gen_helper_cpyfp) +TRANS_FEAT(CPYFM, aa64_mops, do_CPY, a, false, gen_helper_cpyfm) +TRANS_FEAT(CPYFE, aa64_mops, do_CPY, a, true, gen_helper_cpyfe) + typedef void ArithTwoOp(TCGv_i64, TCGv_i64, TCGv_i64); static bool gen_rri(DisasContext *s, arg_rri_sf *a, @@ -4012,7 +4141,7 @@ static bool gen_add_sub_imm_with_tags(DisasContext *s, arg_rri_tag *a, tcg_rn = cpu_reg_sp(s, a->rn); tcg_rd = cpu_reg_sp(s, a->rd); - if (s->ata) { + if (s->ata[0]) { gen_helper_addsubg(tcg_rd, cpu_env, tcg_rn, tcg_constant_i32(imm), tcg_constant_i32(a->uimm4)); @@ -5399,7 +5528,7 @@ static void disas_data_proc_2src(DisasContext *s, uint32_t insn) if (sf == 0 || !dc_isar_feature(aa64_mte_insn_reg, s)) { goto do_unallocated; } - if (s->ata) { + if (s->ata[0]) { gen_helper_irg(cpu_reg_sp(s, rd), cpu_env, cpu_reg_sp(s, rn), cpu_reg(s, rm)); } else { @@ -13890,7 +14019,8 @@ static void aarch64_tr_init_disas_context(DisasContextBase *dcbase, dc->bt = EX_TBFLAG_A64(tb_flags, BT); dc->btype = EX_TBFLAG_A64(tb_flags, BTYPE); dc->unpriv = EX_TBFLAG_A64(tb_flags, UNPRIV); - dc->ata = EX_TBFLAG_A64(tb_flags, ATA); + dc->ata[0] = EX_TBFLAG_A64(tb_flags, ATA); + dc->ata[1] = EX_TBFLAG_A64(tb_flags, ATA0); dc->mte_active[0] = EX_TBFLAG_A64(tb_flags, MTE_ACTIVE); dc->mte_active[1] = EX_TBFLAG_A64(tb_flags, MTE0_ACTIVE); dc->pstate_sm = EX_TBFLAG_A64(tb_flags, PSTATE_SM); diff --git a/target/arm/tcg/translate.h b/target/arm/tcg/translate.h index f748ba6..63922f8 100644 --- a/target/arm/tcg/translate.h +++ b/target/arm/tcg/translate.h @@ -114,8 +114,8 @@ typedef struct DisasContext { bool unpriv; /* True if v8.3-PAuth is active. */ bool pauth_active; - /* True if v8.5-MTE access to tags is enabled. */ - bool ata; + /* True if v8.5-MTE access to tags is enabled; index with is_unpriv. */ + bool ata[2]; /* True if v8.5-MTE tag checks affect the PE; index with is_unpriv. */ bool mte_active[2]; /* True with v8.5-BTI and SCTLR_ELx.BT* set. */ diff --git a/target/m68k/m68k-semi.c b/target/m68k/m68k-semi.c index 239f6e4..80cd8d7 100644 --- a/target/m68k/m68k-semi.c +++ b/target/m68k/m68k-semi.c @@ -15,6 +15,10 @@ * * You should have received a copy of the GNU General Public License * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * The semihosting protocol implemented here is described in the + * libgloss sources: + * https://sourceware.org/git/?p=newlib-cygwin.git;a=blob;f=libgloss/m68k/m68k-semi.txt;hb=HEAD */ #include "qemu/osdep.h" diff --git a/tests/tcg/aarch64/sysregs.c b/tests/tcg/aarch64/sysregs.c index d8eb06a..f7a055f 100644 --- a/tests/tcg/aarch64/sysregs.c +++ b/tests/tcg/aarch64/sysregs.c @@ -126,7 +126,7 @@ int main(void) */ get_cpu_reg_check_mask(id_aa64isar0_el1, _m(f0ff,ffff,f0ff,fff0)); get_cpu_reg_check_mask(id_aa64isar1_el1, _m(00ff,f0ff,ffff,ffff)); - get_cpu_reg_check_mask(SYS_ID_AA64ISAR2_EL1, _m(0000,0000,0000,ffff)); + get_cpu_reg_check_mask(SYS_ID_AA64ISAR2_EL1, _m(00ff,0000,00ff,ffff)); /* TGran4 & TGran64 as pegged to -1 */ get_cpu_reg_check_mask(id_aa64mmfr0_el1, _m(f000,0000,ff00,0000)); get_cpu_reg_check_mask(id_aa64mmfr1_el1, _m(0000,f000,0000,0000)); @@ -138,7 +138,7 @@ int main(void) get_cpu_reg_check_mask(id_aa64dfr0_el1, _m(0000,0000,0000,0006)); get_cpu_reg_check_zero(id_aa64dfr1_el1); get_cpu_reg_check_mask(SYS_ID_AA64ZFR0_EL1, _m(0ff0,ff0f,00ff,00ff)); - get_cpu_reg_check_mask(SYS_ID_AA64SMFR0_EL1, _m(80f1,00fd,0000,0000)); + get_cpu_reg_check_mask(SYS_ID_AA64SMFR0_EL1, _m(8ff1,fcff,0000,0000)); get_cpu_reg_check_zero(id_aa64afr0_el1); get_cpu_reg_check_zero(id_aa64afr1_el1); |