From c131b1d5da412b9b0f0681fc5704cdc9b7cafb04 Mon Sep 17 00:00:00 2001 From: Georg-Johann Lay Date: Mon, 8 Jan 2024 12:20:45 +0100 Subject: AVR: PR target/112952: Fix attribute "address", "io" and "io_low" so they work with all combinations of -f[no-]data-sections -f[no-]common. The patch also improves some diagnostics and adds additional checks, for example these attributes must only be applied to variables in static storage. gcc/ PR target/112952 * config/avr/avr.cc (avr_handle_addr_attribute): Also print valid range when diagnosing attribute "io" and "io_low" are out of range. (avr_eval_addr_attrib): Don't ICE on empty address at that place. (avr_insert_attributes): Reject if attribute "address", "io" or "io_low" in contexts other than static storage. (avr_asm_output_aligned_decl_common): Move output of decls with attribute "address", "io", and "io_low" to... (avr_output_addr_attrib): ...this new function. (avr_asm_asm_output_aligned_bss): Remove output for decls with attribute "address", "io", and "io_low". (avr_encode_section_info): Rectify handling of decls with attribute "address", "io", and "io_low". gcc/testsuite/ PR target/112952 * gcc.target/avr/attribute-io.h: New file. * gcc.target/avr/pr112952-0.c: New test. * gcc.target/avr/pr112952-1.c: New test. * gcc.target/avr/pr112952-2.c: New test. * gcc.target/avr/pr112952-3.c: New test. --- gcc/config/avr/avr.cc | 172 +++++++++++++++++++--------- gcc/testsuite/gcc.target/avr/attribute-io.h | 74 ++++++++++++ gcc/testsuite/gcc.target/avr/pr112952-0.c | 16 +++ gcc/testsuite/gcc.target/avr/pr112952-1.c | 16 +++ gcc/testsuite/gcc.target/avr/pr112952-2.c | 16 +++ gcc/testsuite/gcc.target/avr/pr112952-3.c | 16 +++ 6 files changed, 259 insertions(+), 51 deletions(-) create mode 100644 gcc/testsuite/gcc.target/avr/attribute-io.h create mode 100644 gcc/testsuite/gcc.target/avr/pr112952-0.c create mode 100644 gcc/testsuite/gcc.target/avr/pr112952-1.c create mode 100644 gcc/testsuite/gcc.target/avr/pr112952-2.c create mode 100644 gcc/testsuite/gcc.target/avr/pr112952-3.c diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc index b070035..0cdd035 100644 --- a/gcc/config/avr/avr.cc +++ b/gcc/config/avr/avr.cc @@ -10359,6 +10359,10 @@ avr_handle_addr_attribute (tree *node, tree name, tree args, int flags ATTRIBUTE_UNUSED, bool *no_add) { bool io_p = startswith (IDENTIFIER_POINTER (name), "io"); + HOST_WIDE_INT io_start = avr_arch->sfr_offset; + HOST_WIDE_INT io_end = strcmp (IDENTIFIER_POINTER (name), "io_low") == 0 + ? io_start + 0x1f + : io_start + 0x3f; location_t loc = DECL_SOURCE_LOCATION (*node); if (!VAR_P (*node)) @@ -10382,12 +10386,10 @@ avr_handle_addr_attribute (tree *node, tree name, tree args, } else if (io_p && (!tree_fits_shwi_p (arg) - || !(strcmp (IDENTIFIER_POINTER (name), "io_low") == 0 - ? low_io_address_operand : io_address_operand) - (GEN_INT (TREE_INT_CST_LOW (arg)), QImode))) + || ! IN_RANGE (TREE_INT_CST_LOW (arg), io_start, io_end))) { - warning_at (loc, OPT_Wattributes, "%qE attribute address " - "out of range", name); + warning_at (loc, OPT_Wattributes, "%qE attribute address out of " + "range 0x%x...0x%x", name, (int) io_start, (int) io_end); *no_add = true; } else @@ -10413,6 +10415,12 @@ avr_handle_addr_attribute (tree *node, tree name, tree args, warning_at (loc, OPT_Wattributes, "%qE attribute on non-volatile variable", name); + // Optimizers must not draw any conclusions from "static int addr;" etc. + // because the contents of `addr' are not given by its initializer but + // by the contents at the address as specified by the attribute. + if (VAR_P (*node) && ! *no_add) + TREE_THIS_VOLATILE (*node) = 1; + return NULL_TREE; } @@ -10430,7 +10438,6 @@ avr_eval_addr_attrib (rtx x) attr = lookup_attribute ("io", DECL_ATTRIBUTES (decl)); if (!attr || !TREE_VALUE (attr)) attr = lookup_attribute ("io_low", DECL_ATTRIBUTES (decl)); - gcc_assert (attr); } if (!attr || !TREE_VALUE (attr)) attr = lookup_attribute ("address", DECL_ATTRIBUTES (decl)); @@ -10686,6 +10693,17 @@ avr_pgm_check_var_decl (tree node) static void avr_insert_attributes (tree node, tree *attributes) { + if (VAR_P (node) + && ! TREE_STATIC (node) + && ! DECL_EXTERNAL (node)) + { + const char *names[] = { "io", "io_low", "address", NULL }; + for (const char **p = names; *p; ++p) + if (lookup_attribute (*p, *attributes)) + error ("variable %q+D with attribute %qs must be located in " + "static storage", node, *p); + } + avr_pgm_check_var_decl (node); if (TARGET_MAIN_IS_OS_TASK @@ -10746,37 +10764,11 @@ avr_insert_attributes (tree node, tree *attributes) /* Track need of __do_clear_bss. */ void -avr_asm_output_aligned_decl_common (FILE * stream, - tree decl, - const char *name, - unsigned HOST_WIDE_INT size, - unsigned int align, bool local_p) +avr_asm_output_aligned_decl_common (FILE *stream, tree /* decl */, + const char *name, + unsigned HOST_WIDE_INT size, + unsigned int align, bool local_p) { - rtx mem = decl == NULL_TREE ? NULL_RTX : DECL_RTL (decl); - rtx symbol; - - if (mem != NULL_RTX && MEM_P (mem) - && SYMBOL_REF_P ((symbol = XEXP (mem, 0))) - && (SYMBOL_REF_FLAGS (symbol) & (SYMBOL_FLAG_IO | SYMBOL_FLAG_ADDRESS))) - { - if (!local_p) - { - fprintf (stream, "\t.globl\t"); - assemble_name (stream, name); - fprintf (stream, "\n"); - } - if (SYMBOL_REF_FLAGS (symbol) & SYMBOL_FLAG_ADDRESS) - { - assemble_name (stream, name); - fprintf (stream, " = %ld\n", - (long) INTVAL (avr_eval_addr_attrib (symbol))); - } - else if (local_p) - error_at (DECL_SOURCE_LOCATION (decl), - "static IO declaration for %q+D needs an address", decl); - return; - } - /* __gnu_lto_slim is just a marker for the linker injected by toplev.cc. There is no need to trigger __do_clear_bss code for them. */ @@ -10789,6 +10781,9 @@ avr_asm_output_aligned_decl_common (FILE * stream, ASM_OUTPUT_ALIGNED_COMMON (stream, name, size, align); } + +/* Implement `ASM_OUTPUT_ALIGNED_BSS'. */ + void avr_asm_asm_output_aligned_bss (FILE *file, tree decl, const char *name, unsigned HOST_WIDE_INT size, int align, @@ -10796,20 +10791,10 @@ avr_asm_asm_output_aligned_bss (FILE *file, tree decl, const char *name, (FILE *, tree, const char *, unsigned HOST_WIDE_INT, int)) { - rtx mem = decl == NULL_TREE ? NULL_RTX : DECL_RTL (decl); - rtx symbol; + if (!startswith (name, "__gnu_lto")) + avr_need_clear_bss_p = true; - if (mem != NULL_RTX && MEM_P (mem) - && SYMBOL_REF_P ((symbol = XEXP (mem, 0))) - && (SYMBOL_REF_FLAGS (symbol) & (SYMBOL_FLAG_IO | SYMBOL_FLAG_ADDRESS))) - { - if (!(SYMBOL_REF_FLAGS (symbol) & SYMBOL_FLAG_ADDRESS)) - error_at (DECL_SOURCE_LOCATION (decl), - "IO definition for %q+D needs an address", decl); - avr_asm_output_aligned_decl_common (file, decl, name, size, align, false); - } - else - default_func (file, decl, name, size, align); + default_func (file, decl, name, size, align); } @@ -10848,6 +10833,58 @@ avr_output_progmem_section_asm_op (const char *data) } +/* A noswitch section callback to output symbol definitions for + attributes "io", "io_low" and "address". */ + +static bool +avr_output_addr_attrib (tree decl, const char *name, + unsigned HOST_WIDE_INT /* size */, + unsigned HOST_WIDE_INT /* align */) +{ + gcc_assert (DECL_RTL_SET_P (decl)); + + FILE *stream = asm_out_file; + bool local_p = ! DECL_WEAK (decl) && ! TREE_PUBLIC (decl); + rtx symbol, mem = DECL_RTL (decl); + + if (mem != NULL_RTX && MEM_P (mem) + && SYMBOL_REF_P ((symbol = XEXP (mem, 0))) + && (SYMBOL_REF_FLAGS (symbol) & (SYMBOL_FLAG_IO | SYMBOL_FLAG_ADDRESS))) + { + if (! local_p) + { + fprintf (stream, "\t%s\t", DECL_WEAK (decl) ? ".weak" : ".globl"); + assemble_name (stream, name); + fprintf (stream, "\n"); + } + + if (SYMBOL_REF_FLAGS (symbol) & SYMBOL_FLAG_ADDRESS) + { + assemble_name (stream, name); + fprintf (stream, " = %ld\n", + (long) INTVAL (avr_eval_addr_attrib (symbol))); + } + else if (local_p) + { + const char *names[] = { "io", "io_low", "address", NULL }; + for (const char **p = names; *p; ++p) + if (lookup_attribute (*p, DECL_ATTRIBUTES (decl))) + { + error ("static attribute %qs declaration for %q+D needs an " + "address", *p, decl); + break; + } + } + + return true; + } + + gcc_unreachable(); + + return false; +} + + /* Implement `TARGET_ASM_INIT_SECTIONS'. */ static void @@ -10863,6 +10900,7 @@ avr_asm_init_sections (void) readonly_data_section->unnamed.callback = avr_output_data_section_asm_op; data_section->unnamed.callback = avr_output_data_section_asm_op; bss_section->unnamed.callback = avr_output_bss_section_asm_op; + tls_comm_section->noswitch.callback = avr_output_addr_attrib; } @@ -11045,15 +11083,17 @@ avr_encode_section_info (tree decl, rtx rtl, int new_decl_p) tree io_low_attr = lookup_attribute ("io_low", attr); tree io_attr = lookup_attribute ("io", attr); + tree address_attr = lookup_attribute ("address", attr); if (io_low_attr && TREE_VALUE (io_low_attr) && TREE_VALUE (TREE_VALUE (io_low_attr))) - addr_attr = io_attr; + addr_attr = io_low_attr; else if (io_attr && TREE_VALUE (io_attr) && TREE_VALUE (TREE_VALUE (io_attr))) addr_attr = io_attr; else - addr_attr = lookup_attribute ("address", attr); + addr_attr = address_attr; + if (io_low_attr || (io_attr && addr_attr && low_io_address_operand @@ -11068,6 +11108,36 @@ avr_encode_section_info (tree decl, rtx rtl, int new_decl_p) don't use the exact value for constant propagation. */ if (addr_attr && !DECL_EXTERNAL (decl)) SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_ADDRESS; + + if (io_attr || io_low_attr || address_attr) + { + if (DECL_INITIAL (decl)) + { + /* Initializers are not yet parsed in TARGET_INSERT_ATTRIBUTES, + hence deny initializers now. The values of symbols with an + address attribute are determined by the attribute, not by + some initializer. */ + + error ("variable %q+D with attribute %qs must not have an " + "initializer", decl, + io_low_attr ? "io_low" : io_attr ? "io" : "address"); + } + else + { + /* PR112952: The only way to output a variable declaration in a + custom manner is by means of a noswitch section callback. + There are only three noswitch sections: comm_section, + lcomm_section and tls_comm_section. And there is no way to + wire a custom noswitch section to a decl. As lcomm_section + is bypassed with -fdata-sections -fno-common, there is no + other way than making use of tls_comm_section. As we are + using that section anyway, also use it in the public case. */ + + DECL_COMMON (decl) = 1; + set_decl_section_name (decl, (const char*) nullptr); + set_decl_tls_model (decl, (tls_model) 2); + } + } } if (AVR_TINY diff --git a/gcc/testsuite/gcc.target/avr/attribute-io.h b/gcc/testsuite/gcc.target/avr/attribute-io.h new file mode 100644 index 0000000..39abd4e --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/attribute-io.h @@ -0,0 +1,74 @@ +/* { dg-do run } */ +/* { dg-options "-Os -save-temps" } */ + +__attribute__((address(1234))) +int g_1234; + +__attribute__((weak, address(4321))) +int w_4321; + +__attribute__((address(5678))) +static int l_5678; + +__attribute__((io_low(__AVR_SFR_OFFSET__ + 3))) +volatile unsigned char g_low; + +__attribute__((weak, io_low(__AVR_SFR_OFFSET__ + 2))) +volatile unsigned char w_low; + +__attribute__((io_low(__AVR_SFR_OFFSET__ + 1))) +static volatile unsigned char l_low; + +__attribute__((io(__AVR_SFR_OFFSET__ + 35))) +volatile unsigned char g_io; + +__attribute__((weak, io(__AVR_SFR_OFFSET__ + 34))) +volatile unsigned char w_io; + +__attribute__((io(__AVR_SFR_OFFSET__ + 33))) +static volatile unsigned char l_io; + +#define CMP(SYM, VAL) \ + do { \ + unsigned x; \ + __asm ("" : "=d" (x) : "0" (& SYM)); \ + if (x != VAL) \ + __builtin_abort(); \ + } while(0) + + +int main (void) +{ + CMP (g_1234, 1234); + CMP (w_4321, 4321); + CMP (l_5678, 5678); + + CMP (g_low, __AVR_SFR_OFFSET__ + 3); + CMP (w_low, __AVR_SFR_OFFSET__ + 2); + CMP (l_low, __AVR_SFR_OFFSET__ + 1); + + CMP (g_io, __AVR_SFR_OFFSET__ + 35); + CMP (w_io, __AVR_SFR_OFFSET__ + 34); + CMP (l_io, __AVR_SFR_OFFSET__ + 33); + + l_low = l_io; + g_low = g_io; + w_low = w_io; + l_low |= 1; + g_low |= 2; + w_low |= 4; + + return 0; +} + +/* { dg-final { scan-assembler "g_1234 = 1234" } } */ +/* { dg-final { scan-assembler "w_4321 = 4321" } } */ +/* { dg-final { scan-assembler "l_5678 = 5678" } } */ + +/* { dg-final { scan-assembler "\\.globl g_1234" } } */ +/* { dg-final { scan-assembler "\\.globl g_low" } } */ +/* { dg-final { scan-assembler "\\.globl g_io" } } */ + +/* { dg-final { scan-assembler "\\.weak w_4321" } } */ +/* { dg-final { scan-assembler "\\.weak w_low" } } */ +/* { dg-final { scan-assembler "\\.weak w_io" } } */ diff --git a/gcc/testsuite/gcc.target/avr/pr112952-0.c b/gcc/testsuite/gcc.target/avr/pr112952-0.c new file mode 100644 index 0000000..1870bf3 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/pr112952-0.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-Os -save-temps -fno-data-sections -fno-common" } */ + +#include "attribute-io.h" + +/* { dg-final { scan-assembler "g_1234 = 1234" } } */ +/* { dg-final { scan-assembler "w_4321 = 4321" } } */ +/* { dg-final { scan-assembler "l_5678 = 5678" } } */ + +/* { dg-final { scan-assembler "\\.globl g_1234" } } */ +/* { dg-final { scan-assembler "\\.globl g_low" } } */ +/* { dg-final { scan-assembler "\\.globl g_io" } } */ + +/* { dg-final { scan-assembler "\\.weak w_4321" } } */ +/* { dg-final { scan-assembler "\\.weak w_low" } } */ +/* { dg-final { scan-assembler "\\.weak w_io" } } */ diff --git a/gcc/testsuite/gcc.target/avr/pr112952-1.c b/gcc/testsuite/gcc.target/avr/pr112952-1.c new file mode 100644 index 0000000..6e7d273 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/pr112952-1.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-Os -save-temps -fno-data-sections -fcommon" } */ + +#include "attribute-io.h" + +/* { dg-final { scan-assembler "g_1234 = 1234" } } */ +/* { dg-final { scan-assembler "w_4321 = 4321" } } */ +/* { dg-final { scan-assembler "l_5678 = 5678" } } */ + +/* { dg-final { scan-assembler "\\.globl g_1234" } } */ +/* { dg-final { scan-assembler "\\.globl g_low" } } */ +/* { dg-final { scan-assembler "\\.globl g_io" } } */ + +/* { dg-final { scan-assembler "\\.weak w_4321" } } */ +/* { dg-final { scan-assembler "\\.weak w_low" } } */ +/* { dg-final { scan-assembler "\\.weak w_io" } } */ diff --git a/gcc/testsuite/gcc.target/avr/pr112952-2.c b/gcc/testsuite/gcc.target/avr/pr112952-2.c new file mode 100644 index 0000000..0ef0598 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/pr112952-2.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-Os -save-temps -fdata-sections -fno-common" } */ + +#include "attribute-io.h" + +/* { dg-final { scan-assembler "g_1234 = 1234" } } */ +/* { dg-final { scan-assembler "w_4321 = 4321" } } */ +/* { dg-final { scan-assembler "l_5678 = 5678" } } */ + +/* { dg-final { scan-assembler "\\.globl g_1234" } } */ +/* { dg-final { scan-assembler "\\.globl g_low" } } */ +/* { dg-final { scan-assembler "\\.globl g_io" } } */ + +/* { dg-final { scan-assembler "\\.weak w_4321" } } */ +/* { dg-final { scan-assembler "\\.weak w_low" } } */ +/* { dg-final { scan-assembler "\\.weak w_io" } } */ diff --git a/gcc/testsuite/gcc.target/avr/pr112952-3.c b/gcc/testsuite/gcc.target/avr/pr112952-3.c new file mode 100644 index 0000000..51da967 --- /dev/null +++ b/gcc/testsuite/gcc.target/avr/pr112952-3.c @@ -0,0 +1,16 @@ +/* { dg-do run } */ +/* { dg-options "-Os -save-temps -fdata-sections -fcommon" } */ + +#include "attribute-io.h" + +/* { dg-final { scan-assembler "g_1234 = 1234" } } */ +/* { dg-final { scan-assembler "w_4321 = 4321" } } */ +/* { dg-final { scan-assembler "l_5678 = 5678" } } */ + +/* { dg-final { scan-assembler "\\.globl g_1234" } } */ +/* { dg-final { scan-assembler "\\.globl g_low" } } */ +/* { dg-final { scan-assembler "\\.globl g_io" } } */ + +/* { dg-final { scan-assembler "\\.weak w_4321" } } */ +/* { dg-final { scan-assembler "\\.weak w_low" } } */ +/* { dg-final { scan-assembler "\\.weak w_io" } } */ -- cgit v1.1 From 7590d975ecfdae4f112b5086c017101c08f07e3e Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 8 Jan 2024 13:57:26 +0100 Subject: lower-bitint: Punt .*_OVERFLOW optimization if cast from IMAGPART_EXPR appears before REALPART_EXPR [PR113119] _BitInt lowering for .{ADD,SUB,MUL}_OVERFLOW calls which have both REALPART_EXPR and IMAGPART_EXPR used and have a cast from the IMAGPART_EXPR to a boolean or normal integral type lowers them at the point of the REALPART_EXPR statement (which is especially needed if the lhs of the call is complex with large/huge _BitInt element type); we emit the stmt to set the lhs of the cast at the same spot as well. Normally, the lowering of __builtin_{add,sub,mul}_overflow arranges the REALPART_EXPR to come before IMAGPART_EXPR, followed by cast from that, but as the testcase shows, a redundant __builtin_*_overflow call and VN can reorder those and we then ICE because the def-stmt of the former cast from IMAGPART_EXPR may appear after its uses. We already check that all of REALPART_EXPR, IMAGPART_EXPR and the cast from the latter appear in the same bb as the .{ADD,SUB,MUL}_OVERFLOW call in the optimization, the following patch just extends it to make sure cast appears after REALPART_EXPR; if not, we punt on the optimization and expand it as a store of a complex _BitInt on the location of the ifn call. Only the testcase in the testsuite is changed by the patch, all other __builtin_*_overflow* calls in the bitint* tests (and there are quite a few) have REALPART_EXPR first. 2024-01-08 Jakub Jelinek PR tree-optimization/113119 * gimple-lower-bitint.cc (optimizable_arith_overflow): Punt if both REALPART_EXPR and cast from IMAGPART_EXPR appear, but cast is before REALPART_EXPR. * gcc.dg/bitint-61.c: New test. --- gcc/gimple-lower-bitint.cc | 26 +++++++++++++++++++++++++- gcc/testsuite/gcc.dg/bitint-61.c | 17 +++++++++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/bitint-61.c diff --git a/gcc/gimple-lower-bitint.cc b/gcc/gimple-lower-bitint.cc index 9a80f93..3869a16 100644 --- a/gcc/gimple-lower-bitint.cc +++ b/gcc/gimple-lower-bitint.cc @@ -305,6 +305,7 @@ optimizable_arith_overflow (gimple *stmt) imm_use_iterator ui; use_operand_p use_p; int seen = 0; + gimple *realpart = NULL, *cast = NULL; FOR_EACH_IMM_USE_FAST (use_p, ui, lhs) { gimple *g = USE_STMT (use_p); @@ -317,6 +318,7 @@ optimizable_arith_overflow (gimple *stmt) if ((seen & 1) != 0) return 0; seen |= 1; + realpart = g; } else if (gimple_assign_rhs_code (g) == IMAGPART_EXPR) { @@ -338,13 +340,35 @@ optimizable_arith_overflow (gimple *stmt) if (!INTEGRAL_TYPE_P (TREE_TYPE (lhs2)) || TREE_CODE (TREE_TYPE (lhs2)) == BITINT_TYPE) return 0; + cast = use_stmt; } else return 0; } if ((seen & 2) == 0) return 0; - return seen == 3 ? 2 : 1; + if (seen == 3) + { + /* Punt if the cast stmt appears before realpart stmt, because + if both appear, the lowering wants to emit all the code + at the location of realpart stmt. */ + gimple_stmt_iterator gsi = gsi_for_stmt (realpart); + unsigned int cnt = 0; + do + { + gsi_prev_nondebug (&gsi); + if (gsi_end_p (gsi) || gsi_stmt (gsi) == cast) + return 0; + if (gsi_stmt (gsi) == stmt) + return 2; + /* If realpart is too far from stmt, punt as well. + Usually it will appear right after it. */ + if (++cnt == 32) + return 0; + } + while (1); + } + return 1; } /* If STMT is some kind of comparison (GIMPLE_COND, comparison assignment) diff --git a/gcc/testsuite/gcc.dg/bitint-61.c b/gcc/testsuite/gcc.dg/bitint-61.c new file mode 100644 index 0000000..2b2708a --- /dev/null +++ b/gcc/testsuite/gcc.dg/bitint-61.c @@ -0,0 +1,17 @@ +/* PR tree-optimization/113119 */ +/* { dg-do compile { target bitint } } */ +/* { dg-options "-std=c23 -O2" } */ + +_BitInt(8) b; +_Bool c; +#if __BITINT_MAXWIDTH__ >= 8445 +_BitInt(8445) a; + +void +foo (_BitInt(4058) d) +{ + c = __builtin_add_overflow (a, 0ULL, &d); + __builtin_add_overflow (a, 0ULL, &d); + b = d; +} +#endif -- cgit v1.1 From efef8d7ff43c6c489fd6e7c52d71494d21324c87 Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 8 Jan 2024 13:58:28 +0100 Subject: lower-bitint: Fix up lowering of huge _BitInt 0 PHI args [PR113120] The PHI argument expansion of INTEGER_CSTs where bitint_min_cst_precision returns significantly smaller precision than the PHI result precision is optimized by loading the much smaller constant (if any) from memory and then either setting the remaining limbs to {} or calling memset with -1. The case where no constant is loaded (i.e. c == NULL) is when the INTEGER_CST is 0 or all_ones - in that case we can just set all the limbs to {} or call memset with -1 on everything. While for the all ones extension case that is what the code was already doing, I missed one spot in the zero extension case, where constricting the offset of the MEM_REF lhs of the = {} store it was using unconditionally the byte size of c, which obviously doesn't work if c is NULL. In that case we want to use zero offset. 2024-01-08 Jakub Jelinek PR tree-optimization/113120 * gimple-lower-bitint.cc (gimple_lower_bitint): Fix handling of very large _BitInt zero INTEGER_CST PHI argument. * gcc.dg/bitint-62.c: New test. --- gcc/gimple-lower-bitint.cc | 8 ++++++-- gcc/testsuite/gcc.dg/bitint-62.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 gcc/testsuite/gcc.dg/bitint-62.c diff --git a/gcc/gimple-lower-bitint.cc b/gcc/gimple-lower-bitint.cc index 3869a16..8993a61 100644 --- a/gcc/gimple-lower-bitint.cc +++ b/gcc/gimple-lower-bitint.cc @@ -6606,8 +6606,12 @@ gimple_lower_bitint (void) = build_array_type_nelts (large_huge.m_limb_type, nelts); tree ptype = build_pointer_type (TREE_TYPE (v1)); - tree off = fold_convert (ptype, - TYPE_SIZE_UNIT (TREE_TYPE (c))); + tree off; + if (c) + off = fold_convert (ptype, + TYPE_SIZE_UNIT (TREE_TYPE (c))); + else + off = build_zero_cst (ptype); tree vd = build2 (MEM_REF, vtype, build_fold_addr_expr (v1), off); g = gimple_build_assign (vd, build_zero_cst (vtype)); diff --git a/gcc/testsuite/gcc.dg/bitint-62.c b/gcc/testsuite/gcc.dg/bitint-62.c new file mode 100644 index 0000000..2c3139c --- /dev/null +++ b/gcc/testsuite/gcc.dg/bitint-62.c @@ -0,0 +1,32 @@ +/* PR tree-optimization/113120 */ +/* { dg-do compile { target bitint } } */ +/* { dg-options "-std=c23 -O2" } */ + +_BitInt(8) a; +_BitInt(55) b; + +#if __BITINT_MAXWIDTH__ >= 401 +static __attribute__((noinline, noclone)) void +foo (unsigned _BitInt(1) c, _BitInt(401) d) +{ + c /= d << b; + a = c; +} + +void +bar (void) +{ + foo (1, 4); +} +#endif + +#if __BITINT_MAXWIDTH__ >= 6928 +_BitInt(6928) +baz (int x, _BitInt(6928) y) +{ + if (x) + return y; + else + return 0; +} +#endif -- cgit v1.1 From 8c0dd8a6ff85d6e7b38957f2da400f5cfa8fef6b Mon Sep 17 00:00:00 2001 From: Jakub Jelinek Date: Mon, 8 Jan 2024 13:59:15 +0100 Subject: gimplify: Fix ICE in recalculate_side_effects [PR113228] The following testcase ICEs during regimplificatgion since the addition of (convert (eqne zero_one_valued_p@0 INTEGER_CST@1)) simplification. That simplification is novel in the sense that in gimplify_expr it can turn an expression (comparison in particular) into a SSA_NAME. Normally when gimplify_expr sees originally a SSA_NAME, it does case SSA_NAME: /* Allow callbacks into the gimplifier during optimization. */ ret = GS_ALL_DONE; break; and doesn't try to recalculate side effects because of that, but in this case gimplify_expr normally enters the: default: switch (TREE_CODE_CLASS (TREE_CODE (*expr_p))) { case tcc_comparison: then does *expr_p = gimple_boolify (*expr_p); and then *expr_p = fold_convert_loc (input_location, org_type, *expr_p); with this new match.pd simplification turns that tcc_comparison class into SSA_NAME. Unlike the outer SSA_NAME handling though, this falls through into recalculate_side_effects (*expr_p); dont_recalculate: break; but unfortunately recalculate_side_effects doesn't handle SSA_NAME and ICEs on it. SSA_NAMEs don't ever have TREE_SIDE_EFFECTS set on those, so the following patch fixes it by handling it similarly to the tcc_constant case. 2024-01-08 Jakub Jelinek PR tree-optimization/113228 * gimplify.cc (recalculate_side_effects): Do nothing for SSA_NAMEs. * gcc.c-torture/compile/pr113228.c: New test. --- gcc/gimplify.cc | 3 +++ gcc/testsuite/gcc.c-torture/compile/pr113228.c | 17 +++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 gcc/testsuite/gcc.c-torture/compile/pr113228.c diff --git a/gcc/gimplify.cc b/gcc/gimplify.cc index 15b5406..4ed7a07 100644 --- a/gcc/gimplify.cc +++ b/gcc/gimplify.cc @@ -3344,6 +3344,9 @@ recalculate_side_effects (tree t) return; default: + if (code == SSA_NAME) + /* No side-effects. */ + return; gcc_unreachable (); } } diff --git a/gcc/testsuite/gcc.c-torture/compile/pr113228.c b/gcc/testsuite/gcc.c-torture/compile/pr113228.c new file mode 100644 index 0000000..f460184 --- /dev/null +++ b/gcc/testsuite/gcc.c-torture/compile/pr113228.c @@ -0,0 +1,17 @@ +/* PR tree-optimization/113228 */ + +int a, b, c, d, i; + +void +foo (void) +{ + int k[3] = {}; + int *l = &a; + for (d = 0; c; c--) + for (i = 0; i <= 9; i++) + { + for (b = 1; b <= 4; b++) + k[0] = k[0] == 0; + *l |= k[d]; + } +} -- cgit v1.1 From b3cc5a1efead520bc977b4ba51f1328d01b3e516 Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Fri, 15 Dec 2023 10:32:29 +0100 Subject: tree-optimization/113026 - avoid vector epilog in more cases The following avoids creating a niter peeling epilog more consistently, matching what peeling later uses for the skip_vector condition, in particular when versioning is required which then also ensures the vector loop is entered unless the epilog is vectorized. This should ideally match LOOP_VINFO_VERSIONING_THRESHOLD which is only computed later, some refactoring could make that better matching. The patch also makes sure to adjust the upper bound of the epilogues when we do not have a skip edge around the vector loop. PR tree-optimization/113026 * tree-vect-loop.cc (vect_need_peeling_or_partial_vectors_p): Avoid an epilog in more cases. * tree-vect-loop-manip.cc (vect_do_peeling): Adjust the epilogues niter upper bounds and estimates. * gcc.dg/torture/pr113026-1.c: New testcase. * gcc.dg/torture/pr113026-2.c: Likewise. --- gcc/testsuite/gcc.dg/torture/pr113026-1.c | 11 +++++++++++ gcc/testsuite/gcc.dg/torture/pr113026-2.c | 18 +++++++++++++++++ gcc/tree-vect-loop-manip.cc | 32 +++++++++++++++++++++++++++++++ gcc/tree-vect-loop.cc | 6 +++++- 4 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 gcc/testsuite/gcc.dg/torture/pr113026-1.c create mode 100644 gcc/testsuite/gcc.dg/torture/pr113026-2.c diff --git a/gcc/testsuite/gcc.dg/torture/pr113026-1.c b/gcc/testsuite/gcc.dg/torture/pr113026-1.c new file mode 100644 index 0000000..56dfef3 --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr113026-1.c @@ -0,0 +1,11 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Wall" } */ + +char dst[16]; + +void +foo (char *src, long n) +{ + for (long i = 0; i < n; i++) + dst[i] = src[i]; /* { dg-bogus "" } */ +} diff --git a/gcc/testsuite/gcc.dg/torture/pr113026-2.c b/gcc/testsuite/gcc.dg/torture/pr113026-2.c new file mode 100644 index 0000000..b9d5857a --- /dev/null +++ b/gcc/testsuite/gcc.dg/torture/pr113026-2.c @@ -0,0 +1,18 @@ +/* { dg-do compile } */ +/* { dg-additional-options "-Wall" } */ + +char dst1[17]; +void +foo1 (char *src, long n) +{ + for (long i = 0; i < n; i++) + dst1[i] = src[i]; /* { dg-bogus "" } */ +} + +char dst2[18]; +void +foo2 (char *src, long n) +{ + for (long i = 0; i < n; i++) + dst2[i] = src[i]; /* { dg-bogus "" } */ +} diff --git a/gcc/tree-vect-loop-manip.cc b/gcc/tree-vect-loop-manip.cc index 9330183..927f76a 100644 --- a/gcc/tree-vect-loop-manip.cc +++ b/gcc/tree-vect-loop-manip.cc @@ -3364,6 +3364,38 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1, bb_before_epilog->count = single_pred_edge (bb_before_epilog)->count (); bb_before_epilog = loop_preheader_edge (epilog)->src; } + else + { + /* When we do not have a loop-around edge to the epilog we know + the vector loop covered at least VF scalar iterations unless + we have early breaks and the epilog will cover at most + VF - 1 + gap peeling iterations. + Update any known upper bound with this knowledge. */ + if (! LOOP_VINFO_EARLY_BREAKS (loop_vinfo)) + { + if (epilog->any_upper_bound) + epilog->nb_iterations_upper_bound -= lowest_vf; + if (epilog->any_likely_upper_bound) + epilog->nb_iterations_likely_upper_bound -= lowest_vf; + if (epilog->any_estimate) + epilog->nb_iterations_estimate -= lowest_vf; + } + unsigned HOST_WIDE_INT const_vf; + if (vf.is_constant (&const_vf)) + { + const_vf += LOOP_VINFO_PEELING_FOR_GAPS (loop_vinfo) - 1; + if (epilog->any_upper_bound) + epilog->nb_iterations_upper_bound + = wi::umin (epilog->nb_iterations_upper_bound, const_vf); + if (epilog->any_likely_upper_bound) + epilog->nb_iterations_likely_upper_bound + = wi::umin (epilog->nb_iterations_likely_upper_bound, + const_vf); + if (epilog->any_estimate) + epilog->nb_iterations_estimate + = wi::umin (epilog->nb_iterations_estimate, const_vf); + } + } /* If loop is peeled for non-zero constant times, now niters refers to orig_niters - prolog_peeling, it won't overflow even the orig_niters diff --git a/gcc/tree-vect-loop.cc b/gcc/tree-vect-loop.cc index a067716..9dd573e 100644 --- a/gcc/tree-vect-loop.cc +++ b/gcc/tree-vect-loop.cc @@ -1261,7 +1261,11 @@ vect_need_peeling_or_partial_vectors_p (loop_vec_info loop_vinfo) the epilogue is unnecessary. */ && (!LOOP_REQUIRES_VERSIONING (loop_vinfo) || ((unsigned HOST_WIDE_INT) max_niter - > (th / const_vf) * const_vf)))) + /* We'd like to use LOOP_VINFO_VERSIONING_THRESHOLD + but that's only computed later based on our result. + The following is the most conservative approximation. */ + > (std::max ((unsigned HOST_WIDE_INT) th, + const_vf) / const_vf) * const_vf)))) return true; return false; -- cgit v1.1 From 4b358f9b7348c50321f3ec1af6d56fa200c0889a Mon Sep 17 00:00:00 2001 From: Richard Biener Date: Mon, 8 Jan 2024 10:48:19 +0100 Subject: Clarify -mmovbe documentation It was noticed that -mmovbe doesn't use movbe for __builtin_bswap{32,64} when not optimizing. The follownig adjusts the documentation to say it will be used for optimizing and applies to all byte swaps, not just those carried out via builtin function calls. * doc/invoke.texi (-mmovbe): Clarify. --- gcc/doc/invoke.texi | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 68d1f36..8cf99f3 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -34708,8 +34708,8 @@ see @ref{Other Builtins} for details. @opindex mmovbe @item -mmovbe -This option enables use of the @code{movbe} instruction to implement -@code{__builtin_bswap32} and @code{__builtin_bswap64}. +This option enables use of the @code{movbe} instruction to optimize +byte swapping of four and eight byte entities. @opindex mshstk @item -mshstk -- cgit v1.1 From 52a2c659ae6c21f84b6acce0afcb9b93b9dc71a0 Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Mon, 8 Jan 2024 15:12:44 +0100 Subject: GCN: Add pre-initial support for gfx1100 ROCm since 5.7.1 supports gfx1100 (RDNA3) cards. This commit adds support for it, mostly by assuming gfx1100 behaves identical to gfx1030. Like gfx1030, gfx1100 support is neither documented nor the build of the multilib enabled by default. But contrary to gfx1030, gfx1100 has a known issue causing some libraries not to build, including newlib: The sdwa variant of v_mov_b32_sdwa is not supported by the hardware but GCC current does generates this instruction. This will be addressed in a later commit. gcc/ChangeLog: * config.gcc (amdgcn-*-amdhsa): Accept --with-arch=gfx1100. * config/gcn/gcn-hsa.h (NO_XNACK): Add gfx1100: (ASM_SPEC): Handle gfx1100. * config/gcn/gcn-opts.h (enum processor_type): Add PROCESSOR_GFX1100. (enum gcn_isa): Add ISA_RDNA3. (TARGET_GFX1100, TARGET_RDNA2_PLUS, TARGET_RDNA3): Define. * config/gcn/gcn-valu.md: Change TARGET_RDNA2 to TARGET_RDNA2_PLUS. * config/gcn/gcn.cc (gcn_option_override, gcn_omp_device_kind_arch_isa, output_file_start): Handle gfx1100. (gcn_global_address_p, gcn_addr_space_legitimate_address_p): Change TARGET_RDNA2 to TARGET_RDNA2_PLUS. (gcn_hsa_declare_function_name): Don't use '.amdhsa_reserve_flat_scratch' with gfx1100. * config/gcn/gcn.h (ASSEMBLER_DIALECT): Likewise. (TARGET_CPU_CPP_BUILTINS): Define __RDNA3__, __gfx1030__ and __gfx1100__. * config/gcn/gcn.md: Change TARGET_RDNA2 to TARGET_RDNA2_PLUS. * config/gcn/gcn.opt (Enum gpu_type): Add gfx1100. * config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_GFX1100): Define. (isa_has_combined_avgprs, main): Handle gfx1100. * config/gcn/t-omp-device (isa): Add gfx1100. libgomp/ChangeLog: * plugin/plugin-gcn.c (gcn_gfx1100_s): New const string. (gcn_isa_name_len): Fix length. (isa_hsa_name, isa_code, max_isa_vgprs): Handle gfx1100. --- gcc/config.gcc | 2 +- gcc/config/gcn/gcn-hsa.h | 4 ++-- gcc/config/gcn/gcn-opts.h | 7 ++++++- gcc/config/gcn/gcn-valu.md | 10 +++++----- gcc/config/gcn/gcn.cc | 29 +++++++++++++++++++++-------- gcc/config/gcn/gcn.h | 10 +++++++--- gcc/config/gcn/gcn.md | 32 ++++++++++++++++---------------- gcc/config/gcn/gcn.opt | 3 +++ gcc/config/gcn/mkoffload.cc | 4 ++++ gcc/config/gcn/t-omp-device | 2 +- libgomp/plugin/plugin-gcn.c | 9 ++++++++- 11 files changed, 74 insertions(+), 38 deletions(-) diff --git a/gcc/config.gcc b/gcc/config.gcc index ce40b77..7e58339 100644 --- a/gcc/config.gcc +++ b/gcc/config.gcc @@ -4548,7 +4548,7 @@ case "${target}" in for which in arch tune; do eval "val=\$with_$which" case ${val} in - "" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx1030) + "" | fiji | gfx900 | gfx906 | gfx908 | gfx90a | gfx1030 | gfx1100) # OK ;; *) diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h index 43bbe04..bf7079f 100644 --- a/gcc/config/gcn/gcn-hsa.h +++ b/gcc/config/gcn/gcn-hsa.h @@ -75,7 +75,7 @@ extern unsigned int gcn_local_sym_hash (const char *name); supported for gcn. */ #define GOMP_SELF_SPECS "" -#define NO_XNACK "march=fiji:;march=gfx1030:;" \ +#define NO_XNACK "march=fiji:;march=gfx1030:;march=gfx1100:;" \ /* These match the defaults set in gcn.cc. */ \ "!mxnack*|mxnack=default:%{march=gfx900|march=gfx906|march=gfx908:-mattr=-xnack};" #define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;" @@ -91,7 +91,7 @@ extern unsigned int gcn_local_sym_hash (const char *name); "%{!march=*|march=fiji:--amdhsa-code-object-version=3} " \ "%{" NO_XNACK XNACKOPT "}" \ "%{" NO_SRAM_ECC SRAMOPT "} " \ - "%{march=gfx1030:-mattr=+wavefrontsize64} " \ + "%{march=gfx1030|march=gfx1100:-mattr=+wavefrontsize64} " \ "-filetype=obj" #define LINK_SPEC "--pie --export-dynamic" #define LIB_SPEC "-lc" diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h index 9a82cc8..79fbda3 100644 --- a/gcc/config/gcn/gcn-opts.h +++ b/gcc/config/gcn/gcn-opts.h @@ -25,7 +25,8 @@ enum processor_type PROCESSOR_VEGA20, // gfx906 PROCESSOR_GFX908, PROCESSOR_GFX90a, - PROCESSOR_GFX1030 + PROCESSOR_GFX1030, + PROCESSOR_GFX1100 }; #define TARGET_FIJI (gcn_arch == PROCESSOR_FIJI) @@ -34,6 +35,7 @@ enum processor_type #define TARGET_GFX908 (gcn_arch == PROCESSOR_GFX908) #define TARGET_GFX90a (gcn_arch == PROCESSOR_GFX90a) #define TARGET_GFX1030 (gcn_arch == PROCESSOR_GFX1030) +#define TARGET_GFX1100 (gcn_arch == PROCESSOR_GFX1100) /* Set in gcn_option_override. */ extern enum gcn_isa { @@ -41,6 +43,7 @@ extern enum gcn_isa { ISA_GCN3, ISA_GCN5, ISA_RDNA2, + ISA_RDNA3, ISA_CDNA1, ISA_CDNA2 } gcn_isa; @@ -54,6 +57,8 @@ extern enum gcn_isa { #define TARGET_CDNA2 (gcn_isa == ISA_CDNA2) #define TARGET_CDNA2_PLUS (gcn_isa >= ISA_CDNA2) #define TARGET_RDNA2 (gcn_isa == ISA_RDNA2) +#define TARGET_RDNA2_PLUS (gcn_isa >= ISA_RDNA2 && gcn_isa < ISA_CDNA1) +#define TARGET_RDNA3 (gcn_isa == ISA_RDNA3) #define TARGET_M0_LDS_LIMIT (TARGET_GCN3) diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 615e484..3d5b627 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -1417,7 +1417,7 @@ [(match_operand:V_noHI 1 "register_operand" " v") (match_operand:SI 2 "const_int_operand" " n")] UNSPEC_MOV_DPP_SHR))] - "!TARGET_RDNA2" + "!TARGET_RDNA2_PLUS" { return gcn_expand_dpp_shr_insn (mode, "v_mov_b32", UNSPEC_MOV_DPP_SHR, INTVAL (operands[2])); @@ -4211,7 +4211,7 @@ (unspec: [(match_operand:V_ALL 1 "register_operand")] REDUC_UNSPEC))] - "!TARGET_RDNA2" + "!TARGET_RDNA2_PLUS" { rtx tmp = gcn_expand_reduc_scalar (mode, operands[1], ); @@ -4265,7 +4265,7 @@ ; GCN3 requires a carry out, GCN5 not "!(TARGET_GCN3 && SCALAR_INT_MODE_P (mode) && == UNSPEC_PLUS_DPP_SHR) - && !TARGET_RDNA2" + && !TARGET_RDNA2_PLUS" { return gcn_expand_dpp_shr_insn (mode, "", , INTVAL (operands[3])); @@ -4310,7 +4310,7 @@ (match_operand:SI 3 "const_int_operand" "n")] UNSPEC_PLUS_CARRY_DPP_SHR)) (clobber (reg:DI VCC_REG))] - "!TARGET_RDNA2" + "!TARGET_RDNA2_PLUS" { return gcn_expand_dpp_shr_insn (mode, "v_add%^_u32", UNSPEC_PLUS_CARRY_DPP_SHR, @@ -4328,7 +4328,7 @@ (match_operand:DI 4 "register_operand" "cV")] UNSPEC_PLUS_CARRY_IN_DPP_SHR)) (clobber (reg:DI VCC_REG))] - "!TARGET_RDNA2" + "!TARGET_RDNA2_PLUS" { return gcn_expand_dpp_shr_insn (mode, "v_addc%^_u32", UNSPEC_PLUS_CARRY_IN_DPP_SHR, diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc index 79d3a65..50467bc 100644 --- a/gcc/config/gcn/gcn.cc +++ b/gcc/config/gcn/gcn.cc @@ -139,6 +139,7 @@ gcn_option_override (void) : gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1 : gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2 : gcn_arch == PROCESSOR_GFX1030 ? ISA_RDNA2 + : gcn_arch == PROCESSOR_GFX1100 ? ISA_RDNA3 : ISA_UNKNOWN); gcc_assert (gcn_isa != ISA_UNKNOWN); @@ -160,15 +161,17 @@ gcn_option_override (void) acc_lds_size = 32768; } - /* gfx803 "Fiji" and gfx1030 do not support XNACK. */ + /* gfx803 "Fiji", gfx1030 and gfx1100 do not support XNACK. */ if (gcn_arch == PROCESSOR_FIJI - || gcn_arch == PROCESSOR_GFX1030) + || gcn_arch == PROCESSOR_GFX1030 + || gcn_arch == PROCESSOR_GFX1100) { if (flag_xnack == HSACO_ATTR_ON) - error ("-mxnack=on is incompatible with -march=%s", + error ("%<-mxnack=on%> is incompatible with %<-march=%s%>", (gcn_arch == PROCESSOR_FIJI ? "fiji" - : gcn_arch == PROCESSOR_GFX1030 ? "gfx1030" - : NULL)); + : gcn_arch == PROCESSOR_GFX1030 ? "gfx1030" + : gcn_arch == PROCESSOR_GFX1100 ? "gfx1100" + : NULL)); /* Allow HSACO_ATTR_ANY silently because that's the default. */ flag_xnack = HSACO_ATTR_OFF; } @@ -1592,7 +1595,7 @@ gcn_global_address_p (rtx addr) { rtx base = XEXP (addr, 0); rtx offset = XEXP (addr, 1); - int offsetbits = (TARGET_RDNA2 ? 11 : 12); + int offsetbits = (TARGET_RDNA2_PLUS ? 11 : 12); bool immediate_p = (CONST_INT_P (offset) && INTVAL (offset) >= -(1 << 12) && INTVAL (offset) < (1 << 12)); @@ -1725,7 +1728,7 @@ gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict, rtx base = XEXP (x, 0); rtx offset = XEXP (x, 1); - int offsetbits = (TARGET_RDNA2 ? 11 : 12); + int offsetbits = (TARGET_RDNA2_PLUS ? 11 : 12); bool immediate_p = (GET_CODE (offset) == CONST_INT /* Signed 12/13-bit immediate. */ && INTVAL (offset) >= -(1 << offsetbits) @@ -3043,6 +3046,8 @@ gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait, return gcn_arch == PROCESSOR_GFX90a; if (strcmp (name, "gfx1030") == 0) return gcn_arch == PROCESSOR_GFX1030; + if (strcmp (name, "gfx1100") == 0) + return gcn_arch == PROCESSOR_GFX1100; return 0; default: gcc_unreachable (); @@ -6539,6 +6544,11 @@ output_file_start (void) xnack = ""; sram_ecc = ""; break; + case PROCESSOR_GFX1100: + cpu = "gfx1100"; + xnack = ""; + sram_ecc = ""; + break; default: gcc_unreachable (); } @@ -6664,7 +6674,6 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree decl) "\t .amdhsa_next_free_vgpr\t%i\n" "\t .amdhsa_next_free_sgpr\t%i\n" "\t .amdhsa_reserve_vcc\t1\n" - "\t .amdhsa_reserve_flat_scratch\t0\n" "\t .amdhsa_reserve_xnack_mask\t%i\n" "\t .amdhsa_private_segment_fixed_size\t0\n" "\t .amdhsa_group_segment_fixed_size\t%u\n" @@ -6674,6 +6683,10 @@ gcn_hsa_declare_function_name (FILE *file, const char *name, tree decl) sgpr, xnack_enabled, LDS_SIZE); + /* Not supported with 'architected flat scratch'. */ + if (gcn_arch != PROCESSOR_GFX1100) + fprintf (file, + "\t .amdhsa_reserve_flat_scratch\t0\n"); if (gcn_arch == PROCESSOR_GFX90a) fprintf (file, "\t .amdhsa_accum_offset\t%i\n" diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h index c350cbb..c2afb5e 100644 --- a/gcc/config/gcn/gcn.h +++ b/gcc/config/gcn/gcn.h @@ -30,6 +30,8 @@ builtin_define ("__CDNA2__"); \ else if (TARGET_RDNA2) \ builtin_define ("__RDNA2__"); \ + else if (TARGET_RDNA3) \ + builtin_define ("__RDNA3__"); \ if (TARGET_FIJI) \ { \ builtin_define ("__fiji__"); \ @@ -41,11 +43,13 @@ builtin_define ("__gfx906__"); \ else if (TARGET_GFX908) \ builtin_define ("__gfx908__"); \ - else if (TARGET_GFX90a) \ - builtin_define ("__gfx90a__"); \ + else if (TARGET_GFX1030) \ + builtin_define ("__gfx1030"); \ + else if (TARGET_GFX1100) \ + builtin_define ("__gfx1100__"); \ } while (0) -#define ASSEMBLER_DIALECT (TARGET_RDNA2 ? 1 : 0) +#define ASSEMBLER_DIALECT (TARGET_RDNA2_PLUS ? 1 : 0) /* Support for a compile-time default architecture and tuning. The rules are: diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index e7815340..492b833 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -299,10 +299,10 @@ (define_attr "enabled" "" (cond [(and (eq_attr "rdna" "no") - (ne (symbol_ref "TARGET_RDNA2") (const_int 0))) + (ne (symbol_ref "TARGET_RDNA2_PLUS") (const_int 0))) (const_int 0) (and (eq_attr "rdna" "yes") - (eq (symbol_ref "TARGET_RDNA2") (const_int 0))) + (eq (symbol_ref "TARGET_RDNA2_PLUS") (const_int 0))) (const_int 0) (and (eq_attr "gcn_version" "gcn5") (eq (symbol_ref "TARGET_GCN5_PLUS") (const_int 0))) @@ -2109,13 +2109,13 @@ return "s_load%o0\t%0, %A1 glc\;s_waitcnt\tlgkmcnt(0)\;" "s_dcache_wb_vol"; case 1: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;" "buffer_gl0_inv" : "flat_load%o0\t%0, %A1%O1 glc\;s_waitcnt\t0\;" "buffer_wbinvl1_vol"); case 2: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;" "buffer_gl0_inv" : "global_load%o0\t%0, %A1%O1 glc\;s_waitcnt\tvmcnt(0)\;" @@ -2131,13 +2131,13 @@ return "s_dcache_wb_vol\;s_load%o0\t%0, %A1 glc\;" "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol"; case 1: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "buffer_gl0_inv\;flat_load%o0\t%0, %A1%O1 glc\;" "s_waitcnt\t0\;buffer_gl0_inv" : "buffer_wbinvl1_vol\;flat_load%o0\t%0, %A1%O1 glc\;" "s_waitcnt\t0\;buffer_wbinvl1_vol"); case 2: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "buffer_gl0_inv\;global_load%o0\t%0, %A1%O1 glc\;" "s_waitcnt\tvmcnt(0)\;buffer_gl0_inv" : "buffer_wbinvl1_vol\;global_load%o0\t%0, %A1%O1 glc\;" @@ -2180,11 +2180,11 @@ case 0: return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc"; case 1: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc" : "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc"); case 2: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc" : "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc"); } @@ -2198,13 +2198,13 @@ return "s_dcache_wb_vol\;s_store%o1\t%1, %A0 glc\;" "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol"; case 1: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "buffer_gl0_inv\;flat_store%o1\t%A0, %1%O0 glc\;" "s_waitcnt\t0\;buffer_gl0_inv" : "buffer_wbinvl1_vol\;flat_store%o1\t%A0, %1%O0 glc\;" "s_waitcnt\t0\;buffer_wbinvl1_vol"); case 2: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "buffer_gl0_inv\;global_store%o1\t%A0, %1%O0 glc\;" "s_waitcnt\tvmcnt(0)\;buffer_gl0_inv" : "buffer_wbinvl1_vol\;global_store%o1\t%A0, %1%O0 glc\;" @@ -2252,13 +2252,13 @@ return "s_atomic_swap\t%0, %1, %2 glc\;s_waitcnt\tlgkmcnt(0)\;" "s_dcache_wb_vol\;s_dcache_inv_vol"; case 1: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "flat_atomic_swap\t%0, %1, %2 glc\;s_waitcnt\t0\;" "buffer_gl0_inv" : "flat_atomic_swap\t%0, %1, %2 glc\;s_waitcnt\t0\;" "buffer_wbinvl1_vol"); case 2: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "global_atomic_swap\t%0, %A1, %2%O1 glc\;" "s_waitcnt\tvmcnt(0)\;buffer_gl0_inv" : "global_atomic_swap\t%0, %A1, %2%O1 glc\;" @@ -2273,13 +2273,13 @@ return "s_dcache_wb_vol\;s_atomic_swap\t%0, %1, %2 glc\;" "s_waitcnt\tlgkmcnt(0)"; case 1: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "buffer_gl0_inv\;flat_atomic_swap\t%0, %1, %2 glc\;" "s_waitcnt\t0" : "buffer_wbinvl1_vol\;flat_atomic_swap\t%0, %1, %2 glc\;" "s_waitcnt\t0"); case 2: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "buffer_gl0_inv\;" "global_atomic_swap\t%0, %A1, %2%O1 glc\;" "s_waitcnt\tvmcnt(0)" @@ -2297,13 +2297,13 @@ return "s_dcache_wb_vol\;s_atomic_swap\t%0, %1, %2 glc\;" "s_waitcnt\tlgkmcnt(0)\;s_dcache_inv_vol"; case 1: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "buffer_gl0_inv\;flat_atomic_swap\t%0, %1, %2 glc\;" "s_waitcnt\t0\;buffer_gl0_inv" : "buffer_wbinvl1_vol\;flat_atomic_swap\t%0, %1, %2 glc\;" "s_waitcnt\t0\;buffer_wbinvl1_vol"); case 2: - return (TARGET_RDNA2 + return (TARGET_RDNA2_PLUS ? "buffer_gl0_inv\;" "global_atomic_swap\t%0, %A1, %2%O1 glc\;" "s_waitcnt\tvmcnt(0)\;buffer_gl0_inv" diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt index b3d7a18..842fd36 100644 --- a/gcc/config/gcn/gcn.opt +++ b/gcc/config/gcn/gcn.opt @@ -43,6 +43,9 @@ Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90a) EnumValue Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030) +EnumValue +Enum(gpu_type) String(gfx1100) Value(PROCESSOR_GFX1100) + march= Target RejectNegative Negative(march=) Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_FIJI) Specify the name of the target GPU. diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index 5b7de00..2cd201d 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -59,6 +59,8 @@ #define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f #undef EF_AMDGPU_MACH_AMDGCN_GFX1030 #define EF_AMDGPU_MACH_AMDGCN_GFX1030 0x36 +#undef EF_AMDGPU_MACH_AMDGCN_GFX1100 +#define EF_AMDGPU_MACH_AMDGCN_GFX1100 0x41 #define EF_AMDGPU_FEATURE_XNACK_V4 0x300 /* Mask. */ #define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 0x000 @@ -971,6 +973,8 @@ main (int argc, char **argv) elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX90a; else if (strcmp (argv[i], "-march=gfx1030") == 0) elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1030; + else if (strcmp (argv[i], "-march=gfx1100") == 0) + elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX1100; #define STR "-mstack-size=" else if (startswith (argv[i], STR)) gcn_stack_size = atoi (argv[i] + strlen (STR)); diff --git a/gcc/config/gcn/t-omp-device b/gcc/config/gcn/t-omp-device index b1cd998..2315ad5 100644 --- a/gcc/config/gcn/t-omp-device +++ b/gcc/config/gcn/t-omp-device @@ -1,4 +1,4 @@ omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.cc echo kind: gpu > $@ echo arch: amdgcn gcn >> $@ - echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 >> $@ + echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx1030 gfx1100 >> $@ diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index bc8131a..e3e8b31 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -1641,7 +1641,8 @@ const static char *gcn_gfx906_s = "gfx906"; const static char *gcn_gfx908_s = "gfx908"; const static char *gcn_gfx90a_s = "gfx90a"; const static char *gcn_gfx1030_s = "gfx1030"; -const static int gcn_isa_name_len = 6; +const static char *gcn_gfx1100_s = "gfx1100"; +const static int gcn_isa_name_len = 7; /* Returns the name that the HSA runtime uses for the ISA or NULL if we do not support the ISA. */ @@ -1662,6 +1663,8 @@ isa_hsa_name (int isa) { return gcn_gfx90a_s; case EF_AMDGPU_MACH_AMDGCN_GFX1030: return gcn_gfx1030_s; + case EF_AMDGPU_MACH_AMDGCN_GFX1100: + return gcn_gfx1100_s; } return NULL; } @@ -1704,6 +1707,9 @@ isa_code(const char *isa) { if (!strncmp (isa, gcn_gfx1030_s, gcn_isa_name_len)) return EF_AMDGPU_MACH_AMDGCN_GFX1030; + if (!strncmp (isa, gcn_gfx1100_s, gcn_isa_name_len)) + return EF_AMDGPU_MACH_AMDGCN_GFX1100; + return -1; } @@ -1719,6 +1725,7 @@ max_isa_vgprs (int isa) case EF_AMDGPU_MACH_AMDGCN_GFX906: case EF_AMDGPU_MACH_AMDGCN_GFX908: case EF_AMDGPU_MACH_AMDGCN_GFX1030: + case EF_AMDGPU_MACH_AMDGCN_GFX1100: return 256; case EF_AMDGPU_MACH_AMDGCN_GFX90a: return 512; -- cgit v1.1 From 97a52f69d209f69e755ffad6897c7176da9ac686 Mon Sep 17 00:00:00 2001 From: Tobias Burnus Date: Mon, 8 Jan 2024 15:18:10 +0100 Subject: amdgcn: Add gfx1100 to new XNACK defaults in mkoffload Commit r14-6997-g78dff4c25c1b95 added an arch-dependent SET_XNACK_OFF vs. SET_XNACK_ANY check; that was added between writing and committing the add-gfx1100 commit r14-7005-g52a2c659ae6c21 - and I missed to add it there. gcc/ChangeLog: * config/gcn/mkoffload.cc (main): Handle gfx1100 when setting the default XNACK. --- gcc/config/gcn/mkoffload.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc index 2cd201d..d4cd509 100644 --- a/gcc/config/gcn/mkoffload.cc +++ b/gcc/config/gcn/mkoffload.cc @@ -1018,6 +1018,7 @@ main (int argc, char **argv) case EF_AMDGPU_MACH_AMDGCN_GFX906: case EF_AMDGPU_MACH_AMDGCN_GFX908: case EF_AMDGPU_MACH_AMDGCN_GFX1030: + case EF_AMDGPU_MACH_AMDGCN_GFX1100: SET_XNACK_OFF (elf_flags); break; case EF_AMDGPU_MACH_AMDGCN_GFX90a: -- cgit v1.1 From eb846114ed7c49e426fccb826a3f81b7abbc84be Mon Sep 17 00:00:00 2001 From: Jonathan Wakely Date: Mon, 8 Jan 2024 11:46:56 +0000 Subject: libstdc++: Remove std::__unicode::__null_sentinel The name __null_sentinel is defined as a macro by newlib, so we can't use it as an identifier. That variable is not actually used by libstdc++, it was added because P2728R6 proposes std::uc::null_sentinel. Since we don't need it and it breaks bootstrap for newlib targets, just remove it. A null sentinel can still be used by constructing a _Null_sentinel_t object as needed, rather than having a named object of that type predefined. libstdc++-v3/ChangeLog: * include/bits/unicode.h (__null_sentinel): Remove. * testsuite/17_intro/names.cc: Add __null_sentinel. --- libstdc++-v3/include/bits/unicode.h | 2 -- libstdc++-v3/testsuite/17_intro/names.cc | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/libstdc++-v3/include/bits/unicode.h b/libstdc++-v3/include/bits/unicode.h index 66f8399..e49498a 100644 --- a/libstdc++-v3/include/bits/unicode.h +++ b/libstdc++-v3/include/bits/unicode.h @@ -83,8 +83,6 @@ namespace __unicode { return *__it == iter_value_t<_It>{}; } }; - inline constexpr _Null_sentinel_t __null_sentinel; - template _Sent = _Iter, typename _ErrorHandler = _Repl> diff --git a/libstdc++-v3/testsuite/17_intro/names.cc b/libstdc++-v3/testsuite/17_intro/names.cc index 5e77e9f..53c5aff 100644 --- a/libstdc++-v3/testsuite/17_intro/names.cc +++ b/libstdc++-v3/testsuite/17_intro/names.cc @@ -140,6 +140,7 @@ // These clash with newlib so don't use them. # define __lockable cannot be used as an identifier +# define __null_sentinel cannot be used as an identifier # define __packed cannot be used as an identifier # define __unused cannot be used as an identifier # define __used cannot be used as an identifier -- cgit v1.1 From 0056458550ba6df0a339589736729be8b886790a Mon Sep 17 00:00:00 2001 From: Harald Anlauf Date: Sun, 7 Jan 2024 22:24:25 +0100 Subject: Fortran: SIZE optional DIM argument having OPTIONAL+VALUE attributes [PR113245] gcc/fortran/ChangeLog: PR fortran/113245 * trans-intrinsic.cc (gfc_conv_intrinsic_size): Use gfc_conv_expr_present() for proper check of optional DIM argument. gcc/testsuite/ChangeLog: PR fortran/113245 * gfortran.dg/size_optional_dim_2.f90: New test. --- gcc/fortran/trans-intrinsic.cc | 4 +-- gcc/testsuite/gfortran.dg/size_optional_dim_2.f90 | 31 +++++++++++++++++++++++ 2 files changed, 32 insertions(+), 3 deletions(-) create mode 100644 gcc/testsuite/gfortran.dg/size_optional_dim_2.f90 diff --git a/gcc/fortran/trans-intrinsic.cc b/gcc/fortran/trans-intrinsic.cc index d973c49..7413926 100644 --- a/gcc/fortran/trans-intrinsic.cc +++ b/gcc/fortran/trans-intrinsic.cc @@ -8025,9 +8025,6 @@ gfc_conv_intrinsic_size (gfc_se * se, gfc_expr * expr) argse.data_not_needed = 1; gfc_conv_expr (&argse, actual->expr); gfc_add_block_to_block (&se->pre, &argse.pre); - cond = fold_build2_loc (input_location, NE_EXPR, logical_type_node, - argse.expr, null_pointer_node); - cond = gfc_evaluate_now (cond, &se->pre); /* 'block2' contains the arg2 absent case, 'block' the arg2 present case; size_var can be used in both blocks. */ tree size_var = gfc_create_var (TREE_TYPE (size), "size"); @@ -8038,6 +8035,7 @@ gfc_conv_intrinsic_size (gfc_se * se, gfc_expr * expr) tmp = fold_build2_loc (input_location, MODIFY_EXPR, TREE_TYPE (size_var), size_var, size); gfc_add_expr_to_block (&block2, tmp); + cond = gfc_conv_expr_present (actual->expr->symtree->n.sym); tmp = build3_v (COND_EXPR, cond, gfc_finish_block (&block), gfc_finish_block (&block2)); gfc_add_expr_to_block (&se->pre, tmp); diff --git a/gcc/testsuite/gfortran.dg/size_optional_dim_2.f90 b/gcc/testsuite/gfortran.dg/size_optional_dim_2.f90 new file mode 100644 index 0000000..698702b --- /dev/null +++ b/gcc/testsuite/gfortran.dg/size_optional_dim_2.f90 @@ -0,0 +1,31 @@ +! { dg-do run } +! { dg-additional-options "-fdump-tree-original" } +! PR fortran/113245 - SIZE, optional DIM argument, w/ OPTIONAL+VALUE attributes + +program p + implicit none + real :: a(2,3) + integer :: expect + expect = size (a,2) + call ref (a,2) + call val (a,2) + expect = size (a) + call ref (a) + call val (a) +contains + subroutine ref (x, dim) + real, intent(in) :: x(:,:) + integer, optional, intent(in) :: dim + print *, "present(dim), size(a,dim) =", present (dim), size (x,dim=dim) + if (size (x,dim=dim) /= expect) stop 1 + end + subroutine val (x, dim) + real, intent(in) :: x(:,:) + integer, optional, value :: dim + print *, "present(dim), size(a,dim) =", present (dim), size (x,dim=dim) + if (size (x,dim=dim) /= expect) stop 2 + end +end + +! Ensure inline code is generated: +! { dg-final { scan-tree-dump-not "_gfortran_size" "original" } } -- cgit v1.1 From a17299c17afeb92a56ef716d2d6380c8538493c4 Mon Sep 17 00:00:00 2001 From: Julian Brown Date: Thu, 4 Jan 2024 16:44:18 +0000 Subject: OpenMP: Support accelerated 2D/3D memory copies for AMD GCN This patch adds support for 2D/3D memory copies for omp_target_memcpy_rect using AMD extensions to the HSA API. This is just the AMD GCN-specific part of the following patch: https://gcc.gnu.org/pipermail/gcc-patches/2023-September/631001.html 2024-01-04 Julian Brown libgomp/ * plugin/plugin-gcn.c (hsa_runtime_fn_info): Add hsa_amd_memory_lock_fn, hsa_amd_memory_unlock_fn, hsa_amd_memory_async_copy_rect_fn function pointers. (init_hsa_runtime_functions): Add above functions, with DLSYM_OPT_FN. (GOMP_OFFLOAD_memcpy2d, GOMP_OFFLOAD_memcpy3d): New functions. --- libgomp/plugin/plugin-gcn.c | 362 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 362 insertions(+) diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index e3e8b31..f24a28f 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -196,6 +196,16 @@ struct hsa_runtime_fn_info hsa_status_t (*hsa_code_object_deserialize_fn) (void *serialized_code_object, size_t serialized_code_object_size, const char *options, hsa_code_object_t *code_object); + hsa_status_t (*hsa_amd_memory_lock_fn) + (void *host_ptr, size_t size, hsa_agent_t *agents, int num_agent, + void **agent_ptr); + hsa_status_t (*hsa_amd_memory_unlock_fn) (void *host_ptr); + hsa_status_t (*hsa_amd_memory_async_copy_rect_fn) + (const hsa_pitched_ptr_t *dst, const hsa_dim3_t *dst_offset, + const hsa_pitched_ptr_t *src, const hsa_dim3_t *src_offset, + const hsa_dim3_t *range, hsa_agent_t copy_agent, + hsa_amd_copy_direction_t dir, uint32_t num_dep_signals, + const hsa_signal_t *dep_signals, hsa_signal_t completion_signal); }; /* Structure describing the run-time and grid properties of an HSA kernel @@ -1371,6 +1381,8 @@ init_hsa_runtime_functions (void) hsa_fns.function##_fn = dlsym (handle, #function); \ if (hsa_fns.function##_fn == NULL) \ return false; +#define DLSYM_OPT_FN(function) \ + hsa_fns.function##_fn = dlsym (handle, #function); void *handle = dlopen (hsa_runtime_lib, RTLD_LAZY); if (handle == NULL) return false; @@ -1405,7 +1417,11 @@ init_hsa_runtime_functions (void) DLSYM_FN (hsa_signal_load_acquire) DLSYM_FN (hsa_queue_destroy) DLSYM_FN (hsa_code_object_deserialize) + DLSYM_OPT_FN (hsa_amd_memory_lock) + DLSYM_OPT_FN (hsa_amd_memory_unlock) + DLSYM_OPT_FN (hsa_amd_memory_async_copy_rect) return true; +#undef DLSYM_OPT_FN #undef DLSYM_FN } @@ -3933,6 +3949,352 @@ GOMP_OFFLOAD_dev2dev (int device, void *dst, const void *src, size_t n) return true; } +/* Here _size refers to multiplied by size -- i.e. + measured in bytes. So we have: + + dim1_size: number of bytes to copy on innermost dimension ("row") + dim0_len: number of rows to copy + dst: base pointer for destination of copy + dst_offset1_size: innermost row offset (for dest), in bytes + dst_offset0_len: offset, number of rows (for dest) + dst_dim1_size: whole-array dest row length, in bytes (pitch) + src: base pointer for source of copy + src_offset1_size: innermost row offset (for source), in bytes + src_offset0_len: offset, number of rows (for source) + src_dim1_size: whole-array source row length, in bytes (pitch) +*/ + +int +GOMP_OFFLOAD_memcpy2d (int dst_ord, int src_ord, size_t dim1_size, + size_t dim0_len, void *dst, size_t dst_offset1_size, + size_t dst_offset0_len, size_t dst_dim1_size, + const void *src, size_t src_offset1_size, + size_t src_offset0_len, size_t src_dim1_size) +{ + if (!hsa_fns.hsa_amd_memory_lock_fn + || !hsa_fns.hsa_amd_memory_unlock_fn + || !hsa_fns.hsa_amd_memory_async_copy_rect_fn) + return -1; + + /* GCN hardware requires 4-byte alignment for base addresses & pitches. Bail + out quietly if we have anything oddly-aligned rather than letting the + driver raise an error. */ + if ((((uintptr_t) dst) & 3) != 0 || (((uintptr_t) src) & 3) != 0) + return -1; + + if ((dst_dim1_size & 3) != 0 || (src_dim1_size & 3) != 0) + return -1; + + /* Only handle host to device or device to host transfers here. */ + if ((dst_ord == -1 && src_ord == -1) + || (dst_ord != -1 && src_ord != -1)) + return -1; + + hsa_amd_copy_direction_t dir + = (src_ord == -1) ? hsaHostToDevice : hsaDeviceToHost; + hsa_agent_t copy_agent; + + /* We need to pin (lock) host memory before we start the transfer. Try to + lock the minimum size necessary, i.e. using partial first/last rows of the + whole array. Something like this: + + rows --> + .............. + c | ..#######+++++ <- first row apart from {src,dst}_offset1_size + o | ++#######+++++ <- whole row + l | ++#######+++++ <- " + s v ++#######..... <- last row apart from trailing remainder + .............. + + We could split very large transfers into several rectangular copies, but + that is unimplemented for now. */ + + size_t bounded_size_host, first_elem_offset_host; + void *host_ptr; + if (dir == hsaHostToDevice) + { + bounded_size_host = src_dim1_size * (dim0_len - 1) + dim1_size; + first_elem_offset_host = src_offset0_len * src_dim1_size + + src_offset1_size; + host_ptr = (void *) src; + struct agent_info *agent = get_agent_info (dst_ord); + copy_agent = agent->id; + } + else + { + bounded_size_host = dst_dim1_size * (dim0_len - 1) + dim1_size; + first_elem_offset_host = dst_offset0_len * dst_dim1_size + + dst_offset1_size; + host_ptr = dst; + struct agent_info *agent = get_agent_info (src_ord); + copy_agent = agent->id; + } + + void *agent_ptr; + + hsa_status_t status + = hsa_fns.hsa_amd_memory_lock_fn (host_ptr + first_elem_offset_host, + bounded_size_host, NULL, 0, &agent_ptr); + /* We can't lock the host memory: don't give up though, we might still be + able to use the slow path in our caller. So, don't make this an + error. */ + if (status != HSA_STATUS_SUCCESS) + return -1; + + hsa_pitched_ptr_t dstpp, srcpp; + hsa_dim3_t dst_offsets, src_offsets, ranges; + + int retval = 1; + + hsa_signal_t completion_signal; + status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &completion_signal); + if (status != HSA_STATUS_SUCCESS) + { + retval = -1; + goto unlock; + } + + if (dir == hsaHostToDevice) + { + srcpp.base = agent_ptr - first_elem_offset_host; + dstpp.base = dst; + } + else + { + srcpp.base = (void *) src; + dstpp.base = agent_ptr - first_elem_offset_host; + } + + srcpp.pitch = src_dim1_size; + srcpp.slice = 0; + + src_offsets.x = src_offset1_size; + src_offsets.y = src_offset0_len; + src_offsets.z = 0; + + dstpp.pitch = dst_dim1_size; + dstpp.slice = 0; + + dst_offsets.x = dst_offset1_size; + dst_offsets.y = dst_offset0_len; + dst_offsets.z = 0; + + ranges.x = dim1_size; + ranges.y = dim0_len; + ranges.z = 1; + + status + = hsa_fns.hsa_amd_memory_async_copy_rect_fn (&dstpp, &dst_offsets, &srcpp, + &src_offsets, &ranges, + copy_agent, dir, 0, NULL, + completion_signal); + /* If the rectangular copy fails, we might still be able to use the slow + path. We need to unlock the host memory though, so don't return + immediately. */ + if (status != HSA_STATUS_SUCCESS) + retval = -1; + else + hsa_fns.hsa_signal_wait_acquire_fn (completion_signal, + HSA_SIGNAL_CONDITION_LT, 1, UINT64_MAX, + HSA_WAIT_STATE_ACTIVE); + + hsa_fns.hsa_signal_destroy_fn (completion_signal); + +unlock: + status = hsa_fns.hsa_amd_memory_unlock_fn (host_ptr + first_elem_offset_host); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not unlock host memory", status); + + return retval; +} + +/* As above, _size refers to multiplied by size -- i.e. + measured in bytes. So we have: + + dim2_size: number of bytes to copy on innermost dimension ("row") + dim1_len: number of rows per slice to copy + dim0_len: number of slices to copy + dst: base pointer for destination of copy + dst_offset2_size: innermost row offset (for dest), in bytes + dst_offset1_len: offset, number of rows (for dest) + dst_offset0_len: offset, number of slices (for dest) + dst_dim2_size: whole-array dest row length, in bytes (pitch) + dst_dim1_len: whole-array number of rows in slice (for dest) + src: base pointer for source of copy + src_offset2_size: innermost row offset (for source), in bytes + src_offset1_len: offset, number of rows (for source) + src_offset0_len: offset, number of slices (for source) + src_dim2_size: whole-array source row length, in bytes (pitch) + src_dim1_len: whole-array number of rows in slice (for source) +*/ + +int +GOMP_OFFLOAD_memcpy3d (int dst_ord, int src_ord, size_t dim2_size, + size_t dim1_len, size_t dim0_len, void *dst, + size_t dst_offset2_size, size_t dst_offset1_len, + size_t dst_offset0_len, size_t dst_dim2_size, + size_t dst_dim1_len, const void *src, + size_t src_offset2_size, size_t src_offset1_len, + size_t src_offset0_len, size_t src_dim2_size, + size_t src_dim1_len) +{ + if (!hsa_fns.hsa_amd_memory_lock_fn + || !hsa_fns.hsa_amd_memory_unlock_fn + || !hsa_fns.hsa_amd_memory_async_copy_rect_fn) + return -1; + + /* GCN hardware requires 4-byte alignment for base addresses & pitches. Bail + out quietly if we have anything oddly-aligned rather than letting the + driver raise an error. */ + if ((((uintptr_t) dst) & 3) != 0 || (((uintptr_t) src) & 3) != 0) + return -1; + + if ((dst_dim2_size & 3) != 0 || (src_dim2_size & 3) != 0) + return -1; + + /* Only handle host to device or device to host transfers here. */ + if ((dst_ord == -1 && src_ord == -1) + || (dst_ord != -1 && src_ord != -1)) + return -1; + + hsa_amd_copy_direction_t dir + = (src_ord == -1) ? hsaHostToDevice : hsaDeviceToHost; + hsa_agent_t copy_agent; + + /* We need to pin (lock) host memory before we start the transfer. Try to + lock the minimum size necessary, i.e. using partial first/last slices of + the whole 3D array. Something like this: + + slice 0: slice 1: slice 2: + __________ __________ __________ + ^ /+++++++++/ : /+++++++++/ : / / + column /+++##++++/| | /+++##++++/| | /+++## / # = subarray + / / ##++++/ | |/+++##++++/ | |/+++##++++/ + = area to pin + /_________/ : /_________/ : /_________/ + row ---> + + We could split very large transfers into several rectangular copies, but + that is unimplemented for now. */ + + size_t bounded_size_host, first_elem_offset_host; + void *host_ptr; + if (dir == hsaHostToDevice) + { + size_t slice_bytes = src_dim2_size * src_dim1_len; + bounded_size_host = slice_bytes * (dim0_len - 1) + + src_dim2_size * (dim1_len - 1) + + dim2_size; + first_elem_offset_host = src_offset0_len * slice_bytes + + src_offset1_len * src_dim2_size + + src_offset2_size; + host_ptr = (void *) src; + struct agent_info *agent = get_agent_info (dst_ord); + copy_agent = agent->id; + } + else + { + size_t slice_bytes = dst_dim2_size * dst_dim1_len; + bounded_size_host = slice_bytes * (dim0_len - 1) + + dst_dim2_size * (dim1_len - 1) + + dim2_size; + first_elem_offset_host = dst_offset0_len * slice_bytes + + dst_offset1_len * dst_dim2_size + + dst_offset2_size; + host_ptr = dst; + struct agent_info *agent = get_agent_info (src_ord); + copy_agent = agent->id; + } + + void *agent_ptr; + + hsa_status_t status + = hsa_fns.hsa_amd_memory_lock_fn (host_ptr + first_elem_offset_host, + bounded_size_host, NULL, 0, &agent_ptr); + /* We can't lock the host memory: don't give up though, we might still be + able to use the slow path in our caller (maybe even with iterated memcpy2d + calls). So, don't make this an error. */ + if (status != HSA_STATUS_SUCCESS) + return -1; + + hsa_pitched_ptr_t dstpp, srcpp; + hsa_dim3_t dst_offsets, src_offsets, ranges; + + int retval = 1; + + hsa_signal_t completion_signal; + status = hsa_fns.hsa_signal_create_fn (1, 0, NULL, &completion_signal); + if (status != HSA_STATUS_SUCCESS) + { + retval = -1; + goto unlock; + } + + if (dir == hsaHostToDevice) + { + srcpp.base = agent_ptr - first_elem_offset_host; + dstpp.base = dst; + } + else + { + srcpp.base = (void *) src; + dstpp.base = agent_ptr - first_elem_offset_host; + } + + /* Pitch is measured in bytes. */ + srcpp.pitch = src_dim2_size; + /* Slice is also measured in bytes (i.e. total per-slice). */ + srcpp.slice = src_dim2_size * src_dim1_len; + + src_offsets.x = src_offset2_size; + src_offsets.y = src_offset1_len; + src_offsets.z = src_offset0_len; + + /* As above. */ + dstpp.pitch = dst_dim2_size; + dstpp.slice = dst_dim2_size * dst_dim1_len; + + dst_offsets.x = dst_offset2_size; + dst_offsets.y = dst_offset1_len; + dst_offsets.z = dst_offset0_len; + + ranges.x = dim2_size; + ranges.y = dim1_len; + ranges.z = dim0_len; + + status + = hsa_fns.hsa_amd_memory_async_copy_rect_fn (&dstpp, &dst_offsets, &srcpp, + &src_offsets, &ranges, + copy_agent, dir, 0, NULL, + completion_signal); + /* If the rectangular copy fails, we might still be able to use the slow + path. We need to unlock the host memory though, so don't return + immediately. */ + if (status != HSA_STATUS_SUCCESS) + retval = -1; + else + { + hsa_signal_value_t sv + = hsa_fns.hsa_signal_wait_acquire_fn (completion_signal, + HSA_SIGNAL_CONDITION_LT, 1, + UINT64_MAX, + HSA_WAIT_STATE_ACTIVE); + if (sv < 0) + { + GCN_WARNING ("async copy rect failure"); + retval = -1; + } + } + + hsa_fns.hsa_signal_destroy_fn (completion_signal); + +unlock: + status = hsa_fns.hsa_amd_memory_unlock_fn (host_ptr + first_elem_offset_host); + if (status != HSA_STATUS_SUCCESS) + hsa_fatal ("Could not unlock host memory", status); + + return retval; +} + /* }}} */ /* {{{ OpenMP Plugin API */ -- cgit v1.1 From 4a5bb8bc1c562ba4d6e97f1979de6b8ac566f04f Mon Sep 17 00:00:00 2001 From: Cupertino Miranda Date: Thu, 30 Nov 2023 18:13:34 +0000 Subject: btf: print string offset in comment When using -dA, this function was only printing as comment btf_string or btf_aux_string. This patch changes the comment to also include the position of the string within the section in hexadecimal format. gcc/ChangeLog: * btfout.cc (output_btf_strs): Changed. --- gcc/btfout.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gcc/btfout.cc b/gcc/btfout.cc index bf3183d..669d357d 100644 --- a/gcc/btfout.cc +++ b/gcc/btfout.cc @@ -1105,17 +1105,20 @@ static void output_btf_strs (ctf_container_ref ctfc) { ctf_string_t * ctf_string = ctfc->ctfc_strtable.ctstab_head; + static int str_pos = 0; while (ctf_string) { - dw2_asm_output_nstring (ctf_string->cts_str, -1, "btf_string"); + dw2_asm_output_nstring (ctf_string->cts_str, -1, "btf_string, str_pos = 0x%x", str_pos); + str_pos += strlen(ctf_string->cts_str) + 1; ctf_string = ctf_string->cts_next; } ctf_string = ctfc->ctfc_aux_strtable.ctstab_head; while (ctf_string) { - dw2_asm_output_nstring (ctf_string->cts_str, -1, "btf_aux_string"); + dw2_asm_output_nstring (ctf_string->cts_str, -1, "btf_aux_string, str_pos = 0x%x", str_pos); + str_pos += strlen(ctf_string->cts_str) + 1; ctf_string = ctf_string->cts_next; } } -- cgit v1.1 From dfc88fb51c1f9c26215adf6a308c18e23992cdd9 Mon Sep 17 00:00:00 2001 From: Cupertino Miranda Date: Wed, 3 Jan 2024 11:37:34 +0000 Subject: bpf: Correct BTF for kernel_helper attributed decls This patch fix a problem with kernel_helper attribute BTF information, which incorrectly generates BTF_KIND_FUNC entry. This BTF entry although accurate with traditional extern function declarations, once the function is attributed with kernel_helper, it is semantically incompatible of the kernel helpers in BPF infrastructure. gcc/ChangeLog: PR target/113225 * btfout.cc (btf_collect_datasec): Skip creating BTF info for extern and kernel_helper attributed function decls. gcc/testsuite/ChangeLog: * gcc.target/bpf/attr-kernel-helper.c: New test. --- gcc/btfout.cc | 7 +++++++ gcc/testsuite/gcc.target/bpf/attr-kernel-helper.c | 15 +++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 gcc/testsuite/gcc.target/bpf/attr-kernel-helper.c diff --git a/gcc/btfout.cc b/gcc/btfout.cc index 669d357d..dcf751f 100644 --- a/gcc/btfout.cc +++ b/gcc/btfout.cc @@ -35,6 +35,8 @@ along with GCC; see the file COPYING3. If not see #include "diagnostic-core.h" #include "cgraph.h" #include "varasm.h" +#include "stringpool.h" /* For lookup_attribute. */ +#include "attribs.h" /* For lookup_attribute. */ #include "dwarf2out.h" /* For lookup_decl_die. */ static int btf_label_num; @@ -440,6 +442,11 @@ btf_collect_datasec (ctf_container_ref ctfc) if (dtd == NULL) continue; + if (DECL_EXTERNAL (func->decl) + && (lookup_attribute ("kernel_helper", + DECL_ATTRIBUTES (func->decl))) != NULL_TREE) + continue; + /* Functions actually get two types: a BTF_KIND_FUNC_PROTO, and also a BTF_KIND_FUNC. But the CTF container only allocates one type per function, which matches closely with BTF_KIND_FUNC_PROTO. diff --git a/gcc/testsuite/gcc.target/bpf/attr-kernel-helper.c b/gcc/testsuite/gcc.target/bpf/attr-kernel-helper.c new file mode 100644 index 0000000..7c5a000 --- /dev/null +++ b/gcc/testsuite/gcc.target/bpf/attr-kernel-helper.c @@ -0,0 +1,15 @@ +/* Basic test for kernel_helper attribute BTF information. */ + +/* { dg-do compile } */ +/* { dg-options "-O0 -dA -gbtf" } */ + +extern int foo_helper(int) __attribute((kernel_helper(42))); +extern int foo_nohelper(int); + +int bar (int arg) +{ + return foo_helper (arg) + foo_nohelper (arg); +} + +/* { dg-final { scan-assembler-times "BTF_KIND_FUNC 'foo_nohelper'" 1 } } */ +/* { dg-final { scan-assembler-times "BTF_KIND_FUNC 'foo_helper'" 0 } } */ -- cgit v1.1 From ba4cfef0a45dd50e16ca7bee158bc630fa646ee7 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Mon, 8 Jan 2024 10:11:59 +0100 Subject: asan: Do not call asan_function_start () without the current function [PR113251] Using ASAN on i686-linux with -fPIC causes an ICE, because when pc_thunks are generated, there is no current function anymore, but asan_function_start () expects one. Fix by not calling asan_function_start () without one. A narrower fix would be to temporarily disable ASAN around pc_thunk generation. However, the issue looks generic enough, and may affect less often tested configurations, so go for a broader fix. Fixes: e66dc37b299c ("asan: Align .LASANPC on function boundary") Suggested-by: Jakub Jelinek Signed-off-by: Ilya Leoshkevich gcc/ChangeLog: PR sanitizer/113251 * varasm.cc (assemble_function_label_raw): Do not call asan_function_start () without the current function. --- gcc/varasm.cc | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/gcc/varasm.cc b/gcc/varasm.cc index 25c1e05..1a869ae 100644 --- a/gcc/varasm.cc +++ b/gcc/varasm.cc @@ -1845,7 +1845,9 @@ assemble_function_label_raw (FILE *file, const char *name) ASM_OUTPUT_LABEL (file, name); if ((flag_sanitize & SANITIZE_ADDRESS) /* Notify ASAN only about the first function label. */ - && (in_cold_section_p == first_function_block_is_cold)) + && (in_cold_section_p == first_function_block_is_cold) + /* Do not notify ASAN when called from, e.g., code_end (). */ + && cfun) asan_function_start (); } -- cgit v1.1 From 0d0908c36542635b28d14961f8fa0e28477a3202 Mon Sep 17 00:00:00 2001 From: Joseph Myers Date: Mon, 8 Jan 2024 18:52:09 +0000 Subject: MAINTAINERS: Update my email address * MAINTAINERS: Update my email address. --- MAINTAINERS | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index fe5d95a..882694c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -34,7 +34,7 @@ Jeff Law Michael Meissner Jason Merrill David S. Miller -Joseph Myers +Joseph Myers Richard Sandiford Bernd Schmidt Ian Lance Taylor @@ -155,7 +155,7 @@ cygwin, mingw-w64 Jonathan Yong <10walls@gmail.com> Language Front Ends Maintainers -C front end/ISO C99 Joseph Myers +C front end/ISO C99 Joseph Myers Ada front end Arnaud Charlet Ada front end Eric Botcazou Ada front end Marc Poulhiès @@ -192,7 +192,7 @@ libquadmath Jakub Jelinek libvtv Caroline Tice libphobos Iain Buclaw line map Dodji Seketeli -soft-fp Joseph Myers +soft-fp Joseph Myers scheduler (+ haifa) Jim Wilson scheduler (+ haifa) Michael Meissner scheduler (+ haifa) Jeff Law @@ -219,7 +219,7 @@ jump.cc David S. Miller web pages Gerald Pfeifer config.sub/config.guess Ben Elliston i18n Philipp Thomas -i18n Joseph Myers +i18n Joseph Myers diagnostic messages Dodji Seketeli diagnostic messages David Malcolm build machinery (*.in) Paolo Bonzini @@ -227,14 +227,14 @@ build machinery (*.in) Nathanael Nerode build machinery (*.in) Alexandre Oliva build machinery (*.in) Ralf Wildenhues docs co-maintainer Gerald Pfeifer -docs co-maintainer Joseph Myers +docs co-maintainer Joseph Myers docs co-maintainer Sandra Loosemore docstring relicensing Gerald Pfeifer -docstring relicensing Joseph Myers +docstring relicensing Joseph Myers predict.def Jan Hubicka gcov Jan Hubicka gcov Nathan Sidwell -option handling Joseph Myers +option handling Joseph Myers middle-end Jeff Law middle-end Ian Lance Taylor middle-end Richard Biener @@ -278,7 +278,7 @@ CTF, BTF, bpf port David Faust dataflow Paolo Bonzini dataflow Seongbae Park dataflow Kenneth Zadeck -driver Joseph Myers +driver Joseph Myers Fortran Harald Anlauf Fortran Janne Blomqvist Fortran Tobias Burnus -- cgit v1.1 From f9290cdf4697f467fd0fb7c710f58cc12e497889 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Mon, 8 Jan 2024 20:35:27 +0100 Subject: GCN: Add pre-initial support for gfx1100: 'EF_AMDGPU_MACH_AMDGCN_GFX1100' MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ../../../source-gcc/libgomp/plugin/plugin-gcn.c: In function ‘isa_hsa_name’: ../../../source-gcc/libgomp/plugin/plugin-gcn.c:1666:10: error: ‘EF_AMDGPU_MACH_AMDGCN_GFX1100’ undeclared (first use in this function); did you mean ‘EF_AMDGPU_MACH_AMDGCN_GFX1030’? 1666 | case EF_AMDGPU_MACH_AMDGCN_GFX1100: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | EF_AMDGPU_MACH_AMDGCN_GFX1030 ../../../source-gcc/libgomp/plugin/plugin-gcn.c:1666:10: note: each undeclared identifier is reported only once for each function it appears in ../../../source-gcc/libgomp/plugin/plugin-gcn.c: In function ‘isa_code’: ../../../source-gcc/libgomp/plugin/plugin-gcn.c:1711:12: error: ‘EF_AMDGPU_MACH_AMDGCN_GFX1100’ undeclared (first use in this function); did you mean ‘EF_AMDGPU_MACH_AMDGCN_GFX1030’? 1711 | return EF_AMDGPU_MACH_AMDGCN_GFX1100; | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | EF_AMDGPU_MACH_AMDGCN_GFX1030 ../../../source-gcc/libgomp/plugin/plugin-gcn.c: In function ‘max_isa_vgprs’: ../../../source-gcc/libgomp/plugin/plugin-gcn.c:1728:10: error: ‘EF_AMDGPU_MACH_AMDGCN_GFX1100’ undeclared (first use in this function); did you mean ‘EF_AMDGPU_MACH_AMDGCN_GFX1030’? 1728 | case EF_AMDGPU_MACH_AMDGCN_GFX1100: | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~ | EF_AMDGPU_MACH_AMDGCN_GFX1030 make[4]: *** [Makefile:813: libgomp_plugin_gcn_la-plugin-gcn.lo] Error 1 Fix-up for commit 52a2c659ae6c21f84b6acce0afcb9b93b9dc71a0 "GCN: Add pre-initial support for gfx1100". libgomp/ * plugin/plugin-gcn.c (EF_AMDGPU_MACH): Add 'EF_AMDGPU_MACH_AMDGCN_GFX1100'. --- libgomp/plugin/plugin-gcn.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libgomp/plugin/plugin-gcn.c b/libgomp/plugin/plugin-gcn.c index f24a28f..0339848 100644 --- a/libgomp/plugin/plugin-gcn.c +++ b/libgomp/plugin/plugin-gcn.c @@ -389,7 +389,8 @@ typedef enum { EF_AMDGPU_MACH_AMDGCN_GFX906 = 0x02f, EF_AMDGPU_MACH_AMDGCN_GFX908 = 0x030, EF_AMDGPU_MACH_AMDGCN_GFX90a = 0x03f, - EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036 + EF_AMDGPU_MACH_AMDGCN_GFX1030 = 0x036, + EF_AMDGPU_MACH_AMDGCN_GFX1100 = 0x041 } EF_AMDGPU_MACH; const static int EF_AMDGPU_MACH_MASK = 0x000000ff; -- cgit v1.1 From 932b8d077c23986da5279bf8b5d84875de1fb826 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 8 Jan 2024 20:27:20 +0000 Subject: hppa: Fix bind_c_coms.f90 and bind_c_vars.f90 tests on hppa Commit 6271dd98 changed the default from -fcommon to -fno-common. This silently changed the alignment of uninitialized BSS data on hppa where the alignment of common data must be greater or equal to the alignment of the largest type that will fit in the block. For example, the alignment of `double d[2];' changed from 16 to 8 on hppa64. The hppa architecture requires strict alignment and the linker warns about inconsistent alignment of variables. This change broke the gfortran.dg/bind_c_coms.f90 and gfortran.dg/bind_c_vars.f90 tests. These tests check whether bind_c works between fortran and C. Adding the -fcommon option fixes the tests. Probably, gcc and HP C are now by default inconsistent but that's water under the bridge. 2024-01-08 John David Anglin gcc/testsuite/ChangeLog: PR testsuite/94253 * gfortran.dg/bind_c_coms.f90: Add -fcommon option on hppa*-*-*. * gfortran.dg/bind_c_vars.f90: Likewise. --- gcc/testsuite/gfortran.dg/bind_c_coms.f90 | 1 + gcc/testsuite/gfortran.dg/bind_c_vars.f90 | 1 + 2 files changed, 2 insertions(+) diff --git a/gcc/testsuite/gfortran.dg/bind_c_coms.f90 b/gcc/testsuite/gfortran.dg/bind_c_coms.f90 index 85ead9f..2f97149 100644 --- a/gcc/testsuite/gfortran.dg/bind_c_coms.f90 +++ b/gcc/testsuite/gfortran.dg/bind_c_coms.f90 @@ -3,6 +3,7 @@ ! { dg-options "-w" } ! the -w option is to prevent the warning about long long ints module bind_c_coms +! { dg-additional-options "-fcommon" { target hppa*-*-hpux* } } use, intrinsic :: iso_c_binding implicit none diff --git a/gcc/testsuite/gfortran.dg/bind_c_vars.f90 b/gcc/testsuite/gfortran.dg/bind_c_vars.f90 index 4f4a0cf..ede3ffd 100644 --- a/gcc/testsuite/gfortran.dg/bind_c_vars.f90 +++ b/gcc/testsuite/gfortran.dg/bind_c_vars.f90 @@ -1,6 +1,7 @@ ! { dg-do run } ! { dg-additional-sources bind_c_vars_driver.c } module bind_c_vars +! { dg-additional-options "-fcommon" { target hppa*-*-hpux* } } use, intrinsic :: iso_c_binding implicit none -- cgit v1.1 From d1260e9e2464c654685ccccfcd469400abdcf15c Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 8 Jan 2024 22:18:28 +0000 Subject: Skip gfortran.dg/dec_math.f90 on hppa hppa*-*-hpux* doesn't have any long double trig functions. 2024-01-08 John David Anglin gcc/testsuite/ChangeLog: * gfortran.dg/dec_math.f90: Skip on hppa*-*-hpux*. --- gcc/testsuite/gfortran.dg/dec_math.f90 | 1 + 1 file changed, 1 insertion(+) diff --git a/gcc/testsuite/gfortran.dg/dec_math.f90 b/gcc/testsuite/gfortran.dg/dec_math.f90 index d95233a..393e7de 100644 --- a/gcc/testsuite/gfortran.dg/dec_math.f90 +++ b/gcc/testsuite/gfortran.dg/dec_math.f90 @@ -1,5 +1,6 @@ ! { dg-options "-cpp -std=gnu" } ! { dg-do run { xfail i?86-*-freebsd* } } +! { dg-skip-if "No long double libc functions" { hppa*-*-hpux* } } ! ! Test extra math intrinsics formerly offered by -fdec-math, ! now included with -std=gnu or -std=legacy. -- cgit v1.1 From ff9e79eba27c91e9a2590b7d2e3ed57f1b8ed951 Mon Sep 17 00:00:00 2001 From: John David Anglin Date: Mon, 8 Jan 2024 22:32:03 +0000 Subject: xfail dg-final "Sunk statements: 5" on hppa*64*-*-* 2024-01-08 John David Anglin gcc/testsuite/ChangeLog: * gcc.dg/tree-ssa/ssa-sink-18.c: xfail dg-final "Sunk statements: 5" on hppa*64*-*-*. --- gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c index 1372100..b199df2 100644 --- a/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c +++ b/gcc/testsuite/gcc.dg/tree-ssa/ssa-sink-18.c @@ -215,4 +215,4 @@ compute_on_bytes (uint8_t *in_data, int in_len, uint8_t *out_data, int out_len) base+index addressing modes, so the ip[len] address computation can't be made from the IV computation above. powerpc64le similarly is affected. */ - /* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink2" { target lp64 xfail { riscv64-*-* powerpc64le-*-* } } } } */ + /* { dg-final { scan-tree-dump-times "Sunk statements: 5" 1 "sink2" { target lp64 xfail { riscv64-*-* powerpc64le-*-* hppa*64*-*-* } } } } */ -- cgit v1.1 From 6b1d6a2d3aac1fdd39f351a756cea6a7181192df Mon Sep 17 00:00:00 2001 From: GCC Administrator Date: Tue, 9 Jan 2024 00:17:50 +0000 Subject: Daily bump. --- ChangeLog | 4 ++ contrib/ChangeLog | 7 +++ gcc/ChangeLog | 111 ++++++++++++++++++++++++++++++++++++++++++++++++ gcc/DATESTAMP | 2 +- gcc/fortran/ChangeLog | 6 +++ gcc/testsuite/ChangeLog | 59 +++++++++++++++++++++++++ libgomp/ChangeLog | 20 +++++++++ libgrust/ChangeLog | 6 +++ libstdc++-v3/ChangeLog | 77 +++++++++++++++++++++++++++++++++ 9 files changed, 291 insertions(+), 1 deletion(-) diff --git a/ChangeLog b/ChangeLog index 0ea9206..3b32e6d 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,7 @@ +2024-01-08 Joseph Myers + + * MAINTAINERS: Update my email address. + 2023-12-30 Joseph Myers * MAINTAINERS: Update my email address. diff --git a/contrib/ChangeLog b/contrib/ChangeLog index bf16df8..569b889 100644 --- a/contrib/ChangeLog +++ b/contrib/ChangeLog @@ -1,3 +1,10 @@ +2024-01-08 Jonathan Wakely + + * unicode/README: Add notes about generating libstdc++ tables. + * unicode/GraphemeBreakProperty.txt: New file. + * unicode/emoji-data.txt: New file. + * unicode/gen_libstdcxx_unicode_data.py: New file. + 2024-01-05 Jonathan Wakely * analyze_brprob.py: Remove stray text at end of comment. diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 3d43d6e..5d5cf71 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,114 @@ +2024-01-08 Ilya Leoshkevich + + PR sanitizer/113251 + * varasm.cc (assemble_function_label_raw): Do not call + asan_function_start () without the current function. + +2024-01-08 Cupertino Miranda + + PR target/113225 + * btfout.cc (btf_collect_datasec): Skip creating BTF info for + extern and kernel_helper attributed function decls. + +2024-01-08 Cupertino Miranda + + * btfout.cc (output_btf_strs): Changed. + +2024-01-08 Tobias Burnus + + * config/gcn/mkoffload.cc (main): Handle gfx1100 + when setting the default XNACK. + +2024-01-08 Tobias Burnus + + * config.gcc (amdgcn-*-amdhsa): Accept --with-arch=gfx1100. + * config/gcn/gcn-hsa.h (NO_XNACK): Add gfx1100: + (ASM_SPEC): Handle gfx1100. + * config/gcn/gcn-opts.h (enum processor_type): Add PROCESSOR_GFX1100. + (enum gcn_isa): Add ISA_RDNA3. + (TARGET_GFX1100, TARGET_RDNA2_PLUS, TARGET_RDNA3): Define. + * config/gcn/gcn-valu.md: Change TARGET_RDNA2 to TARGET_RDNA2_PLUS. + * config/gcn/gcn.cc (gcn_option_override, + gcn_omp_device_kind_arch_isa, output_file_start): Handle gfx1100. + (gcn_global_address_p, gcn_addr_space_legitimate_address_p): Change + TARGET_RDNA2 to TARGET_RDNA2_PLUS. + (gcn_hsa_declare_function_name): Don't use '.amdhsa_reserve_flat_scratch' + with gfx1100. + * config/gcn/gcn.h (ASSEMBLER_DIALECT): Likewise. + (TARGET_CPU_CPP_BUILTINS): Define __RDNA3__, __gfx1030__ and + __gfx1100__. + * config/gcn/gcn.md: Change TARGET_RDNA2 to TARGET_RDNA2_PLUS. + * config/gcn/gcn.opt (Enum gpu_type): Add gfx1100. + * config/gcn/mkoffload.cc (EF_AMDGPU_MACH_AMDGCN_GFX1100): Define. + (isa_has_combined_avgprs, main): Handle gfx1100. + * config/gcn/t-omp-device (isa): Add gfx1100. + +2024-01-08 Richard Biener + + * doc/invoke.texi (-mmovbe): Clarify. + +2024-01-08 Richard Biener + + PR tree-optimization/113026 + * tree-vect-loop.cc (vect_need_peeling_or_partial_vectors_p): + Avoid an epilog in more cases. + * tree-vect-loop-manip.cc (vect_do_peeling): Adjust the + epilogues niter upper bounds and estimates. + +2024-01-08 Jakub Jelinek + + PR tree-optimization/113228 + * gimplify.cc (recalculate_side_effects): Do nothing for SSA_NAMEs. + +2024-01-08 Jakub Jelinek + + PR tree-optimization/113120 + * gimple-lower-bitint.cc (gimple_lower_bitint): Fix handling of very + large _BitInt zero INTEGER_CST PHI argument. + +2024-01-08 Jakub Jelinek + + PR tree-optimization/113119 + * gimple-lower-bitint.cc (optimizable_arith_overflow): Punt if + both REALPART_EXPR and cast from IMAGPART_EXPR appear, but cast + is before REALPART_EXPR. + +2024-01-08 Georg-Johann Lay + + PR target/112952 + * config/avr/avr.cc (avr_handle_addr_attribute): Also print valid + range when diagnosing attribute "io" and "io_low" are out of range. + (avr_eval_addr_attrib): Don't ICE on empty address at that place. + (avr_insert_attributes): Reject if attribute "address", "io" or "io_low" + in contexts other than static storage. + (avr_asm_output_aligned_decl_common): Move output of decls with + attribute "address", "io", and "io_low" to... + (avr_output_addr_attrib): ...this new function. + (avr_asm_asm_output_aligned_bss): Remove output for decls with + attribute "address", "io", and "io_low". + (avr_encode_section_info): Rectify handling of decls with attribute + "address", "io", and "io_low". + +2024-01-08 Andrew Stubbs + + * config/gcn/mkoffload.cc (TEST_XNACK_UNSET): New. + (elf_flags): Remove XNACK from the default value. + (main): Set a default XNACK according to the arch. + +2024-01-08 Andrew Stubbs + + * config/gcn/mkoffload.cc (isa_has_combined_avgprs): Delete. + (process_asm): Don't count avgprs. + +2024-01-08 Hongyu Wang + + * config/i386/i386.opt: Add supported sub-features. + * doc/extend.texi: Add description for target attribute. + +2024-01-08 Feng Wang + + * config/riscv/vector.md: Modify avl_type operand index of zvbc ins. + 2024-01-07 Roger Sayle Uros Bizjak diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP index bfc9285..e55f042 100644 --- a/gcc/DATESTAMP +++ b/gcc/DATESTAMP @@ -1 +1 @@ -20240108 +20240109 diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog index 22a5110..757e515 100644 --- a/gcc/fortran/ChangeLog +++ b/gcc/fortran/ChangeLog @@ -1,3 +1,9 @@ +2024-01-08 Harald Anlauf + + PR fortran/113245 + * trans-intrinsic.cc (gfc_conv_intrinsic_size): Use + gfc_conv_expr_present() for proper check of optional DIM argument. + 2024-01-06 Harald Anlauf José Rui Faustino de Sousa diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index a0ed82b..a8437be 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,62 @@ +2024-01-08 John David Anglin + + * gcc.dg/tree-ssa/ssa-sink-18.c: xfail dg-final "Sunk statements: 5" + on hppa*64*-*-*. + +2024-01-08 John David Anglin + + * gfortran.dg/dec_math.f90: Skip on hppa*-*-hpux*. + +2024-01-08 John David Anglin + + PR testsuite/94253 + * gfortran.dg/bind_c_coms.f90: Add -fcommon option on hppa*-*-*. + * gfortran.dg/bind_c_vars.f90: Likewise. + +2024-01-08 Cupertino Miranda + + * gcc.target/bpf/attr-kernel-helper.c: New test. + +2024-01-08 Harald Anlauf + + PR fortran/113245 + * gfortran.dg/size_optional_dim_2.f90: New test. + +2024-01-08 Richard Biener + + PR tree-optimization/113026 + * gcc.dg/torture/pr113026-1.c: New testcase. + * gcc.dg/torture/pr113026-2.c: Likewise. + +2024-01-08 Jakub Jelinek + + PR tree-optimization/113228 + * gcc.c-torture/compile/pr113228.c: New test. + +2024-01-08 Jakub Jelinek + + PR tree-optimization/113120 + * gcc.dg/bitint-62.c: New test. + +2024-01-08 Jakub Jelinek + + PR tree-optimization/113119 + * gcc.dg/bitint-61.c: New test. + +2024-01-08 Georg-Johann Lay + + PR target/112952 + * gcc.target/avr/attribute-io.h: New file. + * gcc.target/avr/pr112952-0.c: New test. + * gcc.target/avr/pr112952-1.c: New test. + * gcc.target/avr/pr112952-2.c: New test. + * gcc.target/avr/pr112952-3.c: New test. + +2024-01-08 Kito Cheng + + * gcc.target/riscv/rvv/autovec/partial/single_rgroup-3.h: Use + check + abort rather than assert. + 2024-01-07 Georg-Johann Lay * gcc.target/avr/lra-cpymem_qi.c: Remove duplicate -mmcu=. diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index eccc898..63bc54f 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,23 @@ +2024-01-08 Thomas Schwinge + + * plugin/plugin-gcn.c (EF_AMDGPU_MACH): Add + 'EF_AMDGPU_MACH_AMDGCN_GFX1100'. + +2024-01-08 Julian Brown + + * plugin/plugin-gcn.c (hsa_runtime_fn_info): Add + hsa_amd_memory_lock_fn, hsa_amd_memory_unlock_fn, + hsa_amd_memory_async_copy_rect_fn function pointers. + (init_hsa_runtime_functions): Add above functions, with + DLSYM_OPT_FN. + (GOMP_OFFLOAD_memcpy2d, GOMP_OFFLOAD_memcpy3d): New functions. + +2024-01-08 Tobias Burnus + + * plugin/plugin-gcn.c (gcn_gfx1100_s): New const string. + (gcn_isa_name_len): Fix length. + (isa_hsa_name, isa_code, max_isa_vgprs): Handle gfx1100. + 2024-01-06 Tobias Burnus * libgomp.texi (OpenMP Technical Report 12): Fix a typo. diff --git a/libgrust/ChangeLog b/libgrust/ChangeLog index a01c408..fa9e210 100644 --- a/libgrust/ChangeLog +++ b/libgrust/ChangeLog @@ -1,3 +1,9 @@ +2024-01-08 Thomas Schwinge + + PR rust/113056 + * configure.ac: 'AM_ENABLE_MULTILIB' only for target builds. + * configure: Regenerate. + 2023-12-14 Pierre-Emmanuel Patry Arthur Cohen diff --git a/libstdc++-v3/ChangeLog b/libstdc++-v3/ChangeLog index 9cbd55a..056174c 100644 --- a/libstdc++-v3/ChangeLog +++ b/libstdc++-v3/ChangeLog @@ -1,3 +1,80 @@ +2024-01-08 Jonathan Wakely + + * include/bits/unicode.h (__null_sentinel): Remove. + * testsuite/17_intro/names.cc: Add __null_sentinel. + +2024-01-08 Jonathan Wakely + + * include/std/format (__format::_Runtime_format_string): Define + new class template. + (basic_format_string): Add non-consteval constructor for runtime + format strings. + (runtime_format): Define new function for C++26. + * testsuite/std/format/runtime_format.cc: New test. + +2024-01-08 Jonathan Wakely + + * include/bits/chrono_io.h (__formatter_chrono): Always use + lvalue arguments to make_format_args. + * include/std/format (make_format_args): Change parameter pack + from forwarding references to lvalue references. Remove use of + remove_reference_t which is now unnecessary. + (format_to, formatted_size): Remove incorrect forwarding of + arguments. + * include/std/ostream (print): Remove forwarding of arguments. + * include/std/print (print): Likewise. + * testsuite/20_util/duration/io.cc: Use lvalues as arguments to + make_format_args. + * testsuite/std/format/arguments/args.cc: Likewise. + * testsuite/std/format/arguments/lwg3810.cc: Likewise. + * testsuite/std/format/functions/format.cc: Likewise. + * testsuite/std/format/functions/vformat_to.cc: Likewise. + * testsuite/std/format/string.cc: Likewise. + * testsuite/std/time/day/io.cc: Likewise. + * testsuite/std/time/month/io.cc: Likewise. + * testsuite/std/time/weekday/io.cc: Likewise. + * testsuite/std/time/year/io.cc: Likewise. + * testsuite/std/time/year_month_day/io.cc: Likewise. + * testsuite/std/format/arguments/args_neg.cc: New test. + +2024-01-08 Jonathan Wakely + + * include/Makefile.am: Add new headers. + * include/Makefile.in: Regenerate. + * include/bits/unicode.h: New file. + * include/bits/unicode-data.h: New file. + * include/std/format: Include . + (__literal_encoding_is_utf8): Move to . + (_Spec::_M_fill): Change type to char32_t. + (_Spec::_M_parse_fill_and_align): Read a Unicode scalar value + instead of a single character. + (__write_padded): Change __fill_char parameter to char32_t and + encode it into the output. + (__formatter_str::format): Use new __unicode::__field_width and + __unicode::__truncate functions. + * include/std/ostream: Adjust namespace qualification for + __literal_encoding_is_utf8. + * include/std/print: Likewise. + * src/c++23/print.cc: Add [[unlikely]] attribute to error path. + * testsuite/ext/unicode/view.cc: New test. + * testsuite/std/format/functions/format.cc: Add missing examples + from the standard demonstrating alignment with non-ASCII + characters. Add examples checking correct handling of extended + grapheme clusters. + +2024-01-08 Jonathan Wakely + + * include/bits/version.def (format_uchar): Define. + * include/bits/version.h: Regenerate. + * include/std/format (formatter::format): Check for + _Pres_c and call _M_format_character directly. Cast C to its + unsigned equivalent for formatting as an integer. + (formatter::format): Likewise. + (basic_format_arg(T&)): Store char arguments as unsigned char + for formatting to a wide string. + * testsuite/std/format/functions/format.cc: Adjust test. Check + formatting of + 2024-01-07 Jonathan Wakely PR libstdc++/112997 -- cgit v1.1 From e50a1ed3d36dee5baee5d89126a80419b119a7c5 Mon Sep 17 00:00:00 2001 From: Feng Wang Date: Mon, 8 Jan 2024 09:12:01 +0000 Subject: RISC-V: Add crypto vector builtin function. This patch add the intrinsic funtions of crypto vector based on the intrinsic doc(https://github.com/riscv-non-isa/rvv-intrinsic-doc/blob /eopc/vector-crypto/auto-generated/vector-crypto/intrinsic_funcs.md). Co-Authored by: Songhe Zhu Co-Authored by: Ciyan Pan gcc/ChangeLog: * config/riscv/riscv-vector-builtins-bases.cc (class vandn): Add new function_base for crypto vector. (class bitmanip): Ditto. (class b_reverse):Ditto. (class vwsll): Ditto. (class clmul): Ditto. (class vg_nhab): Ditto. (class crypto_vv):Ditto. (class crypto_vi):Ditto. (class vaeskf2_vsm3c):Ditto. (class vsm3me): Ditto. (BASE): Add BASE declaration for crypto vector. * config/riscv/riscv-vector-builtins-bases.h: Ditto. * config/riscv/riscv-vector-builtins-functions.def (REQUIRED_EXTENSIONS): Add crypto vector intrinsic definition. (vbrev): Ditto. (vclz): Ditto. (vctz): Ditto. (vwsll): Ditto. (vandn): Ditto. (vbrev8): Ditto. (vrev8): Ditto. (vrol): Ditto. (vror): Ditto. (vclmul): Ditto. (vclmulh): Ditto. (vghsh): Ditto. (vgmul): Ditto. (vaesef): Ditto. (vaesem): Ditto. (vaesdf): Ditto. (vaesdm): Ditto. (vaesz): Ditto. (vaeskf1): Ditto. (vaeskf2): Ditto. (vsha2ms): Ditto. (vsha2ch): Ditto. (vsha2cl): Ditto. (vsm4k): Ditto. (vsm4r): Ditto. (vsm3me): Ditto. (vsm3c): Ditto. * config/riscv/riscv-vector-builtins-shapes.cc (struct crypto_vv_def): Add new function_shape for crypto vector. (struct crypto_vi_def): Ditto. (struct crypto_vv_no_op_type_def): Ditto. (SHAPE): Add SHAPE declaration of crypto vector. * config/riscv/riscv-vector-builtins-shapes.h: Ditto. * config/riscv/riscv-vector-builtins-types.def (DEF_RVV_CRYPTO_SEW32_OPS): Add new data type for crypto vector. (DEF_RVV_CRYPTO_SEW64_OPS): Ditto. (vuint32mf2_t): Ditto. (vuint32m1_t): Ditto. (vuint32m2_t): Ditto. (vuint32m4_t): Ditto. (vuint32m8_t): Ditto. (vuint64m1_t): Ditto. (vuint64m2_t): Ditto. (vuint64m4_t): Ditto. (vuint64m8_t): Ditto. * config/riscv/riscv-vector-builtins.cc (DEF_RVV_CRYPTO_SEW32_OPS): Add new data struct for crypto vector. (DEF_RVV_CRYPTO_SEW64_OPS): Ditto. (registered_function::overloaded_hash): Processing size_t uimm for C overloaded func. * config/riscv/riscv-vector-builtins.def (vi): Add vi OP_TYPE. --- gcc/config/riscv/riscv-vector-builtins-bases.cc | 264 ++++++++++++++++++++- gcc/config/riscv/riscv-vector-builtins-bases.h | 28 +++ .../riscv/riscv-vector-builtins-functions.def | 94 ++++++++ gcc/config/riscv/riscv-vector-builtins-shapes.cc | 87 ++++++- gcc/config/riscv/riscv-vector-builtins-shapes.h | 4 + gcc/config/riscv/riscv-vector-builtins-types.def | 25 ++ gcc/config/riscv/riscv-vector-builtins.cc | 133 ++++++++++- gcc/config/riscv/riscv-vector-builtins.def | 1 + 8 files changed, 633 insertions(+), 3 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc index 810783b..fba9812 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.cc +++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc @@ -2127,6 +2127,212 @@ public: } }; +/* Below implements are vector crypto */ +/* Implements vandn.[vv,vx] */ +class vandn : public function_base +{ +public: + rtx expand (function_expander &e) const override + { + switch (e.op_info->op) + { + case OP_TYPE_vv: + return e.use_exact_insn (code_for_pred_vandn (e.vector_mode ())); + case OP_TYPE_vx: + return e.use_exact_insn (code_for_pred_vandn_scalar (e.vector_mode ())); + default: + gcc_unreachable (); + } + } +}; + +/* Implements vrol/vror/clz/ctz. */ +template +class bitmanip : public function_base +{ +public: + bool apply_tail_policy_p () const override + { + return (CODE == CLZ || CODE == CTZ) ? false : true; + } + bool apply_mask_policy_p () const override + { + return (CODE == CLZ || CODE == CTZ) ? false : true; + } + bool has_merge_operand_p () const override + { + return (CODE == CLZ || CODE == CTZ) ? false : true; + } + + rtx expand (function_expander &e) const override + { + switch (e.op_info->op) + { + case OP_TYPE_v: + case OP_TYPE_vv: + return e.use_exact_insn (code_for_pred_v (CODE, e.vector_mode ())); + case OP_TYPE_vx: + return e.use_exact_insn (code_for_pred_v_scalar (CODE, e.vector_mode ())); + default: + gcc_unreachable (); + } + } +}; + +/* Implements vbrev/vbrev8/vrev8. */ +template +class b_reverse : public function_base +{ +public: + rtx expand (function_expander &e) const override + { + return e.use_exact_insn (code_for_pred_v (UNSPEC, e.vector_mode ())); + } +}; + +class vwsll : public function_base +{ +public: + rtx expand (function_expander &e) const override + { + switch (e.op_info->op) + { + case OP_TYPE_vv: + return e.use_exact_insn (code_for_pred_vwsll (e.vector_mode ())); + case OP_TYPE_vx: + return e.use_exact_insn (code_for_pred_vwsll_scalar (e.vector_mode ())); + default: + gcc_unreachable (); + } + } +}; + +/* Implements clmul */ +template +class clmul : public function_base +{ +public: + rtx expand (function_expander &e) const override + { + switch (e.op_info->op) + { + case OP_TYPE_vv: + return e.use_exact_insn ( + code_for_pred_vclmul (UNSPEC, e.vector_mode ())); + case OP_TYPE_vx: + return e.use_exact_insn + (code_for_pred_vclmul_scalar (UNSPEC, e.vector_mode ())); + default: + gcc_unreachable (); + } + } +}; + +/* Implements vghsh/vsh2ms/vsha2c[hl]. */ +template +class vg_nhab : public function_base +{ +public: + bool apply_mask_policy_p () const override { return false; } + bool use_mask_predication_p () const override { return false; } + bool has_merge_operand_p () const override { return false; } + + rtx expand (function_expander &e) const override + { + return e.use_exact_insn (code_for_pred_v (UNSPEC, e.vector_mode ())); + } +}; + +/* Implements vgmul/vaes*. */ +template +class crypto_vv : public function_base +{ +public: + bool apply_mask_policy_p () const override { return false; } + bool use_mask_predication_p () const override { return false; } + bool has_merge_operand_p () const override { return false; } + + rtx expand (function_expander &e) const override + { + poly_uint64 nunits = 0U; + switch (e.op_info->op) + { + case OP_TYPE_vv: + if (UNSPEC == UNSPEC_VGMUL) + return e.use_exact_insn + (code_for_pred_crypto_vv (UNSPEC, UNSPEC, e.vector_mode ())); + else + return e.use_exact_insn + (code_for_pred_crypto_vv (UNSPEC + 1, UNSPEC + 1, e.vector_mode ())); + case OP_TYPE_vs: + /* Calculate the ratio between arg0 and arg1*/ + gcc_assert (multiple_p (GET_MODE_BITSIZE (e.arg_mode (0)), + GET_MODE_BITSIZE (e.arg_mode (1)), &nunits)); + if (maybe_eq (nunits, 1U)) + return e.use_exact_insn (code_for_pred_crypto_vvx1_scalar + (UNSPEC + 2, UNSPEC + 2, e.vector_mode ())); + else if (maybe_eq (nunits, 2U)) + return e.use_exact_insn (code_for_pred_crypto_vvx2_scalar + (UNSPEC + 2, UNSPEC + 2, e.vector_mode ())); + else if (maybe_eq (nunits, 4U)) + return e.use_exact_insn (code_for_pred_crypto_vvx4_scalar + (UNSPEC + 2, UNSPEC + 2, e.vector_mode ())); + else if (maybe_eq (nunits, 8U)) + return e.use_exact_insn (code_for_pred_crypto_vvx8_scalar + (UNSPEC + 2, UNSPEC + 2, e.vector_mode ())); + else + return e.use_exact_insn (code_for_pred_crypto_vvx16_scalar + (UNSPEC + 2, UNSPEC + 2, e.vector_mode ())); + default: + gcc_unreachable (); + } + } +}; + +/* Implements vaeskf1/vsm4k. */ +template +class crypto_vi : public function_base +{ +public: + bool apply_mask_policy_p () const override { return false; } + bool use_mask_predication_p () const override { return false; } + + rtx expand (function_expander &e) const override + { + return e.use_exact_insn + (code_for_pred_crypto_vi_scalar (UNSPEC, e.vector_mode ())); + } +}; + +/* Implements vaeskf2/vsm3c. */ +template +class vaeskf2_vsm3c : public function_base +{ +public: + bool apply_mask_policy_p () const override { return false; } + bool use_mask_predication_p () const override { return false; } + bool has_merge_operand_p () const override { return false; } + + rtx expand (function_expander &e) const override + { + return e.use_exact_insn + (code_for_pred_vi_nomaskedoff_scalar (UNSPEC, e.vector_mode ())); + } +}; + +/* Implements vsm3me. */ +class vsm3me : public function_base +{ +public: + bool apply_mask_policy_p () const override { return false; } + bool use_mask_predication_p () const override { return false; } + + rtx expand (function_expander &e) const override + { + return e.use_exact_insn (code_for_pred_vsm3me (e.vector_mode ())); + } +}; + static CONSTEXPR const vsetvl vsetvl_obj; static CONSTEXPR const vsetvl vsetvlmax_obj; static CONSTEXPR const loadstore vle_obj; @@ -2384,6 +2590,35 @@ static CONSTEXPR const seg_indexed_store vsuxseg_obj; static CONSTEXPR const seg_indexed_store vsoxseg_obj; static CONSTEXPR const vlsegff vlsegff_obj; +/* Crypto Vector */ +static CONSTEXPR const vandn vandn_obj; +static CONSTEXPR const bitmanip vrol_obj; +static CONSTEXPR const bitmanip vror_obj; +static CONSTEXPR const b_reverse vbrev_obj; +static CONSTEXPR const b_reverse vbrev8_obj; +static CONSTEXPR const b_reverse vrev8_obj; +static CONSTEXPR const bitmanip vclz_obj; +static CONSTEXPR const bitmanip vctz_obj; +static CONSTEXPR const vwsll vwsll_obj; +static CONSTEXPR const clmul vclmul_obj; +static CONSTEXPR const clmul vclmulh_obj; +static CONSTEXPR const vg_nhab vghsh_obj; +static CONSTEXPR const crypto_vv vgmul_obj; +static CONSTEXPR const crypto_vv vaesef_obj; +static CONSTEXPR const crypto_vv vaesem_obj; +static CONSTEXPR const crypto_vv vaesdf_obj; +static CONSTEXPR const crypto_vv vaesdm_obj; +static CONSTEXPR const crypto_vv vaesz_obj; +static CONSTEXPR const crypto_vi vaeskf1_obj; +static CONSTEXPR const vaeskf2_vsm3c vaeskf2_obj; +static CONSTEXPR const vg_nhab vsha2ms_obj; +static CONSTEXPR const vg_nhab vsha2ch_obj; +static CONSTEXPR const vg_nhab vsha2cl_obj; +static CONSTEXPR const crypto_vi vsm4k_obj; +static CONSTEXPR const crypto_vv vsm4r_obj; +static CONSTEXPR const vsm3me vsm3me_obj; +static CONSTEXPR const vaeskf2_vsm3c vsm3c_obj; + /* Declare the function base NAME, pointing it to an instance of class _obj. */ #define BASE(NAME) \ @@ -2645,5 +2880,32 @@ BASE (vloxseg) BASE (vsuxseg) BASE (vsoxseg) BASE (vlsegff) - +/* Crypto vector */ +BASE (vandn) +BASE (vbrev) +BASE (vbrev8) +BASE (vrev8) +BASE (vclz) +BASE (vctz) +BASE (vrol) +BASE (vror) +BASE (vwsll) +BASE (vclmul) +BASE (vclmulh) +BASE (vghsh) +BASE (vgmul) +BASE (vaesef) +BASE (vaesem) +BASE (vaesdf) +BASE (vaesdm) +BASE (vaesz) +BASE (vaeskf1) +BASE (vaeskf2) +BASE (vsha2ms) +BASE (vsha2ch) +BASE (vsha2cl) +BASE (vsm4k) +BASE (vsm4r) +BASE (vsm3me) +BASE (vsm3c) } // end namespace riscv_vector diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.h b/gcc/config/riscv/riscv-vector-builtins-bases.h index 2e18a62..87c7f43 100644 --- a/gcc/config/riscv/riscv-vector-builtins-bases.h +++ b/gcc/config/riscv/riscv-vector-builtins-bases.h @@ -280,6 +280,34 @@ extern const function_base *const vloxseg; extern const function_base *const vsuxseg; extern const function_base *const vsoxseg; extern const function_base *const vlsegff; +/* Below function_base are Vectro Crypto*/ +extern const function_base *const vandn; +extern const function_base *const vbrev; +extern const function_base *const vbrev8; +extern const function_base *const vrev8; +extern const function_base *const vclz; +extern const function_base *const vctz; +extern const function_base *const vrol; +extern const function_base *const vror; +extern const function_base *const vwsll; +extern const function_base *const vclmul; +extern const function_base *const vclmulh; +extern const function_base *const vghsh; +extern const function_base *const vgmul; +extern const function_base *const vaesef; +extern const function_base *const vaesem; +extern const function_base *const vaesdf; +extern const function_base *const vaesdm; +extern const function_base *const vaesz; +extern const function_base *const vaeskf1; +extern const function_base *const vaeskf2; +extern const function_base *const vsha2ms; +extern const function_base *const vsha2ch; +extern const function_base *const vsha2cl; +extern const function_base *const vsm4k; +extern const function_base *const vsm4r; +extern const function_base *const vsm3me; +extern const function_base *const vsm3c; } } // end namespace riscv_vector diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def b/gcc/config/riscv/riscv-vector-builtins-functions.def index c44bc39..96dd0d9 100644 --- a/gcc/config/riscv/riscv-vector-builtins-functions.def +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def @@ -653,4 +653,98 @@ DEF_RVV_FUNCTION (vsoxseg, seg_indexed_loadstore, none_m_preds, tuple_v_scalar_p DEF_RVV_FUNCTION (vlsegff, seg_fault_load, full_preds, tuple_v_scalar_const_ptr_size_ptr_ops) #undef REQUIRED_EXTENSIONS +/* Definiation of crypto vector intrinsic functions */ +// ZVBB and ZVKB +#define REQUIRED_EXTENSIONS ZVBB_EXT +DEF_RVV_FUNCTION (vbrev, alu, full_preds, u_vv_ops) +DEF_RVV_FUNCTION (vclz, alu, none_m_preds, u_vv_ops) +DEF_RVV_FUNCTION (vctz, alu, none_m_preds, u_vv_ops) +DEF_RVV_FUNCTION (vwsll, alu, full_preds, u_wvv_ops) +DEF_RVV_FUNCTION (vwsll, alu, full_preds, u_shift_wvx_ops) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS ZVBB_OR_ZVKB_EXT +DEF_RVV_FUNCTION (vandn, alu, full_preds, u_vvv_ops) +DEF_RVV_FUNCTION (vandn, alu, full_preds, u_vvx_ops) +DEF_RVV_FUNCTION (vbrev8, alu, full_preds, u_vv_ops) +DEF_RVV_FUNCTION (vrev8, alu, full_preds, u_vv_ops) +DEF_RVV_FUNCTION (vrol, alu, full_preds, u_vvv_ops) +DEF_RVV_FUNCTION (vror, alu, full_preds, u_vvv_ops) +DEF_RVV_FUNCTION (vror, alu, full_preds, u_shift_vvx_ops) +DEF_RVV_FUNCTION (vrol, alu, full_preds, u_shift_vvx_ops) +#undef REQUIRED_EXTENSIONS +//ZVBC +#define REQUIRED_EXTENSIONS ZVBC_EXT +DEF_RVV_FUNCTION (vclmul, alu, full_preds, u_vvv_crypto_sew64_ops) +DEF_RVV_FUNCTION (vclmul, alu, full_preds, u_vvx_crypto_sew64_ops) +DEF_RVV_FUNCTION (vclmulh, alu, full_preds, u_vvv_crypto_sew64_ops) +DEF_RVV_FUNCTION (vclmulh, alu, full_preds, u_vvx_crypto_sew64_ops) +#undef REQUIRED_EXTENSIONS +//ZVKG +#define REQUIRED_EXTENSIONS ZVKG_EXT +DEF_RVV_FUNCTION(vghsh, no_mask_policy, none_tu_preds, u_vvvv_crypto_sew32_ops) +DEF_RVV_FUNCTION(vgmul, no_mask_policy, none_tu_preds, u_vvv_crypto_sew32_ops) +#undef REQUIRED_EXTENSIONS +//ZVKNED +#define REQUIRED_EXTENSIONS ZVKNED_EXT +DEF_RVV_FUNCTION (vaesef, crypto_vv, none_tu_preds, u_vvv_crypto_sew32_ops) +DEF_RVV_FUNCTION (vaesef, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_ops) +DEF_RVV_FUNCTION (vaesef, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x2_ops) +DEF_RVV_FUNCTION (vaesef, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x4_ops) +DEF_RVV_FUNCTION (vaesef, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x8_ops) +DEF_RVV_FUNCTION (vaesef, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x16_ops) +DEF_RVV_FUNCTION (vaesem, crypto_vv, none_tu_preds, u_vvv_crypto_sew32_ops) +DEF_RVV_FUNCTION (vaesem, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_ops) +DEF_RVV_FUNCTION (vaesem, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x2_ops) +DEF_RVV_FUNCTION (vaesem, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x4_ops) +DEF_RVV_FUNCTION (vaesem, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x8_ops) +DEF_RVV_FUNCTION (vaesem, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x16_ops) +DEF_RVV_FUNCTION (vaesdf, crypto_vv, none_tu_preds, u_vvv_crypto_sew32_ops) +DEF_RVV_FUNCTION (vaesdf, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_ops) +DEF_RVV_FUNCTION (vaesdf, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x2_ops) +DEF_RVV_FUNCTION (vaesdf, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x4_ops) +DEF_RVV_FUNCTION (vaesdf, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x8_ops) +DEF_RVV_FUNCTION (vaesdf, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x16_ops) +DEF_RVV_FUNCTION (vaesdm, crypto_vv, none_tu_preds, u_vvv_crypto_sew32_ops) +DEF_RVV_FUNCTION (vaesdm, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_ops) +DEF_RVV_FUNCTION (vaesdm, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x2_ops) +DEF_RVV_FUNCTION (vaesdm, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x4_ops) +DEF_RVV_FUNCTION (vaesdm, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x8_ops) +DEF_RVV_FUNCTION (vaesdm, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x16_ops) +DEF_RVV_FUNCTION (vaesz, crypto_vv_no_op_type, none_tu_preds, u_vvs_crypto_sew32_ops) +DEF_RVV_FUNCTION (vaesz, crypto_vv_no_op_type, none_tu_preds, u_vvs_crypto_sew32_lmul_x2_ops) +DEF_RVV_FUNCTION (vaesz, crypto_vv_no_op_type, none_tu_preds, u_vvs_crypto_sew32_lmul_x4_ops) +DEF_RVV_FUNCTION (vaesz, crypto_vv_no_op_type, none_tu_preds, u_vvs_crypto_sew32_lmul_x8_ops) +DEF_RVV_FUNCTION (vaesz, crypto_vv_no_op_type, none_tu_preds, u_vvs_crypto_sew32_lmul_x16_ops) +DEF_RVV_FUNCTION (vaeskf1, crypto_vi, none_tu_preds, u_vv_size_crypto_sew32_ops) +DEF_RVV_FUNCTION (vaeskf2, crypto_vi, none_tu_preds, u_vvv_size_crypto_sew32_ops) +#undef REQUIRED_EXTENSIONS +//ZVKNHA +//ZVKNHA and ZVKNHB +#define REQUIRED_EXTENSIONS ZVKNHA_OR_ZVKNHB_EXT +DEF_RVV_FUNCTION (vsha2ms, no_mask_policy, none_tu_preds, u_vvvv_crypto_sew32_ops) +DEF_RVV_FUNCTION (vsha2ch, no_mask_policy, none_tu_preds, u_vvvv_crypto_sew32_ops) +DEF_RVV_FUNCTION (vsha2cl, no_mask_policy, none_tu_preds, u_vvvv_crypto_sew32_ops) +#undef REQUIRED_EXTENSIONS + +#define REQUIRED_EXTENSIONS ZVKNHB_EXT +DEF_RVV_FUNCTION (vsha2ms, no_mask_policy, none_tu_preds, u_vvvv_crypto_sew64_ops) +DEF_RVV_FUNCTION (vsha2ch, no_mask_policy, none_tu_preds, u_vvvv_crypto_sew64_ops) +DEF_RVV_FUNCTION (vsha2cl, no_mask_policy, none_tu_preds, u_vvvv_crypto_sew64_ops) +#undef REQUIRED_EXTENSIONS +//Zvksed +#define REQUIRED_EXTENSIONS ZVKSED_EXT +DEF_RVV_FUNCTION (vsm4k, crypto_vi, none_tu_preds, u_vv_size_crypto_sew32_ops) +DEF_RVV_FUNCTION (vsm4r, crypto_vv, none_tu_preds, u_vvv_crypto_sew32_ops) +DEF_RVV_FUNCTION (vsm4r, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_ops) +DEF_RVV_FUNCTION (vsm4r, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x2_ops) +DEF_RVV_FUNCTION (vsm4r, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x4_ops) +DEF_RVV_FUNCTION (vsm4r, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x8_ops) +DEF_RVV_FUNCTION (vsm4r, crypto_vv, none_tu_preds, u_vvs_crypto_sew32_lmul_x16_ops) +#undef REQUIRED_EXTENSIONS +//Zvksh +#define REQUIRED_EXTENSIONS ZVKSH_EXT +DEF_RVV_FUNCTION (vsm3me, no_mask_policy, none_tu_preds, u_vvv_crypto_sew32_ops) +DEF_RVV_FUNCTION (vsm3c, crypto_vi, none_tu_preds, u_vvv_size_crypto_sew32_ops) +#undef REQUIRED_EXTENSIONS #undef DEF_RVV_FUNCTION diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc index 0d50111..ee8058d 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc @@ -984,6 +984,89 @@ struct seg_fault_load_def : public build_base } }; +/* vsm4r/vaes* class. */ +struct crypto_vv_def : public build_base +{ + char *get_name (function_builder &b, const function_instance &instance, + bool overloaded_p) const override + { + /* Return nullptr if it can not be overloaded. */ + if (overloaded_p && !instance.base->can_be_overloaded_p (instance.pred)) + return nullptr; + b.append_base_name (instance.base_name); + b.append_name (operand_suffixes[instance.op_info->op]); + + if (!overloaded_p) + { + if (instance.op_info->op == OP_TYPE_vv) + b.append_name (type_suffixes[instance.type.index].vector); + else + { + vector_type_index arg0_type_idx + = instance.op_info->args[1].get_function_type_index + (instance.type.index); + b.append_name (type_suffixes[arg0_type_idx].vector); + vector_type_index ret_type_idx + = instance.op_info->ret.get_function_type_index + (instance.type.index); + b.append_name (type_suffixes[ret_type_idx].vector); + } + } + + b.append_name (predication_suffixes[instance.pred]); + return b.finish_name (); + } +}; + +/* vaeskf1/vaeskf2/vsm4k/vsm3c class. */ +struct crypto_vi_def : public build_base +{ + char *get_name (function_builder &b, const function_instance &instance, + bool overloaded_p) const override + { + /* Return nullptr if it can not be overloaded. */ + if (overloaded_p && !instance.base->can_be_overloaded_p (instance.pred)) + return nullptr; + b.append_base_name (instance.base_name); + if (!overloaded_p) + { + b.append_name (operand_suffixes[instance.op_info->op]); + b.append_name (type_suffixes[instance.type.index].vector); + } + b.append_name (predication_suffixes[instance.pred]); + return b.finish_name (); + } +}; + +/* vaesz class. */ +struct crypto_vv_no_op_type_def : public build_base +{ + char *get_name (function_builder &b, const function_instance &instance, + bool overloaded_p) const override + { + /* Return nullptr if it can not be overloaded. */ + if (overloaded_p && !instance.base->can_be_overloaded_p (instance.pred)) + return nullptr; + b.append_base_name (instance.base_name); + + if (!overloaded_p) + { + b.append_name (operand_suffixes[instance.op_info->op]); + vector_type_index arg0_type_idx + = instance.op_info->args[1].get_function_type_index + (instance.type.index); + b.append_name (type_suffixes[arg0_type_idx].vector); + vector_type_index ret_type_idx + = instance.op_info->ret.get_function_type_index + (instance.type.index); + b.append_name (type_suffixes[ret_type_idx].vector); + } + + b.append_name (predication_suffixes[instance.pred]); + return b.finish_name (); + } +}; + SHAPE(vsetvl, vsetvl) SHAPE(vsetvl, vsetvlmax) SHAPE(loadstore, loadstore) @@ -1012,5 +1095,7 @@ SHAPE(vlenb, vlenb) SHAPE(seg_loadstore, seg_loadstore) SHAPE(seg_indexed_loadstore, seg_indexed_loadstore) SHAPE(seg_fault_load, seg_fault_load) - +SHAPE(crypto_vv, crypto_vv) +SHAPE(crypto_vi, crypto_vi) +SHAPE(crypto_vv_no_op_type, crypto_vv_no_op_type) } // end namespace riscv_vector diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.h b/gcc/config/riscv/riscv-vector-builtins-shapes.h index d3b5cf7..ac2a28c 100644 --- a/gcc/config/riscv/riscv-vector-builtins-shapes.h +++ b/gcc/config/riscv/riscv-vector-builtins-shapes.h @@ -52,6 +52,10 @@ extern const function_shape *const vlenb; extern const function_shape *const seg_loadstore; extern const function_shape *const seg_indexed_loadstore; extern const function_shape *const seg_fault_load; +/* Below function_shape are Vectro Crypto*/ +extern const function_shape *const crypto_vv; +extern const function_shape *const crypto_vi; +extern const function_shape *const crypto_vv_no_op_type; } } // end namespace riscv_vector diff --git a/gcc/config/riscv/riscv-vector-builtins-types.def b/gcc/config/riscv/riscv-vector-builtins-types.def index 06dc710..61019a5 100644 --- a/gcc/config/riscv/riscv-vector-builtins-types.def +++ b/gcc/config/riscv/riscv-vector-builtins-types.def @@ -339,6 +339,18 @@ along with GCC; see the file COPYING3. If not see #define DEF_RVV_TUPLE_OPS(TYPE, REQUIRE) #endif +/* Use "DEF_RVV_CRYPTO_SEW32_OPS" macro include all SEW=32 types + which will be iterated and registered as intrinsic functions. */ +#ifndef DEF_RVV_CRYPTO_SEW32_OPS +#define DEF_RVV_CRYPTO_SEW32_OPS(TYPE, REQUIRE) +#endif + +/* Use "DEF_RVV_CRYPTO_SEW64_OPS" macro include all SEW=64 types + which will be iterated and registered as intrinsic functions. */ +#ifndef DEF_RVV_CRYPTO_SEW64_OPS +#define DEF_RVV_CRYPTO_SEW64_OPS(TYPE, REQUIRE) +#endif + DEF_RVV_I_OPS (vint8mf8_t, RVV_REQUIRE_MIN_VLEN_64) DEF_RVV_I_OPS (vint8mf4_t, 0) DEF_RVV_I_OPS (vint8mf2_t, 0) @@ -1355,6 +1367,17 @@ DEF_RVV_TUPLE_OPS (vfloat64m2x3_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_TUPLE_OPS (vfloat64m2x4_t, RVV_REQUIRE_ELEN_FP_64) DEF_RVV_TUPLE_OPS (vfloat64m4x2_t, RVV_REQUIRE_ELEN_FP_64) +DEF_RVV_CRYPTO_SEW32_OPS (vuint32mf2_t, RVV_REQUIRE_MIN_VLEN_64) +DEF_RVV_CRYPTO_SEW32_OPS (vuint32m1_t, 0) +DEF_RVV_CRYPTO_SEW32_OPS (vuint32m2_t, 0) +DEF_RVV_CRYPTO_SEW32_OPS (vuint32m4_t, 0) +DEF_RVV_CRYPTO_SEW32_OPS (vuint32m8_t, 0) + +DEF_RVV_CRYPTO_SEW64_OPS (vuint64m1_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_CRYPTO_SEW64_OPS (vuint64m2_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_CRYPTO_SEW64_OPS (vuint64m4_t, RVV_REQUIRE_ELEN_64) +DEF_RVV_CRYPTO_SEW64_OPS (vuint64m8_t, RVV_REQUIRE_ELEN_64) + #undef DEF_RVV_I_OPS #undef DEF_RVV_U_OPS #undef DEF_RVV_F_OPS @@ -1406,3 +1429,5 @@ DEF_RVV_TUPLE_OPS (vfloat64m4x2_t, RVV_REQUIRE_ELEN_FP_64) #undef DEF_RVV_LMUL2_OPS #undef DEF_RVV_LMUL4_OPS #undef DEF_RVV_TUPLE_OPS +#undef DEF_RVV_CRYPTO_SEW32_OPS +#undef DEF_RVV_CRYPTO_SEW64_OPS diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc index d509be5..25e0b6e 100644 --- a/gcc/config/riscv/riscv-vector-builtins.cc +++ b/gcc/config/riscv/riscv-vector-builtins.cc @@ -521,6 +521,19 @@ static const rvv_type_info tuple_ops[] = { #include "riscv-vector-builtins-types.def" {NUM_VECTOR_TYPES, 0}}; +/* Below types will be registered for vector-crypto intrinsic functions*/ +/* A list of sew32 will be registered for vector-crypto intrinsic functions. */ +static const rvv_type_info crypto_sew32_ops[] = { +#define DEF_RVV_CRYPTO_SEW32_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE}, +#include "riscv-vector-builtins-types.def" + {NUM_VECTOR_TYPES, 0}}; + +/* A list of sew64 will be registered for vector-crypto intrinsic functions. */ +static const rvv_type_info crypto_sew64_ops[] = { +#define DEF_RVV_CRYPTO_SEW64_OPS(TYPE, REQUIRE) {VECTOR_TYPE_##TYPE, REQUIRE}, +#include "riscv-vector-builtins-types.def" + {NUM_VECTOR_TYPES, 0}}; + static CONSTEXPR const rvv_arg_type_info rvv_arg_type_info_end = rvv_arg_type_info (NUM_BASE_TYPES); @@ -754,6 +767,11 @@ static CONSTEXPR const rvv_arg_type_info v_size_args[] = {rvv_arg_type_info (RVV_BASE_vector), rvv_arg_type_info (RVV_BASE_size), rvv_arg_type_info_end}; +/* A list of args for vector_type func (double demote_type, size_t) function. */ +static CONSTEXPR const rvv_arg_type_info wv_size_args[] + = {rvv_arg_type_info (RVV_BASE_double_trunc_vector), + rvv_arg_type_info (RVV_BASE_size),rvv_arg_type_info_end}; + /* A list of args for vector_type func (vector_type, vector_type, size) * function. */ static CONSTEXPR const rvv_arg_type_info vv_size_args[] @@ -1044,6 +1062,14 @@ static CONSTEXPR const rvv_op_info u_v_ops rvv_arg_type_info (RVV_BASE_vector), /* Return type */ end_args /* Args */}; +/* A static operand information for vector_type func (vector_type) + * function registration. */ +static CONSTEXPR const rvv_op_info u_vv_ops + = {u_ops, /* Types */ + OP_TYPE_v, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + v_args /* Args */}; + /* A static operand information for unsigned long func (vector_type) * function registration. */ static CONSTEXPR const rvv_op_info b_ulong_m_ops @@ -2174,6 +2200,14 @@ static CONSTEXPR const rvv_op_info u_wvv_ops rvv_arg_type_info (RVV_BASE_vector), /* Return type */ wvv_args /* Args */}; +/* A static operand information for vector_type func (double demote type, size type) + * function registration. */ +static CONSTEXPR const rvv_op_info u_shift_wvx_ops + = {wextu_ops, /* Types */ + OP_TYPE_vx, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + wv_size_args /* Args */}; + /* A static operand information for vector_type func (double demote type, double * demote scalar_type) function registration. */ static CONSTEXPR const rvv_op_info i_wvx_ops @@ -2604,6 +2638,101 @@ static CONSTEXPR const rvv_op_info all_v_vcreate_lmul4_x2_ops rvv_arg_type_info (RVV_BASE_vlmul_ext_x2), /* Return type */ ext_vcreate_args /* Args */}; +/* A static operand information for vector_type func (vector_type). + Some ins just supports SEW=32, such as crypto vectol Zvkg extension. + * function registration. */ +static CONSTEXPR const rvv_arg_type_info vs_lmul_x2_args[] + = {rvv_arg_type_info (RVV_BASE_vlmul_ext_x2), + rvv_arg_type_info (RVV_BASE_vector), rvv_arg_type_info_end}; + +static CONSTEXPR const rvv_arg_type_info vs_lmul_x4_args[] + = {rvv_arg_type_info (RVV_BASE_vlmul_ext_x4), + rvv_arg_type_info (RVV_BASE_vector), rvv_arg_type_info_end}; + +static CONSTEXPR const rvv_arg_type_info vs_lmul_x8_args[] + = {rvv_arg_type_info (RVV_BASE_vlmul_ext_x8), + rvv_arg_type_info (RVV_BASE_vector), rvv_arg_type_info_end}; + +static CONSTEXPR const rvv_arg_type_info vs_lmul_x16_args[] + = {rvv_arg_type_info (RVV_BASE_vlmul_ext_x16), + rvv_arg_type_info (RVV_BASE_vector), rvv_arg_type_info_end}; + +static CONSTEXPR const rvv_op_info u_vvv_crypto_sew32_ops + = {crypto_sew32_ops, /* Types */ + OP_TYPE_vv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + vv_args /* Args */}; + +static CONSTEXPR const rvv_op_info u_vvvv_crypto_sew32_ops + = {crypto_sew32_ops, /* Types */ + OP_TYPE_vv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + vvv_args /* Args */}; + +static CONSTEXPR const rvv_op_info u_vvv_size_crypto_sew32_ops + = {crypto_sew32_ops, /* Types */ + OP_TYPE_vi, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + vv_size_args /* Args */}; + +static CONSTEXPR const rvv_op_info u_vv_size_crypto_sew32_ops + = {crypto_sew32_ops, /* Types */ + OP_TYPE_vi, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + v_size_args /* Args */}; + +static CONSTEXPR const rvv_op_info u_vvs_crypto_sew32_ops + = {crypto_sew32_ops, /* Types */ + OP_TYPE_vs, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + vv_args /* Args */}; + +static CONSTEXPR const rvv_op_info u_vvs_crypto_sew32_lmul_x2_ops + = {crypto_sew32_ops, /* Types */ + OP_TYPE_vs, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vlmul_ext_x2), /* Return type */ + vs_lmul_x2_args /* Args */}; + +static CONSTEXPR const rvv_op_info u_vvs_crypto_sew32_lmul_x4_ops + = {crypto_sew32_ops, /* Types */ + OP_TYPE_vs, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vlmul_ext_x4), /* Return type */ + vs_lmul_x4_args /* Args */}; + +static CONSTEXPR const rvv_op_info u_vvs_crypto_sew32_lmul_x8_ops + = {crypto_sew32_ops, /* Types */ + OP_TYPE_vs, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vlmul_ext_x8), /* Return type */ + vs_lmul_x8_args /* Args */}; + +static CONSTEXPR const rvv_op_info u_vvs_crypto_sew32_lmul_x16_ops + = {crypto_sew32_ops, /* Types */ + OP_TYPE_vs, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vlmul_ext_x16), /* Return type */ + vs_lmul_x16_args /* Args */}; + +/* A static operand information for vector_type func (vector_type). + Some ins just supports SEW=64, such as crypto vectol Zvbc extension + vclmul.vv, vclmul.vx. + * function registration. */ +static CONSTEXPR const rvv_op_info u_vvv_crypto_sew64_ops + = {crypto_sew64_ops, /* Types */ + OP_TYPE_vv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + vv_args /* Args */}; + +static CONSTEXPR const rvv_op_info u_vvx_crypto_sew64_ops + = {crypto_sew64_ops, /* Types */ + OP_TYPE_vx, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + vx_args /* Args */}; + +static CONSTEXPR const rvv_op_info u_vvvv_crypto_sew64_ops + = {crypto_sew64_ops, /* Types */ + OP_TYPE_vv, /* Suffix */ + rvv_arg_type_info (RVV_BASE_vector), /* Return type */ + vvv_args /* Args */}; + /* A list of all RVV base function types. */ static CONSTEXPR const function_type_info function_types[] = { #define DEF_RVV_TYPE_INDEX( \ @@ -4176,7 +4305,9 @@ registered_function::overloaded_hash (const vec &arglist) __riscv_vset(vint8m2_t dest, size_t index, vint8m1_t value); The reason is the same as above. */ if ((instance.base == bases::vget && (i == (len - 1))) - || (instance.base == bases::vset && (i == (len - 2)))) + || ((instance.base == bases::vset + || instance.shape == shapes::crypto_vi) + && (i == (len - 2)))) argument_types.safe_push (size_type_node); /* Vector fixed-point arithmetic instructions requiring argument vxrm. For example: vuint32m4_t __riscv_vaaddu(vuint32m4_t vs2, diff --git a/gcc/config/riscv/riscv-vector-builtins.def b/gcc/config/riscv/riscv-vector-builtins.def index 4b2ca98..055ee8b 100644 --- a/gcc/config/riscv/riscv-vector-builtins.def +++ b/gcc/config/riscv/riscv-vector-builtins.def @@ -558,6 +558,7 @@ DEF_RVV_TYPE (vfloat64m8_t, 17, __rvv_float64m8_t, double, RVVM8DF, _f64m8, DEF_RVV_OP_TYPE (vv) DEF_RVV_OP_TYPE (vx) +DEF_RVV_OP_TYPE (vi) DEF_RVV_OP_TYPE (v) DEF_RVV_OP_TYPE (wv) DEF_RVV_OP_TYPE (wx) -- cgit v1.1 From 411b210797d139811bffa15471523162db6f5f89 Mon Sep 17 00:00:00 2001 From: Feng Wang Date: Mon, 8 Jan 2024 09:12:00 +0000 Subject: RISC-V: Add crypto vector api-testing cases. Patch v8: Resubmit after fix the rtl-checking issue. Passed all the riscv regression test. Patch v7: Add newline at the end of file. Patch v6: Move intrinsic tests into rvv/base. Patch v5: Rebase Patch v4: Add some RV32 vx constraint testcase. Patch v3: Refine crypto vector api-testing cases. Patch v2: Update march info according to the change of riscv-common.c This patch add crypto vector api-testing cases based on https://github.com/riscv-non-isa/rvv-intrinsic-doc/blob/eopc/vector-crypto/auto-generated/vector-crypto gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/zvbb-intrinsic.c: New test. * gcc.target/riscv/rvv/base/zvbb_vandn_vx_constraint.c: New test. * gcc.target/riscv/rvv/base/zvbc-intrinsic.c: New test. * gcc.target/riscv/rvv/base/zvbc_vx_constraint-1.c: New test. * gcc.target/riscv/rvv/base/zvbc_vx_constraint-2.c: New test. * gcc.target/riscv/rvv/base/zvkg-intrinsic.c: New test. * gcc.target/riscv/rvv/base/zvkned-intrinsic.c: New test. * gcc.target/riscv/rvv/base/zvknha-intrinsic.c: New test. * gcc.target/riscv/rvv/base/zvknhb-intrinsic.c: New test. * gcc.target/riscv/rvv/base/zvksed-intrinsic.c: New test. * gcc.target/riscv/rvv/base/zvksh-intrinsic.c: New test. * gcc.target/riscv/zvkb.c: New test. --- .../gcc.target/riscv/rvv/base/zvbb-intrinsic.c | 179 +++++++++++++++++++++ .../riscv/rvv/base/zvbb_vandn_vx_constraint.c | 15 ++ .../gcc.target/riscv/rvv/base/zvbc-intrinsic.c | 62 +++++++ .../riscv/rvv/base/zvbc_vx_constraint-1.c | 14 ++ .../riscv/rvv/base/zvbc_vx_constraint-2.c | 14 ++ .../gcc.target/riscv/rvv/base/zvkg-intrinsic.c | 24 +++ .../gcc.target/riscv/rvv/base/zvkned-intrinsic.c | 104 ++++++++++++ .../gcc.target/riscv/rvv/base/zvknha-intrinsic.c | 33 ++++ .../gcc.target/riscv/rvv/base/zvknhb-intrinsic.c | 33 ++++ .../gcc.target/riscv/rvv/base/zvksed-intrinsic.c | 33 ++++ .../gcc.target/riscv/rvv/base/zvksh-intrinsic.c | 24 +++ gcc/testsuite/gcc.target/riscv/zvkb.c | 13 ++ 12 files changed, 548 insertions(+) create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvbb-intrinsic.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvbb_vandn_vx_constraint.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvbc-intrinsic.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvbc_vx_constraint-1.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvbc_vx_constraint-2.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvkg-intrinsic.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvkned-intrinsic.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvknha-intrinsic.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvknhb-intrinsic.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvksed-intrinsic.c create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/zvksh-intrinsic.c create mode 100644 gcc/testsuite/gcc.target/riscv/zvkb.c diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvbb-intrinsic.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvbb-intrinsic.c new file mode 100644 index 0000000..b7e25bfe --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvbb-intrinsic.c @@ -0,0 +1,179 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zvbb_zve64x -mabi=lp64d -Wno-psabi" } */ +#include "riscv_vector.h" + +vuint8mf8_t test_vandn_vv_u8mf8(vuint8mf8_t vs2, vuint8mf8_t vs1, size_t vl) { + return __riscv_vandn_vv_u8mf8(vs2, vs1, vl); +} + +vuint32m1_t test_vandn_vx_u32m1(vuint32m1_t vs2, uint32_t rs1, size_t vl) { + return __riscv_vandn_vx_u32m1(vs2, rs1, vl); +} + +vuint32m2_t test_vandn_vv_u32m2_m(vbool16_t mask, vuint32m2_t vs2, vuint32m2_t vs1, size_t vl) { + return __riscv_vandn_vv_u32m2_m(mask, vs2, vs1, vl); +} + +vuint16mf2_t test_vandn_vx_u16mf2_m(vbool32_t mask, vuint16mf2_t vs2, uint16_t rs1, size_t vl) { + return __riscv_vandn_vx_u16mf2_m(mask, vs2, rs1, vl); +} + +vuint32m4_t test_vandn_vv_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, vuint32m4_t vs1, size_t vl) { + return __riscv_vandn_vv_u32m4_tumu(mask, maskedoff, vs2, vs1, vl); +} + +vuint64m4_t test_vandn_vx_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, uint64_t rs1, size_t vl) { + return __riscv_vandn_vx_u64m4_tumu(mask, maskedoff, vs2, rs1, vl); +} + +vuint8m8_t test_vbrev_v_u8m8(vuint8m8_t vs2, size_t vl) { + return __riscv_vbrev_v_u8m8(vs2, vl); +} + +vuint16m1_t test_vbrev_v_u16m1_m(vbool16_t mask, vuint16m1_t vs2, size_t vl) { + return __riscv_vbrev_v_u16m1_m(mask, vs2, vl); +} + +vuint32m4_t test_vbrev_v_u32m4_tumu(vbool8_t mask, vuint32m4_t maskedoff, vuint32m4_t vs2, size_t vl) { + return __riscv_vbrev_v_u32m4_tumu(mask, maskedoff, vs2, vl); +} + +vuint16mf4_t test_vbrev8_v_u16mf4(vuint16mf4_t vs2, size_t vl) { + return __riscv_vbrev8_v_u16mf4(vs2, vl); +} + +vuint32m1_t test_vbrev8_v_u32m1_m(vbool32_t mask, vuint32m1_t vs2, size_t vl) { + return __riscv_vbrev8_v_u32m1_m(mask, vs2, vl); +} + +vuint64m1_t test_vbrev8_v_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t vl) { + return __riscv_vbrev8_v_u64m1_tumu(mask, maskedoff, vs2, vl); +} + +vuint16m4_t test_vrev8_v_u16m4(vuint16m4_t vs2, size_t vl) { + return __riscv_vrev8_v_u16m4(vs2, vl); +} + +vuint8m4_t test_vrev8_v_u8m4_m(vbool2_t mask, vuint8m4_t vs2, size_t vl) { + return __riscv_vrev8_v_u8m4_m(mask, vs2, vl); +} + +vuint32m1_t test_vrev8_v_u32m1_tumu(vbool32_t mask, vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) { + return __riscv_vrev8_v_u32m1_tumu(mask, maskedoff, vs2, vl); +} + +vuint8m8_t test_vrol_vv_u8m8(vuint8m8_t vs2, vuint8m8_t vs1, size_t vl) { + return __riscv_vrol_vv_u8m8(vs2, vs1, vl); +} + +vuint16m4_t test_vrol_vx_u16m4(vuint16m4_t vs2, size_t rs1, size_t vl) { + return __riscv_vrol_vx_u16m4(vs2, rs1, vl); +} + +vuint16mf2_t test_vrol_vv_u16mf2_m(vbool32_t mask, vuint16mf2_t vs2, vuint16mf2_t vs1, size_t vl) { + return __riscv_vrol_vv_u16mf2_m(mask, vs2, vs1, vl); +} + +vuint64m1_t test_vrol_vx_u64m1_m(vbool64_t mask, vuint64m1_t vs2, size_t rs1, size_t vl) { + return __riscv_vrol_vx_u64m1_m(mask, vs2, rs1, vl); +} + +vuint8m1_t test_vrol_vv_u8m1_tumu(vbool8_t mask, vuint8m1_t maskedoff, vuint8m1_t vs2, vuint8m1_t vs1, size_t vl) { + return __riscv_vrol_vv_u8m1_tumu(mask, maskedoff, vs2, vs1, vl); +} + +vuint16m2_t test_vrol_vx_u16m2_tumu(vbool8_t mask, vuint16m2_t maskedoff, vuint16m2_t vs2, size_t rs1, size_t vl) { + return __riscv_vrol_vx_u16m2_tumu(mask, maskedoff, vs2, rs1, vl); +} + +vuint8m8_t test_vror_vv_u8m8(vuint8m8_t vs2, vuint8m8_t vs1, size_t vl) { + return __riscv_vror_vv_u8m8(vs2, vs1, vl); +} + +vuint32m2_t test_vror_vx_u32m2(vuint32m2_t vs2, size_t rs1, size_t vl) { + return __riscv_vror_vx_u32m2(vs2, rs1, vl); +} + +vuint16mf2_t test_vror_vv_u16mf2_m(vbool32_t mask, vuint16mf2_t vs2, vuint16mf2_t vs1, size_t vl) { + return __riscv_vror_vv_u16mf2_m(mask, vs2, vs1, vl); +} + +vuint16m1_t test_vror_vx_u16m1_m(vbool16_t mask, vuint16m1_t vs2, size_t rs1, size_t vl) { + return __riscv_vror_vx_u16m1_m(mask, vs2, rs1, vl); +} + +vuint16mf2_t test_vror_vv_u16mf2_tumu(vbool32_t mask, vuint16mf2_t maskedoff, vuint16mf2_t vs2, vuint16mf2_t vs1, size_t vl) { + return __riscv_vror_vv_u16mf2_tumu(mask, maskedoff, vs2, vs1, vl); +} + +vuint64m1_t test_vror_vx_u64m1_tumu(vbool64_t mask, vuint64m1_t maskedoff, vuint64m1_t vs2, size_t rs1, size_t vl) { + return __riscv_vror_vx_u64m1_tumu(mask, maskedoff, vs2, rs1, vl); +} + +vuint8m2_t test_vclz_v_u8m2(vuint8m2_t vs2, size_t vl) { + return __riscv_vclz_v_u8m2(vs2, vl); +} + +vuint64m2_t test_vclz_v_u64m2_m(vbool32_t mask, vuint64m2_t vs2, size_t vl) { + return __riscv_vclz_v_u64m2_m(mask, vs2, vl); +} + +vuint16mf4_t test_vctz_v_u16mf4(vuint16mf4_t vs2, size_t vl) { + return __riscv_vctz_v_u16mf4(vs2, vl); +} + +vuint32m8_t test_vctz_v_u32m8_m(vbool4_t mask, vuint32m8_t vs2, size_t vl) { + return __riscv_vctz_v_u32m8_m(mask, vs2, vl); +} + +vuint16mf4_t test_vwsll_vx_u16mf4(vuint8mf8_t vs2, size_t rs1, size_t vl) { + return __riscv_vwsll_vx_u16mf4(vs2, rs1, vl); +} + +vuint16m1_t test_vwsll_vv_u16m1(vuint8mf2_t vs2, vuint8mf2_t vs1, size_t vl) { + return __riscv_vwsll_vv_u16m1(vs2, vs1, vl); +} + +vuint32m2_t test_vwsll_vv_u32m2_m(vbool16_t mask, vuint16m1_t vs2, vuint16m1_t vs1, size_t vl) { + return __riscv_vwsll_vv_u32m2_m(mask, vs2, vs1, vl); +} + +vuint32m2_t test_vwsll_vx_u32m2_m(vbool16_t mask, vuint16m1_t vs2, size_t rs1, size_t vl) { + return __riscv_vwsll_vx_u32m2_m(mask, vs2, rs1, vl); +} + +vuint16mf4_t test_vwsll_vv_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedoff, vuint8mf8_t vs2, vuint8mf8_t vs1, size_t vl) { + return __riscv_vwsll_vv_u16mf4_tumu(mask, maskedoff, vs2, vs1, vl); +} + +vuint16mf4_t test_vwsll_vx_u16mf4_tumu(vbool64_t mask, vuint16mf4_t maskedoff, vuint8mf8_t vs2, size_t rs1, size_t vl) { + return __riscv_vwsll_vx_u16mf4_tumu(mask, maskedoff, vs2, rs1, vl); +} +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*ta,\s*ma} 26 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*tu,\s*mu} 11 } } */ +/* { dg-final { scan-assembler-times {vandn\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vandn\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vandn\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vandn\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vbrev\.v\s+v[0-9]+,\s*v[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vbrev\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vbrev8\.v\s+v[0-9]+,\s*v[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vbrev8\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vrev8\.v\s+v[0-9]+,\s*v[0-9]} 3} } */ +/* { dg-final { scan-assembler-times {vrev8\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vrol\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vrol\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vrol\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vrol\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vror\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vror\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vror\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vror\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vclz\.v\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vclz\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0.t} 1 } } */ +/* { dg-final { scan-assembler-times {vctz\.v\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vctz\.v\s+v[0-9]+,\s*v[0-9]+,\s*v0.t} 1 } } */ +/* { dg-final { scan-assembler-times {vwsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vwsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vwsll\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vwsll\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]+,\s*v0.t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvbb_vandn_vx_constraint.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvbb_vandn_vx_constraint.c new file mode 100644 index 0000000..b3e879e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvbb_vandn_vx_constraint.c @@ -0,0 +1,15 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zvbb_zve64x -mabi=ilp32 -O3 -Wno-psabi" } */ +#include "riscv_vector.h" + +vuint64m1_t test_vandn_vx_u64m1(vuint64m1_t vs2, uint64_t rs1, size_t vl) { + return __riscv_vandn_vx_u64m1(vs2, rs1, vl); +} + +vuint64m1_t test_vandn_vx_u64m1_extend(vuint64m1_t vs2, size_t vl) { + uint32_t rs1 = 0x12345678; + return __riscv_vandn_vx_u64m1(vs2, rs1, vl); +} + +/* { dg-final { scan-assembler-times {vandn\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]} 1 } } */ +/* { dg-final { scan-assembler-times {vandn\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvbc-intrinsic.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvbc-intrinsic.c new file mode 100644 index 0000000..ae2a5b6 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvbc-intrinsic.c @@ -0,0 +1,62 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zvbc -mabi=lp64d -O2 -Wno-psabi" } */ + +#include "riscv_vector.h" + +vuint64m1_t test_vclmul_vv_u64m1(vuint64m1_t vs2, vuint64m1_t vs1, size_t vl) { + return __riscv_vclmul_vv_u64m1(vs2, vs1, vl); +} + +vuint64m1_t test_vclmul_vx_u64m1(vuint64m1_t vs2, uint64_t rs1, size_t vl) { + return __riscv_vclmul_vx_u64m1(vs2, rs1, vl); +} + +vuint64m2_t test_vclmul_vv_u64m2_m(vbool32_t mask, vuint64m2_t vs2, vuint64m2_t vs1, size_t vl) { + return __riscv_vclmul_vv_u64m2_m(mask, vs2, vs1, vl); +} + +vuint64m2_t test_vclmul_vx_u64m2_m(vbool32_t mask, vuint64m2_t vs2, uint64_t rs1, size_t vl) { + return __riscv_vclmul_vx_u64m2_m(mask, vs2, rs1, vl); +} + +vuint64m4_t test_vclmul_vv_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, vuint64m4_t vs1, size_t vl) { + return __riscv_vclmul_vv_u64m4_tumu(mask, maskedoff, vs2, vs1, vl); +} + +vuint64m4_t test_vclmul_vx_u64m4_tumu(vbool16_t mask, vuint64m4_t maskedoff, vuint64m4_t vs2, uint64_t rs1, size_t vl) { + return __riscv_vclmul_vx_u64m4_tumu(mask, maskedoff, vs2, rs1, vl); +} + +vuint64m2_t test_vclmulh_vv_u64m2(vuint64m2_t vs2, vuint64m2_t vs1, size_t vl) { + return __riscv_vclmulh_vv_u64m2(vs2, vs1, vl); +} + +vuint64m2_t test_vclmulh_vx_u64m2(vuint64m2_t vs2, uint64_t rs1, size_t vl) { + return __riscv_vclmulh_vx_u64m2(vs2, rs1, vl); +} + +vuint64m1_t test_vclmulh_vx_u64m1_m(vbool64_t mask, vuint64m1_t vs2, uint64_t rs1, size_t vl) { + return __riscv_vclmulh_vx_u64m1_m(mask, vs2, rs1, vl); +} + +vuint64m2_t test_vclmulh_vv_u64m2_m(vbool32_t mask, vuint64m2_t vs2, vuint64m2_t vs1, size_t vl) { + return __riscv_vclmulh_vv_u64m2_m(mask, vs2, vs1, vl); +} + +vuint64m8_t test_vclmulh_vv_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, vuint64m8_t vs1, size_t vl) { + return __riscv_vclmulh_vv_u64m8_tumu(mask, maskedoff, vs2, vs1, vl); +} + +vuint64m8_t test_vclmulh_vx_u64m8_tumu(vbool8_t mask, vuint64m8_t maskedoff, vuint64m8_t vs2, uint64_t rs1, size_t vl) { + return __riscv_vclmulh_vx_u64m8_tumu(mask, maskedoff, vs2, rs1, vl); +} +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*ta,\s*ma} 8 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*tu,\s*mu} 4 } } */ +/* { dg-final { scan-assembler-times {vclmul\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vclmul\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vclmul\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vclmul\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vclmulh\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vclmulh\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+,\s*v0.t} 2 } } */ +/* { dg-final { scan-assembler-times {vclmulh\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]} 3 } } */ +/* { dg-final { scan-assembler-times {vclmulh\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]+,\s*v0.t} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvbc_vx_constraint-1.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvbc_vx_constraint-1.c new file mode 100644 index 0000000..8c17163 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvbc_vx_constraint-1.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv32gc_zvbc -mabi=ilp32 -O3 -Wno-psabi" } */ +#include "riscv_vector.h" + +vuint64m1_t test_vclmul_vx_u64m1(vuint64m1_t vs2, uint64_t rs1, size_t vl) { + return __riscv_vclmul_vx_u64m1(vs2, rs1, vl); +} + +vuint64m1_t test_vclmulh_vx_u64m1(vuint64m1_t vs2, uint64_t rs1, size_t vl) { + return __riscv_vclmulh_vx_u64m1(vs2, rs1, vl); +} + +/* { dg-final { scan-assembler-times {vclmul\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]} 1 } } */ +/* { dg-final { scan-assembler-times {vclmulh\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvbc_vx_constraint-2.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvbc_vx_constraint-2.c new file mode 100644 index 0000000..9ee70d7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvbc_vx_constraint-2.c @@ -0,0 +1,14 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zvbc -mabi=lp64d -O3 -Wno-psabi" } */ +#include "riscv_vector.h" + +vuint64m1_t test_vclmul_vx_u64m1_extend(vuint64m1_t vs2, uint32_t rs1, size_t vl) { + return __riscv_vclmul_vx_u64m1(vs2, rs1, vl); +} + +vuint64m1_t test_vclmulh_vx_u64m1_extend(vuint64m1_t vs2, uint32_t rs1, size_t vl) { + return __riscv_vclmulh_vx_u64m1(vs2, rs1, vl); +} + +/* { dg-final { scan-assembler-times {vclmul\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]} 1 } } */ +/* { dg-final { scan-assembler-times {vclmulh\.vx\s+v[0-9]+,\s*v[0-9]+,\s*a[0-9]} 1 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvkg-intrinsic.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvkg-intrinsic.c new file mode 100644 index 0000000..fa68310 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvkg-intrinsic.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zvkg_zve64x -mabi=lp64d -O2 -Wno-psabi" } */ + +#include "riscv_vector.h" + +vuint32mf2_t test_vgmul_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) { + return __riscv_vgmul_vv_u32mf2(vd, vs2, vl); +} + +vuint32m1_t test_vgmul_vv_u32m1_tu(vuint32m1_t vd, vuint32m1_t vs2, size_t vl) { + return __riscv_vgmul_vv_u32m1_tu(vd, vs2, vl); +} + +vuint32m2_t test_vghsh_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1, size_t vl) { + return __riscv_vghsh_vv_u32m2(vd, vs2, vs1, vl); +} + +vuint32m4_t test_vghsh_vv_u32m4_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1, size_t vl) { + return __riscv_vghsh_vv_u32m4_tu(vd, vs2, vs1, vl); +} +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*ta,\s*ma} 2 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*tu,\s*ma} 2 } } */ +/* { dg-final { scan-assembler-times {vgmul\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vghsh\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvkned-intrinsic.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvkned-intrinsic.c new file mode 100644 index 0000000..4141573 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvkned-intrinsic.c @@ -0,0 +1,104 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zvkned_zve64x -mabi=lp64d -O2 -Wno-psabi" } */ +#include "riscv_vector.h" + +vuint32mf2_t test_vaesdf_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) { + return __riscv_vaesdf_vv_u32mf2(vd, vs2, vl); +} + +vuint32mf2_t test_vaesdf_vs_u32mf2_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) { + return __riscv_vaesdf_vs_u32mf2_u32mf2(vd, vs2, vl); +} + +vuint32m2_t test_vaesdf_vv_u32m2_tu(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) { + return __riscv_vaesdf_vv_u32m2_tu(vd, vs2, vl); +} + +vuint32m2_t test_vaesdf_vs_u32m2_u32m2_tu(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) { + return __riscv_vaesdf_vs_u32m2_u32m2_tu(vd, vs2, vl); +} + +vuint32m1_t test_vaesdm_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs2, size_t vl) { + return __riscv_vaesdm_vv_u32m1(vd, vs2, vl); +} + +vuint32m4_t test_vaesdm_vs_u32m1_u32m4(vuint32m4_t vd, vuint32m1_t vs2, size_t vl) { + return __riscv_vaesdm_vs_u32m1_u32m4(vd, vs2, vl); +} + +vuint32m1_t test_vaesdm_vv_u32m1_tu(vuint32m1_t vd, vuint32m1_t vs2, size_t vl) { + return __riscv_vaesdm_vv_u32m1_tu(vd, vs2, vl); +} + +vuint32m2_t test_vaesdm_vs_u32m1_u32m2_tu(vuint32m2_t vd, vuint32m1_t vs2, size_t vl) { + return __riscv_vaesdm_vs_u32m1_u32m2_tu(vd, vs2, vl); +} + +vuint32m2_t test_vaesef_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) { + return __riscv_vaesef_vv_u32m2(vd, vs2, vl); +} + +vuint32m2_t test_vaesef_vs_u32m2_u32m2(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) { + return __riscv_vaesef_vs_u32m2_u32m2(vd, vs2, vl); +} + +vuint32m4_t test_vaesef_vv_u32m4_tu(vuint32m4_t vd, vuint32m4_t vs2, size_t vl) { + return __riscv_vaesef_vv_u32m4_tu(vd, vs2, vl); +} + +vuint32m8_t test_vaesef_vs_u32m4_u32m8_tu(vuint32m8_t vd, vuint32m4_t vs2, size_t vl) { + return __riscv_vaesef_vs_u32m4_u32m8_tu(vd, vs2, vl); +} + +vuint32m8_t test_vaesem_vv_u32m8(vuint32m8_t vd, vuint32m8_t vs2, size_t vl) { + return __riscv_vaesem_vv_u32m8(vd, vs2, vl); +} + +vuint32m8_t test_vaesem_vs_u32m8_u32m8(vuint32m8_t vd, vuint32m8_t vs2, size_t vl) { + return __riscv_vaesem_vs_u32m8_u32m8(vd, vs2, vl); +} + +vuint32mf2_t test_vaesem_vv_u32mf2_tu(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) { + return __riscv_vaesem_vv_u32mf2_tu(vd, vs2, vl); +} + +vuint32m8_t test_vaesem_vs_u32mf2_u32m8_tu(vuint32m8_t vd, vuint32mf2_t vs2, size_t vl) { + return __riscv_vaesem_vs_u32mf2_u32m8_tu(vd, vs2, vl); +} + +vuint32mf2_t test_vaeskf1_vi_u32mf2(vuint32mf2_t vs2, size_t vl) { + return __riscv_vaeskf1_vi_u32mf2(vs2, 0, vl); +} + +vuint32m1_t test_vaeskf1_vi_u32m1_tu(vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) { + return __riscv_vaeskf1_vi_u32m1_tu(maskedoff, vs2, 0, vl); +} + +vuint32m2_t test_vaeskf2_vi_u32m2(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) { + return __riscv_vaeskf2_vi_u32m2(vd, vs2, 0, vl); +} + +vuint32m4_t test_vaeskf2_vi_u32m4_tu(vuint32m4_t vd, vuint32m4_t vs2, size_t vl) { + return __riscv_vaeskf2_vi_u32m4_tu(vd, vs2, 0, vl); +} + +vuint32m4_t test_vaesz_vs_u32m1_u32m4(vuint32m4_t vd, vuint32m1_t vs2, size_t vl) { + return __riscv_vaesz_vs_u32m1_u32m4(vd, vs2, vl); +} + +vuint32m8_t test_vaesz_vs_u32m1_u32m8_tu(vuint32m8_t vd, vuint32m1_t vs2, size_t vl) { + return __riscv_vaesz_vs_u32m1_u32m8_tu(vd, vs2, vl); +} +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*ta,\s*ma} 11 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*tu,\s*ma} 11 } } */ +/* { dg-final { scan-assembler-times {vaesdf\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vaesdf\.vs\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vaesdm\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vaesdm\.vs\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vaesef\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vaesef\.vs\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vaesem\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vaesem\.vs\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vaeskf1\.vi\s+v[0-9]+,\s*v[0-9]+,0} 2 } } */ +/* { dg-final { scan-assembler-times {vaeskf2\.vi\s+v[0-9]+,\s*v[0-9]+,0} 2 } } */ +/* { dg-final { scan-assembler-times {vaesz\.vs\s+v[0-9]+,\s*v[0-9]} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvknha-intrinsic.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvknha-intrinsic.c new file mode 100644 index 0000000..40009ad --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvknha-intrinsic.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zvknha_zve64x -mabi=lp64d -O2 -Wno-psabi" } */ + +#include "riscv_vector.h" + +vuint32mf2_t test_vsha2cl_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs2, vuint32mf2_t vs1, size_t vl) { + return __riscv_vsha2cl_vv_u32mf2(vd, vs2, vs1, vl); +} + +vuint32m1_t test_vsha2cl_vv_u32m1_tu(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1, size_t vl) { + return __riscv_vsha2cl_vv_u32m1_tu(vd, vs2, vs1, vl); +} + +vuint32m2_t test_vsha2ch_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1, size_t vl) { + return __riscv_vsha2ch_vv_u32m2(vd, vs2, vs1, vl); +} + +vuint32m4_t test_vsha2ch_vv_u32m4_tu(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1, size_t vl) { + return __riscv_vsha2ch_vv_u32m4_tu(vd, vs2, vs1, vl); +} + +vuint32m4_t test_vsha2ms_vv_u32m4(vuint32m4_t vd, vuint32m4_t vs2, vuint32m4_t vs1, size_t vl) { + return __riscv_vsha2ms_vv_u32m4(vd, vs2, vs1, vl); +} + +vuint32m8_t test_vsha2ms_vv_u32m8_tu(vuint32m8_t vd, vuint32m8_t vs2, vuint32m8_t vs1, size_t vl) { + return __riscv_vsha2ms_vv_u32m8_tu(vd, vs2, vs1, vl); +} +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*ta,\s*ma} 3 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*tu,\s*ma} 3 } } */ +/* { dg-final { scan-assembler-times {vsha2cl\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vsha2ch\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vsha2ms\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvknhb-intrinsic.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvknhb-intrinsic.c new file mode 100644 index 0000000..78aebeb --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvknhb-intrinsic.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zvknhb -mabi=lp64d -O2 -Wno-psabi" } */ + +#include "riscv_vector.h" + +vuint32mf2_t test_vsha2cl_vv_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs2, vuint32mf2_t vs1, size_t vl) { + return __riscv_vsha2cl_vv_u32mf2(vd, vs2, vs1, vl); +} + +vuint32mf2_t test_vsha2cl_vv_u32mf2_tu(vuint32mf2_t vd, vuint32mf2_t vs2, vuint32mf2_t vs1, size_t vl) { + return __riscv_vsha2cl_vv_u32mf2_tu(vd, vs2, vs1, vl); +} + +vuint32m1_t test_vsha2ch_vv_u32m1(vuint32m1_t vd, vuint32m1_t vs2, vuint32m1_t vs1, size_t vl) { + return __riscv_vsha2ch_vv_u32m1(vd, vs2, vs1, vl); +} + +vuint32m2_t test_vsha2ch_vv_u32m2_tu(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1, size_t vl) { + return __riscv_vsha2ch_vv_u32m2_tu(vd, vs2, vs1, vl); +} + +vuint32m2_t test_vsha2ms_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs2, vuint32m2_t vs1, size_t vl) { + return __riscv_vsha2ms_vv_u32m2(vd, vs2, vs1, vl); +} + +vuint64m8_t test_vsha2ms_vv_u64m8_tu(vuint64m8_t vd, vuint64m8_t vs2, vuint64m8_t vs1, size_t vl) { + return __riscv_vsha2ms_vv_u64m8_tu(vd, vs2, vs1, vl); +} +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*ta,\s*ma} 3 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*tu,\s*ma} 3 } } */ +/* { dg-final { scan-assembler-times {vsha2cl\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vsha2ch\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vsha2ms\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvksed-intrinsic.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvksed-intrinsic.c new file mode 100644 index 0000000..b655fe8 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvksed-intrinsic.c @@ -0,0 +1,33 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zvksed_zve64x -mabi=lp64d -O2 -Wno-psabi" } */ + +#include "riscv_vector.h" + +vuint32mf2_t test_vsm4k_vi_u32mf2(vuint32mf2_t vs2, size_t vl) { + return __riscv_vsm4k_vi_u32mf2(vs2, 0, vl); +} + +vuint32m1_t test_vsm4k_vi_u32m1_tu(vuint32m1_t maskedoff, vuint32m1_t vs2, size_t vl) { + return __riscv_vsm4k_vi_u32m1_tu(maskedoff, vs2, 0, vl); +} + +vuint32m2_t test_vsm4r_vv_u32m2(vuint32m2_t vd, vuint32m2_t vs2, size_t vl) { + return __riscv_vsm4r_vv_u32m2(vd, vs2, vl); +} + +vuint32m4_t test_vsm4r_vv_u32m4_tu(vuint32m4_t vd, vuint32m4_t vs2, size_t vl) { + return __riscv_vsm4r_vv_u32m4_tu(vd, vs2, vl); +} + +vuint32m4_t test_vsm4r_vs_u32mf2_u32m4(vuint32m4_t vd, vuint32mf2_t vs2, size_t vl) { + return __riscv_vsm4r_vs_u32mf2_u32m4(vd, vs2, vl); +} + +vuint32m8_t test_vsm4r_vs_u32m1_u32m8_tu(vuint32m8_t vd, vuint32m1_t vs2, size_t vl) { + return __riscv_vsm4r_vs_u32m1_u32m8_tu(vd, vs2, vl); +} +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*ta,\s*ma} 3 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*tu,\s*ma} 3 } } */ +/* { dg-final { scan-assembler-times {vsm4k\.vi\s+v[0-9]+,\s*v[0-9]+,0} 2 } } */ +/* { dg-final { scan-assembler-times {vsm4r\.vv\s+v[0-9]+,\s*v[0-9]} 2 } } */ +/* { dg-final { scan-assembler-times {vsm4r\.vs\s+v[0-9]+,\s*v[0-9]} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/zvksh-intrinsic.c b/gcc/testsuite/gcc.target/riscv/rvv/base/zvksh-intrinsic.c new file mode 100644 index 0000000..353e4e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/zvksh-intrinsic.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zvksh_zve64x -mabi=lp64d -O2 -Wno-psabi" } */ + +#include "riscv_vector.h" + +vuint32mf2_t test_vsm3c_vi_u32mf2(vuint32mf2_t vd, vuint32mf2_t vs2, size_t vl) { + return __riscv_vsm3c_vi_u32mf2(vd, vs2, 0, vl); +} + +vuint32m1_t test_vsm3c_vi_u32m1_tu(vuint32m1_t vd, vuint32m1_t vs2, size_t vl) { + return __riscv_vsm3c_vi_u32m1_tu(vd, vs2, 0, vl); +} + +vuint32m2_t test_vsm3me_vv_u32m2(vuint32m2_t vs2, vuint32m2_t vs1, size_t vl) { + return __riscv_vsm3me_vv_u32m2(vs2, vs1, vl); +} + +vuint32m4_t test_vsm3me_vv_u32m4_tu(vuint32m4_t maskedoff, vuint32m4_t vs2, vuint32m4_t vs1, size_t vl) { + return __riscv_vsm3me_vv_u32m4_tu(maskedoff, vs2, vs1, vl); +} +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*ta,\s*ma} 2 } } */ +/* { dg-final { scan-assembler-times {vsetvli\s*zero,\s*[a-x0-9]+,\s*[a-x0-9]+,m[a-x0-9]+,\s*tu,\s*ma} 2 } } */ +/* { dg-final { scan-assembler-times {vsm3c\.vi\s+v[0-9]+,\s*v[0-9]+,0} 2 } } */ +/* { dg-final { scan-assembler-times {vsm3me\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */ diff --git a/gcc/testsuite/gcc.target/riscv/zvkb.c b/gcc/testsuite/gcc.target/riscv/zvkb.c new file mode 100644 index 0000000..d5c28e7 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/zvkb.c @@ -0,0 +1,13 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gc_zvkb" { target { rv64 } } } */ +/* { dg-options "-march=rv32gc_zvkb" { target { rv32 } } } */ + +#ifndef __riscv_zvkb +#error "Feature macro not defined" +#endif + +int +foo (int a) +{ + return a; +} -- cgit v1.1 From 2e4607666c3238a62d08468720549e70e71417c3 Mon Sep 17 00:00:00 2001 From: Juzhe-Zhong Date: Tue, 9 Jan 2024 10:13:40 +0800 Subject: RISC-V: Fix comments of segment load/store intrinsic [NFC] We have supported segment load/store intrinsics. Committed as it is obvious. gcc/ChangeLog: * config/riscv/riscv-vector-builtins-functions.def (vleff): Move comments. (vundefined): Ditto. --- gcc/config/riscv/riscv-vector-builtins-functions.def | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/config/riscv/riscv-vector-builtins-functions.def b/gcc/config/riscv/riscv-vector-builtins-functions.def index 96dd0d9..f742c98 100644 --- a/gcc/config/riscv/riscv-vector-builtins-functions.def +++ b/gcc/config/riscv/riscv-vector-builtins-functions.def @@ -79,8 +79,6 @@ DEF_RVV_FUNCTION (vsoxei64, indexed_loadstore, none_m_preds, all_v_scalar_ptr_ee // 7.7. Unit-stride Fault-Only-First Loads DEF_RVV_FUNCTION (vleff, fault_load, full_preds, all_v_scalar_const_ptr_size_ptr_ops) -// TODO: 7.8. Vector Load/Store Segment Instructions - /* 11. Vector Integer Arithmetic Instructions. */ // 11.1. Vector Single-Width Integer Add and Subtract @@ -630,6 +628,8 @@ DEF_RVV_FUNCTION (vset, vset, none_preds, all_v_vset_tuple_ops) DEF_RVV_FUNCTION (vget, vget, none_preds, all_v_vget_tuple_ops) DEF_RVV_FUNCTION (vcreate, vcreate, none_preds, all_v_vcreate_tuple_ops) DEF_RVV_FUNCTION (vundefined, vundefined, none_preds, all_none_void_tuple_ops) + +// 7.8. Vector Load/Store Segment Instructions DEF_RVV_FUNCTION (vlseg, seg_loadstore, full_preds, tuple_v_scalar_const_ptr_ops) DEF_RVV_FUNCTION (vsseg, seg_loadstore, none_m_preds, tuple_v_scalar_ptr_ops) DEF_RVV_FUNCTION (vlsseg, seg_loadstore, full_preds, tuple_v_scalar_const_ptr_ptrdiff_ops) -- cgit v1.1 From 34d339bbd0c1f5b4ad9587e7ae8387c912cb028b Mon Sep 17 00:00:00 2001 From: Jiahao Xu Date: Fri, 5 Jan 2024 15:38:25 +0800 Subject: LoongArch: Implement vec_init where N is a LSX vector mode This patch implements more vec_init optabs that can handle two LSX vectors producing a LASX vector by concatenating them. When an lsx vector is concatenated with an LSX const_vector of zeroes, the vec_concatz pattern can be used effectively. For example as below typedef short v8hi __attribute__ ((vector_size (16))); typedef short v16hi __attribute__ ((vector_size (32))); v8hi a, b; v16hi vec_initv16hiv8hi () { return __builtin_shufflevector (a, b, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15); } Before this patch: vec_initv16hiv8hi: addi.d $r3,$r3,-64 .cfi_def_cfa_offset 64 xvrepli.h $xr0,0 la.local $r12,.LANCHOR0 xvst $xr0,$r3,0 xvst $xr0,$r3,32 vld $vr0,$r12,0 vst $vr0,$r3,0 vld $vr0,$r12,16 vst $vr0,$r3,32 xvld $xr1,$r3,32 xvld $xr2,$r3,32 xvld $xr0,$r3,0 xvilvh.h $xr0,$xr1,$xr0 xvld $xr1,$r3,0 xvilvl.h $xr1,$xr2,$xr1 addi.d $r3,$r3,64 .cfi_def_cfa_offset 0 xvpermi.q $xr0,$xr1,32 jr $r1 After this patch: vec_initv16hiv8hi: la.local $r12,.LANCHOR0 vld $vr0,$r12,32 vld $vr2,$r12,48 xvilvh.h $xr1,$xr2,$xr0 xvilvl.h $xr0,$xr2,$xr0 xvpermi.q $xr1,$xr0,32 xvst $xr1,$r4,0 jr $r1 gcc/ChangeLog: * config/loongarch/lasx.md (vec_initv32qiv16qi): Rename to .. (vec_init): .. this, and extend to mode. (@vec_concatz): New insn pattern. * config/loongarch/loongarch.cc (loongarch_expand_vector_group_init): Handle VALS containing two vectors. gcc/testsuite/ChangeLog: * gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c: New test. --- gcc/config/loongarch/lasx.md | 26 ++++++++- gcc/config/loongarch/loongarch.cc | 44 +++++++++++++-- .../loongarch/vector/lasx/lasx-vec-init-2.c | 65 ++++++++++++++++++++++ 3 files changed, 128 insertions(+), 7 deletions(-) create mode 100644 gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c diff --git a/gcc/config/loongarch/lasx.md b/gcc/config/loongarch/lasx.md index 6c7e373..c2bde4d 100644 --- a/gcc/config/loongarch/lasx.md +++ b/gcc/config/loongarch/lasx.md @@ -465,6 +465,11 @@ (V16HI "w") (V32QI "w")]) +;; Half modes of all LASX vector modes, in lower-case. +(define_mode_attr lasxhalf [(V32QI "v16qi") (V16HI "v8hi") + (V8SI "v4si") (V4DI "v2di") + (V8SF "v4sf") (V4DF "v2df")]) + (define_expand "vec_init" [(match_operand:LASX 0 "register_operand") (match_operand:LASX 1 "")] @@ -474,9 +479,9 @@ DONE; }) -(define_expand "vec_initv32qiv16qi" - [(match_operand:V32QI 0 "register_operand") - (match_operand:V16QI 1 "")] +(define_expand "vec_init" + [(match_operand:LASX 0 "register_operand") + (match_operand: 1 "")] "ISA_HAS_LASX" { loongarch_expand_vector_group_init (operands[0], operands[1]); @@ -577,6 +582,21 @@ [(set_attr "type" "simd_insert") (set_attr "mode" "")]) +(define_insn "@vec_concatz" + [(set (match_operand:LASX 0 "register_operand" "=f") + (vec_concat:LASX + (match_operand: 1 "nonimmediate_operand") + (match_operand: 2 "const_0_operand")))] + "ISA_HAS_LASX" +{ + if (MEM_P (operands[1])) + return "vld\t%w0,%1"; + else + return "vori.b\t%w0,%w1,0"; +} + [(set_attr "type" "simd_splat") + (set_attr "mode" "")]) + (define_insn "vec_concat" [(set (match_operand:LASX 0 "register_operand" "=f") (vec_concat:LASX diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc index ec376a7..1c90afc 100644 --- a/gcc/config/loongarch/loongarch.cc +++ b/gcc/config/loongarch/loongarch.cc @@ -9847,10 +9847,46 @@ loongarch_gen_const_int_vector_shuffle (machine_mode mode, int val) void loongarch_expand_vector_group_init (rtx target, rtx vals) { - rtx ops[2] = { force_reg (E_V16QImode, XVECEXP (vals, 0, 0)), - force_reg (E_V16QImode, XVECEXP (vals, 0, 1)) }; - emit_insn (gen_rtx_SET (target, gen_rtx_VEC_CONCAT (E_V32QImode, ops[0], - ops[1]))); + machine_mode vmode = GET_MODE (target); + machine_mode half_mode = VOIDmode; + rtx low = XVECEXP (vals, 0, 0); + rtx high = XVECEXP (vals, 0, 1); + + switch (vmode) + { + case E_V32QImode: + half_mode = V16QImode; + break; + case E_V16HImode: + half_mode = V8HImode; + break; + case E_V8SImode: + half_mode = V4SImode; + break; + case E_V4DImode: + half_mode = V2DImode; + break; + case E_V8SFmode: + half_mode = V4SFmode; + break; + case E_V4DFmode: + half_mode = V2DFmode; + break; + default: + gcc_unreachable (); + } + + if (high == CONST0_RTX (half_mode)) + emit_insn (gen_vec_concatz (vmode, target, low, high)); + else + { + if (!register_operand (low, half_mode)) + low = force_reg (half_mode, low); + if (!register_operand (high, half_mode)) + high = force_reg (half_mode, high); + emit_insn (gen_rtx_SET (target, + gen_rtx_VEC_CONCAT (vmode, low, high))); + } } /* Expand initialization of a vector which has all same elements. */ diff --git a/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c new file mode 100644 index 0000000..7592198 --- /dev/null +++ b/gcc/testsuite/gcc.target/loongarch/vector/lasx/lasx-vec-init-2.c @@ -0,0 +1,65 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -fno-vect-cost-model -mlasx" } */ +/* { dg-final { scan-assembler-times "vld" 12 } } */ + + +typedef char v16qi __attribute__ ((vector_size (16))); +typedef char v32qi __attribute__ ((vector_size (32))); + +typedef short v8hi __attribute__ ((vector_size (16))); +typedef short v16hi __attribute__ ((vector_size (32))); + +typedef int v4si __attribute__ ((vector_size (16))); +typedef int v8si __attribute__ ((vector_size (32))); + +typedef long v2di __attribute__ ((vector_size (16))); +typedef long v4di __attribute__ ((vector_size (32))); + +typedef float v4sf __attribute__ ((vector_size (16))); +typedef float v8sf __attribute__ ((vector_size (32))); + +typedef double v2df __attribute__ ((vector_size (16))); +typedef double v4df __attribute__ ((vector_size (32))); + +v16qi a_qi, b_qi; +v8hi a_hi, b_hi; +v4si a_si, b_si; +v2di a_di, b_di; +v4sf a_sf, b_sf; +v2df a_df, b_df; + +v32qi +foo_v32qi () +{ + return __builtin_shufflevector (a_qi, b_qi, 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23, 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31); +} + +v16hi +foo_v16qi () +{ + return __builtin_shufflevector (a_hi, b_hi, 0, 8, 1, 9, 2, 10, 3, 11, 4, 12, 5, 13, 6, 14, 7, 15); +} + +v8si +foo_v8si () +{ + return __builtin_shufflevector (a_si, b_si, 0, 4, 1, 5, 2, 6, 3, 7); +} + +v4di +foo_v4di () +{ + return __builtin_shufflevector (a_di, b_di, 0, 2, 1, 3); +} + +v8sf +foo_v8sf () +{ + return __builtin_shufflevector (a_sf, b_sf, 0, 4, 1, 5, 2, 6, 3, 7); +} + +v4df +foo_v4df () +{ + return __builtin_shufflevector (a_df, b_df, 0, 2, 1, 3); +} -- cgit v1.1 From ab6224dfe12bd57f02343375a03c8a979e927d93 Mon Sep 17 00:00:00 2001 From: Haochen Jiang Date: Mon, 8 Jan 2024 16:36:38 +0800 Subject: i386: Fix recent testcase fail After commit 01f4251b8775c832a92d55e2df57c9ac72eaceef, early break vectorization is supported. The two testcases need to be fixed. gcc/testsuite/ChangeLog: * gcc.target/i386/avx512fp16-xorsign-1.c: Fix testcase. * gcc.target/i386/part-vect-absneghf.c: Ditto. --- gcc/testsuite/gcc.target/i386/avx512fp16-xorsign-1.c | 2 +- gcc/testsuite/gcc.target/i386/part-vect-absneghf.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/gcc/testsuite/gcc.target/i386/avx512fp16-xorsign-1.c b/gcc/testsuite/gcc.target/i386/avx512fp16-xorsign-1.c index a22a6ce..f5dd457 100644 --- a/gcc/testsuite/gcc.target/i386/avx512fp16-xorsign-1.c +++ b/gcc/testsuite/gcc.target/i386/avx512fp16-xorsign-1.c @@ -35,7 +35,7 @@ do_test (void) abort (); } -/* { dg-final { scan-tree-dump-times "vectorized 1 loops" 1 "vect" } } */ +/* { dg-final { scan-tree-dump-times "vectorized 2 loops" 1 "vect" } } */ /* { dg-final { scan-assembler "\[ \t\]xor" } } */ /* { dg-final { scan-assembler "\[ \t\]and" } } */ /* { dg-final { scan-assembler-not "copysign" } } */ diff --git a/gcc/testsuite/gcc.target/i386/part-vect-absneghf.c b/gcc/testsuite/gcc.target/i386/part-vect-absneghf.c index 48aed14..713f0bf 100644 --- a/gcc/testsuite/gcc.target/i386/part-vect-absneghf.c +++ b/gcc/testsuite/gcc.target/i386/part-vect-absneghf.c @@ -1,5 +1,5 @@ /* { dg-do run { target avx512fp16 } } */ -/* { dg-options "-O1 -mavx512fp16 -mavx512vl -ftree-vectorize -fdump-tree-slp-details -fdump-tree-optimized" } */ +/* { dg-options "-O1 -mavx512fp16 -mavx512vl -fdump-tree-slp-details -fdump-tree-optimized" } */ extern void abort (); -- cgit v1.1 From 6e249a9ad9d26fb01b147d33be9f9bfebca85c24 Mon Sep 17 00:00:00 2001 From: Chung-Ju Wu Date: Tue, 9 Jan 2024 14:26:18 +0800 Subject: arm: Add support for Arm Cortex-M52 CPU. This patch adds the -mcpu support for the Arm Cortex-M52 CPU which is an Armv8.1-M Mainline CPU supporting MVE and PACBTI by default. -mcpu=cortex-m52 switch by default matches to -march=armv8.1-m.main+pacbti+mve.fp+fp.dp. The cde feature is supported by specifying +cdecpN (e.g. -mcpu=cortex-m52+cdecp), where N is the coprocessor number 0 to 7. Also following options are provided to disable default features. +nomve.fp (disables MVE Floating point) +nomve (disables MVE Integer and MVE Floating point) +nodsp (disables dsp, MVE Integer and MVE Floating point) +nopacbti (disables pacbti) +nofp (disables floating point and MVE floating point) Signed-off-by: Chung-Ju Wu gcc/ChangeLog: * config/arm/arm-cpus.in (cortex-m52): New cpu. * config/arm/arm-tables.opt: Regenerate. * config/arm/arm-tune.md: Regenerate. --- gcc/config/arm/arm-cpus.in | 21 +++++++++++++++++++++ gcc/config/arm/arm-tables.opt | 3 +++ gcc/config/arm/arm-tune.md | 6 +++--- 3 files changed, 27 insertions(+), 3 deletions(-) diff --git a/gcc/config/arm/arm-cpus.in b/gcc/config/arm/arm-cpus.in index 6fa7e31..451b15f 100644 --- a/gcc/config/arm/arm-cpus.in +++ b/gcc/config/arm/arm-cpus.in @@ -1641,6 +1641,27 @@ begin cpu cortex-m35p costs v7m end cpu cortex-m35p +begin cpu cortex-m52 + cname cortexm52 + tune flags LDSCHED + architecture armv8.1-m.main+pacbti+mve.fp+fp.dp + option nopacbti remove pacbti + option nomve.fp remove mve_float + option nomve remove mve mve_float + option nofp remove ALL_FP mve_float + option nodsp remove MVE mve_float + option cdecp0 add cdecp0 + option cdecp1 add cdecp1 + option cdecp2 add cdecp2 + option cdecp3 add cdecp3 + option cdecp4 add cdecp4 + option cdecp5 add cdecp5 + option cdecp6 add cdecp6 + option cdecp7 add cdecp7 + isa quirk_no_asmcpu quirk_vlldm + costs v7m +end cpu cortex-m52 + begin cpu cortex-m55 cname cortexm55 tune flags LDSCHED diff --git a/gcc/config/arm/arm-tables.opt b/gcc/config/arm/arm-tables.opt index 9d6ae87..d3eb9a9 100644 --- a/gcc/config/arm/arm-tables.opt +++ b/gcc/config/arm/arm-tables.opt @@ -283,6 +283,9 @@ EnumValue Enum(processor_type) String(cortex-m35p) Value( TARGET_CPU_cortexm35p) EnumValue +Enum(processor_type) String(cortex-m52) Value( TARGET_CPU_cortexm52) + +EnumValue Enum(processor_type) String(cortex-m55) Value( TARGET_CPU_cortexm55) EnumValue diff --git a/gcc/config/arm/arm-tune.md b/gcc/config/arm/arm-tune.md index 7318f03..6a631d8 100644 --- a/gcc/config/arm/arm-tune.md +++ b/gcc/config/arm/arm-tune.md @@ -49,7 +49,7 @@ cortexa710,cortexx1,cortexx1c, neoversen1,cortexa75cortexa55,cortexa76cortexa55, neoversev1,neoversen2,cortexm23, - cortexm33,cortexm35p,cortexm55, - starmc1,cortexm85,cortexr52, - cortexr52plus" + cortexm33,cortexm35p,cortexm52, + cortexm55,starmc1,cortexm85, + cortexr52,cortexr52plus" (const (symbol_ref "((enum attr_tune) arm_tune)"))) -- cgit v1.1 From 43c4f982113076ad54c3405f865cc63b0a5ba5aa Mon Sep 17 00:00:00 2001 From: Chung-Ju Wu Date: Tue, 9 Jan 2024 14:26:18 +0800 Subject: arm: Add Arm Cortex-M52 CPU documentation. Signed-off-by: Chung-Ju Wu gcc/ChangeLog: * doc/invoke.texi (Arm Options): Document Cortex-M52 options. --- gcc/doc/invoke.texi | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi index 8cf99f3..a494420 100644 --- a/gcc/doc/invoke.texi +++ b/gcc/doc/invoke.texi @@ -23103,7 +23103,7 @@ Permissible names are: @samp{arm7tdmi}, @samp{arm7tdmi-s}, @samp{arm710t}, @samp{cortex-r7}, @samp{cortex-r8}, @samp{cortex-r52}, @samp{cortex-r52plus}, @samp{cortex-m0}, @samp{cortex-m0plus}, @samp{cortex-m1}, @samp{cortex-m3}, @samp{cortex-m4}, @samp{cortex-m7}, @samp{cortex-m23}, @samp{cortex-m33}, -@samp{cortex-m35p}, @samp{cortex-m55}, @samp{cortex-m85}, @samp{cortex-x1}, +@samp{cortex-m35p}, @samp{cortex-m52}, @samp{cortex-m55}, @samp{cortex-m85}, @samp{cortex-x1}, @samp{cortex-x1c}, @samp{cortex-m1.small-multiply}, @samp{cortex-m0.small-multiply}, @samp{cortex-m0plus.small-multiply}, @samp{exynos-m1}, @samp{marvell-pj4}, @samp{neoverse-n1}, @samp{neoverse-n2}, @samp{neoverse-v1}, @samp{xscale}, @@ -23169,34 +23169,34 @@ The following extension options are common to the listed CPUs: @table @samp @item +nodsp Disable the DSP instructions on @samp{cortex-m33}, @samp{cortex-m35p}, -@samp{cortex-m55} and @samp{cortex-m85}. Also disable the M-Profile Vector -Extension (MVE) integer and single precision floating-point instructions on -@samp{cortex-m55} and @samp{cortex-m85}. +@samp{cortex-m52}, @samp{cortex-m55} and @samp{cortex-m85}. +Also disable the M-Profile Vector Extension (MVE) integer and +single precision floating-point instructions on +@samp{cortex-m52}, @samp{cortex-m55} and @samp{cortex-m85}. @item +nopacbti Disable the Pointer Authentication and Branch Target Identification Extension -on @samp{cortex-m85}. +on @samp{cortex-m52} and @samp{cortex-m85}. @item +nomve Disable the M-Profile Vector Extension (MVE) integer and single precision -floating-point instructions on @samp{cortex-m55} and @samp{cortex-m85}. +floating-point instructions on @samp{cortex-m52}, @samp{cortex-m55} and @samp{cortex-m85}. @item +nomve.fp Disable the M-Profile Vector Extension (MVE) single precision floating-point -instructions on @samp{cortex-m55} and @samp{cortex-m85}. +instructions on @samp{cortex-m52}, @samp{cortex-m55} and @samp{cortex-m85}. @item +cdecp0, +cdecp1, ... , +cdecp7 Enable the Custom Datapath Extension (CDE) on selected coprocessors according -to the numbers given in the options in the range 0 to 7 on @samp{cortex-m55}. +to the numbers given in the options in the range 0 to 7 on @samp{cortex-m52} and @samp{cortex-m55}. @item +nofp Disables the floating-point instructions on @samp{arm9e}, @samp{arm946e-s}, @samp{arm966e-s}, @samp{arm968e-s}, @samp{arm10e}, @samp{arm1020e}, @samp{arm1022e}, @samp{arm926ej-s}, @samp{arm1026ej-s}, @samp{cortex-r5}, @samp{cortex-r7}, @samp{cortex-r8}, -@samp{cortex-m4}, @samp{cortex-m7}, @samp{cortex-m33}, @samp{cortex-m35p} @samp{cortex-m4}, @samp{cortex-m7}, @samp{cortex-m33}, @samp{cortex-m35p}, -@samp{cortex-m55} and @samp{cortex-m85}. +@samp{cortex-m52}, @samp{cortex-m55} and @samp{cortex-m85}. Disables the floating-point and SIMD instructions on @samp{generic-armv7-a}, @samp{cortex-a5}, @samp{cortex-a7}, @samp{cortex-a8}, @samp{cortex-a9}, @samp{cortex-a12}, @@ -23539,9 +23539,9 @@ Development Tools Engineering Specification", which can be found on Mitigate against a potential security issue with the @code{VLLDM} instruction in some M-profile devices when using CMSE (CVE-2021-365465). This option is enabled by default when the option @option{-mcpu=} is used with -@code{cortex-m33}, @code{cortex-m35p}, @code{cortex-m55}, @code{cortex-m85} -or @code{star-mc1}. The option @option{-mno-fix-cmse-cve-2021-35465} can be used -to disable the mitigation. +@code{cortex-m33}, @code{cortex-m35p}, @code{cortex-m52}, @code{cortex-m55}, +@code{cortex-m85} or @code{star-mc1}. The option @option{-mno-fix-cmse-cve-2021-35465} +can be used to disable the mitigation. @opindex mstack-protector-guard @opindex mstack-protector-guard-offset -- cgit v1.1 From 6a67fdcb3f0cc8be47b49ddd246d0c50c3770800 Mon Sep 17 00:00:00 2001 From: Roger Sayle Date: Tue, 9 Jan 2024 08:28:42 +0000 Subject: i386: PR target/112992: Optimize mode for broadcast of constants. The issue addressed by this patch is that when initializing vectors by broadcasting integer constants, the compiler has the flexibility to select the most appropriate vector mode to perform the broadcast, as long as the resulting vector has an identical bit pattern. For example, the following constants are all equivalent: V4SImode {0x01010101, 0x01010101, 0x01010101, 0x01010101 } V8HImode {0x0101, 0x0101, 0x0101, 0x0101, 0x0101, 0x0101, 0x0101, 0x0101 } V16QImode {0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, ... 0x01 } So instruction sequences that construct any of these can be used to construct the others (with a suitable cast/SUBREG). On x86_64, it turns out that broadcasts of SImode constants are preferred, as DImode constants often require a longer movabs instruction, and HImode and QImode broadcasts require multiple uops on some architectures. Hence, SImode is always the equal shortest/fastest implementation. Examples of this improvement, can be seen in the testsuite. gcc.target/i386/pr102021.c Before: 0: 48 b8 0c 00 0c 00 0c movabs $0xc000c000c000c,%rax 7: 00 0c 00 a: 62 f2 fd 28 7c c0 vpbroadcastq %rax,%ymm0 10: c3 retq After: 0: b8 0c 00 0c 00 mov $0xc000c,%eax 5: 62 f2 7d 28 7c c0 vpbroadcastd %eax,%ymm0 b: c3 retq and gcc.target/i386/pr90773-17.c: Before: 0: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 7 7: b8 0c 00 00 00 mov $0xc,%eax c: 62 f2 7d 08 7a c0 vpbroadcastb %eax,%xmm0 12: 62 f1 7f 08 7f 02 vmovdqu8 %xmm0,(%rdx) 18: c7 42 0f 0c 0c 0c 0c movl $0xc0c0c0c,0xf(%rdx) 1f: c3 retq After: 0: 48 8b 15 00 00 00 00 mov 0x0(%rip),%rdx # 7 7: b8 0c 0c 0c 0c mov $0xc0c0c0c,%eax c: 62 f2 7d 08 7c c0 vpbroadcastd %eax,%xmm0 12: 62 f1 7f 08 7f 02 vmovdqu8 %xmm0,(%rdx) 18: c7 42 0f 0c 0c 0c 0c movl $0xc0c0c0c,0xf(%rdx) 1f: c3 retq where according to Agner Fog's instruction tables broadcastd is slightly faster on some microarchitectures, for example Knight's Landing. 2024-01-09 Roger Sayle Hongtao Liu gcc/ChangeLog PR target/112992 * config/i386/i386-expand.cc (ix86_convert_const_wide_int_to_broadcast): Allow call to ix86_expand_vector_init_duplicate to fail, and return NULL_RTX. (ix86_broadcast_from_constant): Revert recent change; Return a suitable MEMREF independently of mode/target combinations. (ix86_expand_vector_move): Allow ix86_expand_vector_init_duplicate to decide whether expansion is possible/preferrable. Only try forcing DImode constants to memory (and trying again) if calling ix86_expand_vector_init_duplicate fails with an DImode immediate constant. (ix86_expand_vector_init_duplicate) : Try using V4SImode for suitable immediate constants. : Try using V8SImode for suitable constants. : Fail for CONST_INT_P, i.e. use constant pool. : Likewise. : For CONST_INT_P try using V4SImode via widen. : For CONT_INT_P try using V8HImode via widen.