From 7327014b49ba0e4c8227edaed569d21d3cc1ec74 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 2 Apr 2024 16:25:18 -0500 Subject: [libc] Implement temporary `printf` on the GPU (#85331) Summary: This patch adds a temporary implementation that uses a struct-based interface in lieu of varargs support. Once varargs support exists we will move this implementation to the "real" printf implementation. Conceptually, this patch has the client copy over its format string and arguments to the server. The server will then scan the format string searching for any specifiers that are actually a string. If it is a string then we will send the pointer back to the server to tell it to copy it back. This copied value will then replace the pointer when the final formatting is done. This will require a built-in extension to the varargs support to get access to the underlying struct. The varargs used on the GPU will simply be a struct wrapped in a varargs ABI. --- libc/config/gpu/entrypoints.txt | 1 + libc/include/llvm-libc-types/rpc_opcodes_t.h | 3 + libc/spec/gpu_ext.td | 8 ++ libc/src/__support/arg_list.h | 38 ++++++ libc/src/gpu/CMakeLists.txt | 12 ++ libc/src/gpu/rpc_fprintf.cpp | 71 ++++++++++ libc/src/gpu/rpc_fprintf.h | 22 +++ libc/test/integration/src/stdio/CMakeLists.txt | 3 + libc/test/integration/src/stdio/gpu/CMakeLists.txt | 21 +++ libc/test/integration/src/stdio/gpu/printf.cpp | 88 ++++++++++++ libc/utils/gpu/server/CMakeLists.txt | 10 +- libc/utils/gpu/server/rpc_server.cpp | 148 +++++++++++++++++++++ 12 files changed, 424 insertions(+), 1 deletion(-) create mode 100644 libc/src/gpu/rpc_fprintf.cpp create mode 100644 libc/src/gpu/rpc_fprintf.h create mode 100644 libc/test/integration/src/stdio/gpu/CMakeLists.txt create mode 100644 libc/test/integration/src/stdio/gpu/printf.cpp diff --git a/libc/config/gpu/entrypoints.txt b/libc/config/gpu/entrypoints.txt index 4fb87cb..b678350 100644 --- a/libc/config/gpu/entrypoints.txt +++ b/libc/config/gpu/entrypoints.txt @@ -211,6 +211,7 @@ set(TARGET_LIBC_ENTRYPOINTS # gpu/rpc.h entrypoints libc.src.gpu.rpc_host_call + libc.src.gpu.rpc_fprintf ) set(TARGET_LIBM_ENTRYPOINTS diff --git a/libc/include/llvm-libc-types/rpc_opcodes_t.h b/libc/include/llvm-libc-types/rpc_opcodes_t.h index 919ea03..faed7b5 100644 --- a/libc/include/llvm-libc-types/rpc_opcodes_t.h +++ b/libc/include/llvm-libc-types/rpc_opcodes_t.h @@ -31,6 +31,9 @@ typedef enum { RPC_FTELL, RPC_FFLUSH, RPC_UNGETC, + RPC_PRINTF_TO_STDOUT, + RPC_PRINTF_TO_STDERR, + RPC_PRINTF_TO_STREAM, RPC_LAST = 0xFFFF, } rpc_opcode_t; diff --git a/libc/spec/gpu_ext.td b/libc/spec/gpu_ext.td index dce81ff..5400e0a 100644 --- a/libc/spec/gpu_ext.td +++ b/libc/spec/gpu_ext.td @@ -10,6 +10,14 @@ def GPUExtensions : StandardSpec<"GPUExtensions"> { RetValSpec, [ArgSpec, ArgSpec, ArgSpec] >, + FunctionSpec< + "rpc_fprintf", + RetValSpec, + [ArgSpec, + ArgSpec, + ArgSpec, + ArgSpec] + >, ] >; let Headers = [ diff --git a/libc/src/__support/arg_list.h b/libc/src/__support/arg_list.h index 9de1765..0965e12 100644 --- a/libc/src/__support/arg_list.h +++ b/libc/src/__support/arg_list.h @@ -13,6 +13,7 @@ #include #include +#include namespace LIBC_NAMESPACE { namespace internal { @@ -60,6 +61,43 @@ public: size_t read_count() const { return arg_counter; } }; +// Used for the GPU implementation of `printf`. This models a variadic list as a +// simple array of pointers that are built manually by the implementation. 
+class StructArgList { + void *ptr; + void *end; + +public: + LIBC_INLINE StructArgList(void *ptr, size_t size) + : ptr(ptr), end(reinterpret_cast(ptr) + size) {} + LIBC_INLINE StructArgList(const StructArgList &other) { + ptr = other.ptr; + end = other.end; + } + LIBC_INLINE StructArgList() = default; + LIBC_INLINE ~StructArgList() = default; + + LIBC_INLINE StructArgList &operator=(const StructArgList &rhs) { + ptr = rhs.ptr; + return *this; + } + + LIBC_INLINE void *get_ptr() const { return ptr; } + + template LIBC_INLINE T next_var() { + ptr = reinterpret_cast( + ((reinterpret_cast(ptr) + alignof(T) - 1) / alignof(T)) * + alignof(T)); + + if (ptr >= end) + return T(-1); + + T val = *reinterpret_cast(ptr); + ptr = reinterpret_cast(ptr) + sizeof(T); + return val; + } +}; + } // namespace internal } // namespace LIBC_NAMESPACE diff --git a/libc/src/gpu/CMakeLists.txt b/libc/src/gpu/CMakeLists.txt index e202285..4508abe 100644 --- a/libc/src/gpu/CMakeLists.txt +++ b/libc/src/gpu/CMakeLists.txt @@ -8,3 +8,15 @@ add_entrypoint_object( libc.src.__support.RPC.rpc_client libc.src.__support.GPU.utils ) + +add_entrypoint_object( + rpc_fprintf + SRCS + rpc_fprintf.cpp + HDRS + rpc_fprintf.h + DEPENDS + libc.src.stdio.gpu.gpu_file + libc.src.__support.RPC.rpc_client + libc.src.__support.GPU.utils +) diff --git a/libc/src/gpu/rpc_fprintf.cpp b/libc/src/gpu/rpc_fprintf.cpp new file mode 100644 index 0000000..7b0e60b --- /dev/null +++ b/libc/src/gpu/rpc_fprintf.cpp @@ -0,0 +1,71 @@ +//===-- GPU implementation of fprintf -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "rpc_fprintf.h" + +#include "src/__support/CPP/string_view.h" +#include "src/__support/GPU/utils.h" +#include "src/__support/RPC/rpc_client.h" +#include "src/__support/common.h" +#include "src/stdio/gpu/file.h" + +namespace LIBC_NAMESPACE { + +template +int fprintf_impl(::FILE *__restrict file, const char *__restrict format, + size_t format_size, void *args, size_t args_size) { + uint64_t mask = gpu::get_lane_mask(); + rpc::Client::Port port = rpc::client.open(); + + if constexpr (opcode == RPC_PRINTF_TO_STREAM) { + port.send([&](rpc::Buffer *buffer) { + buffer->data[0] = reinterpret_cast(file); + }); + } + + port.send_n(format, format_size); + port.send_n(args, args_size); + + uint32_t ret = 0; + for (;;) { + const char *str = nullptr; + port.recv([&](rpc::Buffer *buffer) { + ret = static_cast(buffer->data[0]); + str = reinterpret_cast(buffer->data[1]); + }); + // If any lanes have a string argument it needs to be copied back. + if (!gpu::ballot(mask, str)) + break; + + uint64_t size = str ? internal::string_length(str) + 1 : 0; + port.send_n(str, size); + } + + port.close(); + return ret; +} + +// TODO: This is a stand-in function that uses a struct pointer and size in +// place of varargs. Once varargs support is added we will use that to +// implement the real version. 
+LLVM_LIBC_FUNCTION(int, rpc_fprintf, + (::FILE *__restrict stream, const char *__restrict format, + void *args, size_t size)) { + cpp::string_view str(format); + if (stream == stdout) + return fprintf_impl(stream, format, str.size() + 1, + args, size); + else if (stream == stderr) + return fprintf_impl(stream, format, str.size() + 1, + args, size); + else + return fprintf_impl(stream, format, str.size() + 1, + args, size); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/src/gpu/rpc_fprintf.h b/libc/src/gpu/rpc_fprintf.h new file mode 100644 index 0000000..053f7b4 --- /dev/null +++ b/libc/src/gpu/rpc_fprintf.h @@ -0,0 +1,22 @@ +//===-- Implementation header for RPC functions -----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H +#define LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H + +#include +#include + +namespace LIBC_NAMESPACE { + +int rpc_fprintf(::FILE *__restrict stream, const char *__restrict format, + void *argc, size_t size); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_GPU_RPC_HOST_CALL_H diff --git a/libc/test/integration/src/stdio/CMakeLists.txt b/libc/test/integration/src/stdio/CMakeLists.txt index 61caa2e..51c5ee2 100644 --- a/libc/test/integration/src/stdio/CMakeLists.txt +++ b/libc/test/integration/src/stdio/CMakeLists.txt @@ -1,3 +1,6 @@ +if(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${LIBC_TARGET_OS}) + add_subdirectory(${LIBC_TARGET_OS}) +endif() add_custom_target(stdio-integration-tests) add_dependencies(libc-integration-tests stdio-integration-tests) diff --git a/libc/test/integration/src/stdio/gpu/CMakeLists.txt b/libc/test/integration/src/stdio/gpu/CMakeLists.txt new file mode 100644 index 0000000..6327c45 --- /dev/null +++ b/libc/test/integration/src/stdio/gpu/CMakeLists.txt @@ -0,0 +1,21 @@ +add_custom_target(stdio-gpu-integration-tests) +add_dependencies(libc-integration-tests stdio-gpu-integration-tests) + +# Create an output directory for any temporary test files. +file(MAKE_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}/testdata) + +# These tests are not for correctness testing, but are instead a convenient way +# to generate hermetic binaries for comparitive binary size testing. +add_integration_test( + printf_test + SUITE + stdio-gpu-integration-tests + SRCS + printf.cpp + DEPENDS + libc.src.gpu.rpc_fprintf + libc.src.stdio.fopen + LOADER_ARGS + --threads 32 + --blocks 4 +) diff --git a/libc/test/integration/src/stdio/gpu/printf.cpp b/libc/test/integration/src/stdio/gpu/printf.cpp new file mode 100644 index 0000000..97ad4ac --- /dev/null +++ b/libc/test/integration/src/stdio/gpu/printf.cpp @@ -0,0 +1,88 @@ +//===-- RPC test to check args to printf ----------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "test/IntegrationTest/test.h" + +#include "src/__support/GPU/utils.h" +#include "src/gpu/rpc_fprintf.h" +#include "src/stdio/fopen.h" + +using namespace LIBC_NAMESPACE; + +FILE *file = LIBC_NAMESPACE::fopen("testdata/test_data.txt", "w"); + +TEST_MAIN(int argc, char **argv, char **envp) { + ASSERT_TRUE(file && "failed to open file"); + // Check basic printing. + int written = 0; + written = LIBC_NAMESPACE::rpc_fprintf(file, "A simple string\n", nullptr, 0); + ASSERT_EQ(written, 16); + + const char *str = "A simple string\n"; + written = LIBC_NAMESPACE::rpc_fprintf(file, "%s", &str, sizeof(void *)); + ASSERT_EQ(written, 16); + + // Check printing a different value with each thread. + uint64_t thread_id = gpu::get_thread_id(); + written = LIBC_NAMESPACE::rpc_fprintf(file, "%8ld\n", &thread_id, + sizeof(thread_id)); + ASSERT_EQ(written, 9); + + struct { + uint32_t x = 1; + char c = 'c'; + double f = 1.0; + } args1; + written = + LIBC_NAMESPACE::rpc_fprintf(file, "%d%c%.1f\n", &args1, sizeof(args1)); + ASSERT_EQ(written, 6); + + struct { + uint32_t x = 1; + const char *str = "A simple string\n"; + } args2; + written = + LIBC_NAMESPACE::rpc_fprintf(file, "%032b%s\n", &args2, sizeof(args2)); + ASSERT_EQ(written, 49); + + // Check that the server correctly handles divergent numbers of arguments. + const char *format = gpu::get_thread_id() % 2 ? "%s" : "%20ld\n"; + written = LIBC_NAMESPACE::rpc_fprintf(file, format, &str, sizeof(void *)); + ASSERT_EQ(written, gpu::get_thread_id() % 2 ? 16 : 21); + + format = gpu::get_thread_id() % 2 ? "%s" : str; + written = LIBC_NAMESPACE::rpc_fprintf(file, format, &str, sizeof(void *)); + ASSERT_EQ(written, 16); + + // Check that we handle null arguments correctly. + struct { + void *null = nullptr; + } args3; + written = LIBC_NAMESPACE::rpc_fprintf(file, "%p", &args3, sizeof(args3)); + ASSERT_EQ(written, 9); + +#ifndef LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS + written = LIBC_NAMESPACE::rpc_fprintf(file, "%s", &args3, sizeof(args3)); + ASSERT_EQ(written, 6); +#endif // LIBC_COPT_PRINTF_NO_NULLPTR_CHECKS + + // Check for extremely abused variable width arguments + struct { + uint32_t x = 1; + uint32_t y = 2; + double f = 1.0; + } args4; + written = LIBC_NAMESPACE::rpc_fprintf(file, "%**d", &args4, sizeof(args4)); + ASSERT_EQ(written, 4); + written = LIBC_NAMESPACE::rpc_fprintf(file, "%**d%6d", &args4, sizeof(args4)); + ASSERT_EQ(written, 10); + written = LIBC_NAMESPACE::rpc_fprintf(file, "%**.**f", &args4, sizeof(args4)); + ASSERT_EQ(written, 7); + + return 0; +} diff --git a/libc/utils/gpu/server/CMakeLists.txt b/libc/utils/gpu/server/CMakeLists.txt index 6fca72c..94347ef 100644 --- a/libc/utils/gpu/server/CMakeLists.txt +++ b/libc/utils/gpu/server/CMakeLists.txt @@ -1,4 +1,8 @@ -add_library(llvmlibc_rpc_server STATIC rpc_server.cpp) +add_library(llvmlibc_rpc_server STATIC + ${LIBC_SOURCE_DIR}/src/stdio/printf_core/writer.cpp + ${LIBC_SOURCE_DIR}/src/stdio/printf_core/converter.cpp + rpc_server.cpp +) # Include the RPC implemenation from libc. 
target_include_directories(llvmlibc_rpc_server PRIVATE ${LIBC_SOURCE_DIR}) @@ -9,6 +13,10 @@ target_include_directories(llvmlibc_rpc_server PUBLIC ${CMAKE_CURRENT_SOURCE_DIR target_compile_options(llvmlibc_rpc_server PUBLIC $<$:-Wno-attributes>) target_compile_definitions(llvmlibc_rpc_server PUBLIC + LIBC_COPT_USE_C_ASSERT + LIBC_COPT_ARRAY_ARG_LIST + LIBC_COPT_PRINTF_DISABLE_WRITE_INT + LIBC_COPT_PRINTF_DISABLE_INDEX_MODE LIBC_NAMESPACE=${LIBC_NAMESPACE}) # Install the server and associated header. diff --git a/libc/utils/gpu/server/rpc_server.cpp b/libc/utils/gpu/server/rpc_server.cpp index fd30664..095f3fa 100644 --- a/libc/utils/gpu/server/rpc_server.cpp +++ b/libc/utils/gpu/server/rpc_server.cpp @@ -14,7 +14,13 @@ #include "llvmlibc_rpc_server.h" #include "src/__support/RPC/rpc.h" +#include "src/__support/arg_list.h" +#include "src/stdio/printf_core/converter.h" +#include "src/stdio/printf_core/parser.h" +#include "src/stdio/printf_core/writer.h" + #include "src/stdio/gpu/file.h" +#include #include #include #include @@ -25,6 +31,7 @@ #include using namespace LIBC_NAMESPACE; +using namespace LIBC_NAMESPACE::printf_core; static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer), "Buffer size mismatch"); @@ -32,6 +39,141 @@ static_assert(sizeof(rpc_buffer_t) == sizeof(rpc::Buffer), static_assert(RPC_MAXIMUM_PORT_COUNT == rpc::MAX_PORT_COUNT, "Incorrect maximum port count"); +template void handle_printf(rpc::Server::Port &port) { + FILE *files[lane_size] = {nullptr}; + // Get the appropriate output stream to use. + if (port.get_opcode() == RPC_PRINTF_TO_STREAM) + port.recv([&](rpc::Buffer *buffer, uint32_t id) { + files[id] = reinterpret_cast(buffer->data[0]); + }); + else if (port.get_opcode() == RPC_PRINTF_TO_STDOUT) + std::fill(files, files + lane_size, stdout); + else + std::fill(files, files + lane_size, stderr); + + uint64_t format_sizes[lane_size] = {0}; + void *format[lane_size] = {nullptr}; + + uint64_t args_sizes[lane_size] = {0}; + void *args[lane_size] = {nullptr}; + + // Recieve the format string and arguments from the client. + port.recv_n(format, format_sizes, + [&](uint64_t size) { return new char[size]; }); + port.recv_n(args, args_sizes, [&](uint64_t size) { return new char[size]; }); + + // Identify any arguments that are actually pointers to strings on the client. + // Additionally we want to determine how much buffer space we need to print. + std::vector strs_to_copy[lane_size]; + int buffer_size[lane_size] = {0}; + for (uint32_t lane = 0; lane < lane_size; ++lane) { + if (!format[lane]) + continue; + + WriteBuffer wb(nullptr, 0); + Writer writer(&wb); + + internal::StructArgList printf_args(args[lane], args_sizes[lane]); + Parser parser( + reinterpret_cast(format[lane]), printf_args); + + for (FormatSection cur_section = parser.get_next_section(); + !cur_section.raw_string.empty(); + cur_section = parser.get_next_section()) { + if (cur_section.has_conv && cur_section.conv_name == 's' && + cur_section.conv_val_ptr) { + strs_to_copy[lane].emplace_back(cur_section.conv_val_ptr); + } else if (cur_section.has_conv) { + // Ignore conversion errors for the first pass. + convert(&writer, cur_section); + } else { + writer.write(cur_section.raw_string); + } + } + buffer_size[lane] = writer.get_chars_written(); + } + + // Recieve any strings from the client and push them into a buffer. 
+ std::vector copied_strs[lane_size]; + while (std::any_of(std::begin(strs_to_copy), std::end(strs_to_copy), + [](const auto &v) { return !v.empty() && v.back(); })) { + port.send([&](rpc::Buffer *buffer, uint32_t id) { + void *ptr = !strs_to_copy[id].empty() ? strs_to_copy[id].back() : nullptr; + buffer->data[1] = reinterpret_cast(ptr); + if (!strs_to_copy[id].empty()) + strs_to_copy[id].pop_back(); + }); + uint64_t str_sizes[lane_size] = {0}; + void *strs[lane_size] = {nullptr}; + port.recv_n(strs, str_sizes, [](uint64_t size) { return new char[size]; }); + for (uint32_t lane = 0; lane < lane_size; ++lane) { + if (!strs[lane]) + continue; + + copied_strs[lane].emplace_back(strs[lane]); + buffer_size[lane] += str_sizes[lane]; + } + } + + // Perform the final formatting and printing using the LLVM C library printf. + int results[lane_size] = {0}; + std::vector to_be_deleted; + for (uint32_t lane = 0; lane < lane_size; ++lane) { + if (!format[lane]) + continue; + + std::unique_ptr buffer(new char[buffer_size[lane]]); + WriteBuffer wb(buffer.get(), buffer_size[lane]); + Writer writer(&wb); + + internal::StructArgList printf_args(args[lane], args_sizes[lane]); + Parser parser( + reinterpret_cast(format[lane]), printf_args); + + // Parse and print the format string using the arguments we copied from + // the client. + int ret = 0; + for (FormatSection cur_section = parser.get_next_section(); + !cur_section.raw_string.empty(); + cur_section = parser.get_next_section()) { + // If this argument was a string we use the memory buffer we copied from + // the client by replacing the raw pointer with the copied one. + if (cur_section.has_conv && cur_section.conv_name == 's') { + if (!copied_strs[lane].empty()) { + cur_section.conv_val_ptr = copied_strs[lane].back(); + to_be_deleted.push_back(copied_strs[lane].back()); + copied_strs[lane].pop_back(); + } else { + cur_section.conv_val_ptr = nullptr; + } + } + if (cur_section.has_conv) { + ret = convert(&writer, cur_section); + if (ret == -1) + break; + } else { + writer.write(cur_section.raw_string); + } + } + + results[lane] = + fwrite(buffer.get(), 1, writer.get_chars_written(), files[lane]); + if (results[lane] != writer.get_chars_written() || ret == -1) + results[lane] = -1; + } + + // Send the final return value and signal completion by setting the string + // argument to null. + port.send([&](rpc::Buffer *buffer, uint32_t id) { + buffer->data[0] = static_cast(results[id]); + buffer->data[1] = reinterpret_cast(nullptr); + delete[] reinterpret_cast(format[id]); + delete[] reinterpret_cast(args[id]); + }); + for (void *ptr : to_be_deleted) + delete[] reinterpret_cast(ptr); +} + template rpc_status_t handle_server_impl( rpc::Server &server, @@ -195,6 +337,12 @@ rpc_status_t handle_server_impl( }); break; } + case RPC_PRINTF_TO_STREAM: + case RPC_PRINTF_TO_STDOUT: + case RPC_PRINTF_TO_STDERR: { + handle_printf(*port); + break; + } case RPC_NOOP: { port->recv([](rpc::Buffer *) {}); break; -- cgit v1.1 From 5029949952f4dc745dcb7799c7449a02fe8309c3 Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Wed, 3 Apr 2024 00:28:47 +0300 Subject: [PAC][llvm-readobj][AArch64][ELF] Support `GNU_PROPERTY_AARCH64_FEATURE_PAUTH` (#85231) This adds support for `GNU_PROPERTY_AARCH64_FEATURE_PAUTH` feature (as defined in https://github.com/ARM-software/abi-aa/pull/240) handling in llvm-readobj and llvm-readelf. 
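For orientation, the property's payload is just a 16-byte descriptor: a 64-bit platform value followed by a 64-bit version value. A minimal, self-contained sketch of the decoding the dumpers now perform is shown below (`dumpPAuthCoreInfo` is an invented name for this sketch, a little-endian object is assumed, and the real `ELFDumper` code additionally reads with the object's own endianness and maps the values to the platform and feature names listed next):

```c++
#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Decode the 16-byte GNU_PROPERTY_AARCH64_FEATURE_PAUTH descriptor:
// a 64-bit platform value followed by a 64-bit version value.
void dumpPAuthCoreInfo(const uint8_t *Desc, uint32_t DataSize) {
  if (DataSize != 16) {
    std::printf("<corrupted size: expected 16, got %u>\n", DataSize);
    return;
  }
  uint64_t Platform = 0, Version = 0;
  std::memcpy(&Platform, Desc, sizeof(Platform));   // bytes 0..7
  std::memcpy(&Version, Desc + 8, sizeof(Version)); // bytes 8..15
  std::printf("AArch64 PAuth ABI core info: platform 0x%" PRIx64
              ", version 0x%" PRIx64 "\n",
              Platform, Version);
}

int main() {
  // Descriptor used by the gnu-42-1.s test in this patch: platform 42, version 1.
  const uint8_t Desc[16] = {42, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0};
  dumpPAuthCoreInfo(Desc, sizeof(Desc));
}
```

Fed that descriptor, the sketch prints `platform 0x2a, version 0x1`, matching the FileCheck expectations in the new test (minus the symbolic `(unknown)` annotation the real dumper appends).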
The following constants for supported platforms are also introduced: - `AARCH64_PAUTH_PLATFORM_INVALID = 0x0` - `AARCH64_PAUTH_PLATFORM_BAREMETAL = 0x1` - `AARCH64_PAUTH_PLATFORM_LLVM_LINUX = 0x10000002` For the llvm_linux platform, output of the tools contains descriptions of PAuth features which are enabled/disabled depending on the version value. Version value bits correspond to the following `LangOptions` defined in #85232: - bit 0: `PointerAuthIntrinsics`; - bit 1: `PointerAuthCalls`; - bit 2: `PointerAuthReturns`; - bit 3: `PointerAuthAuthTraps`; - bit 4: `PointerAuthVTPtrAddressDiscrimination`; - bit 5: `PointerAuthVTPtrTypeDiscrimination`; - bit 6: `PointerAuthInitFini`. Support for `.note.AARCH64-PAUTH-ABI-tag` is dropped since it's deleted from the spec in ARM-software/abi-aa#250. --- llvm/include/llvm/BinaryFormat/ELF.h | 26 +- .../ELF/AArch64/aarch64-feature-pauth.s | 305 ++++++++++++++------- .../ELF/AArch64/aarch64-note-gnu-property.s | 2 + llvm/tools/llvm-readobj/ELFDumper.cpp | 127 +++++---- 4 files changed, 303 insertions(+), 157 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 877f3f7..ed267c1 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -1712,11 +1712,6 @@ enum { NT_ANDROID_TYPE_MEMTAG = 4, }; -// ARM note types. -enum { - NT_ARM_TYPE_PAUTH_ABI_TAG = 1, -}; - // Memory tagging values used in NT_ANDROID_TYPE_MEMTAG notes. enum { // Enumeration to determine the tagging mode. In Android-land, 'SYNC' means @@ -1740,6 +1735,7 @@ enum : unsigned { GNU_PROPERTY_STACK_SIZE = 1, GNU_PROPERTY_NO_COPY_ON_PROTECTED = 2, GNU_PROPERTY_AARCH64_FEATURE_1_AND = 0xc0000000, + GNU_PROPERTY_AARCH64_FEATURE_PAUTH = 0xc0000001, GNU_PROPERTY_X86_FEATURE_1_AND = 0xc0000002, GNU_PROPERTY_X86_UINT32_OR_LO = 0xc0008000, @@ -1758,6 +1754,26 @@ enum : unsigned { GNU_PROPERTY_AARCH64_FEATURE_1_GCS = 1 << 2, }; +// aarch64 PAuth platforms. +enum : unsigned { + AARCH64_PAUTH_PLATFORM_INVALID = 0x0, + AARCH64_PAUTH_PLATFORM_BAREMETAL = 0x1, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX = 0x10000002, +}; + +// Bit positions of version flags for AARCH64_PAUTH_PLATFORM_LLVM_LINUX. +enum : unsigned { + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INTRINSICS = 0, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_CALLS = 1, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_RETURNS = 2, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_AUTHTRAPS = 3, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRADDRDISCR = 4, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRTYPEDISCR = 5, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI = 6, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_LAST = + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI, +}; + // x86 processor feature bits. 
enum : unsigned { GNU_PROPERTY_X86_FEATURE_1_IBT = 1 << 0, diff --git a/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-feature-pauth.s b/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-feature-pauth.s index f28d92e..24918e8 100644 --- a/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-feature-pauth.s +++ b/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-feature-pauth.s @@ -1,98 +1,211 @@ # RUN: rm -rf %t && split-file %s %t && cd %t -# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag.s -o tag.o -# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag-short.s -o tag-short.o -# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag-long.s -o tag-long.o - -# RUN: llvm-readelf --notes tag.o | FileCheck --check-prefix NORMAL %s -# RUN: llvm-readelf --notes tag-short.o | FileCheck --check-prefix SHORT %s -# RUN: llvm-readelf --notes tag-long.o | FileCheck --check-prefix LONG %s - -# NORMAL: AArch64 PAuth ABI tag: platform 0x2a, version 0x1 -# SHORT: AArch64 PAuth ABI tag: -# LONG: AArch64 PAuth ABI tag: platform 0x2a, version 0x1, additional info 0xEFCDAB8967452301 - -# RUN: llvm-readobj --notes tag.o | FileCheck --check-prefix LLVM-NORMAL %s -# RUN: llvm-readobj --notes tag-short.o | FileCheck --check-prefix LLVM-SHORT %s -# RUN: llvm-readobj --notes tag-long.o | FileCheck --check-prefix LLVM-LONG %s - -# LLVM-SHORT: Notes [ -# LLVM-SHORT-NEXT: NoteSection { -# LLVM-SHORT-NEXT: Name: .note.AARCH64-PAUTH-ABI-tag -# LLVM-SHORT-NEXT: Offset: 0x40 -# LLVM-SHORT-NEXT: Size: 0x1C -# LLVM-SHORT-NEXT: Note { -# LLVM-SHORT-NEXT: Owner: ARM -# LLVM-SHORT-NEXT: Data size: 0xC -# LLVM-SHORT-NEXT: Type: NT_ARM_TYPE_PAUTH_ABI_TAG -# LLVM-SHORT-NEXT: Description data ( -# LLVM-SHORT-NEXT: 0000: 2A000000 00000000 01000000 -# LLVM-SHORT-NEXT: ) -# LLVM-SHORT-NEXT: } -# LLVM-SHORT-NEXT: } -# LLVM-SHORT-NEXT: ] - -# LLVM-NORMAL: Notes [ -# LLVM-NORMAL-NEXT: NoteSection { -# LLVM-NORMAL-NEXT: Name: .note.AARCH64-PAUTH-ABI-tag -# LLVM-NORMAL-NEXT: Offset: 0x40 -# LLVM-NORMAL-NEXT: Size: 0x20 -# LLVM-NORMAL-NEXT: Note { -# LLVM-NORMAL-NEXT: Owner: ARM -# LLVM-NORMAL-NEXT: Data size: 0x10 -# LLVM-NORMAL-NEXT: Type: NT_ARM_TYPE_PAUTH_ABI_TAG -# LLVM-NORMAL-NEXT: Platform: 42 -# LLVM-NORMAL-NEXT: Version: 1 -# LLVM-NORMAL-NEXT: } -# LLVM-NORMAL-NEXT: } -# LLVM-NORMAL-NEXT: ] - -# LLVM-LONG: Notes [ -# LLVM-LONG-NEXT: NoteSection { -# LLVM-LONG-NEXT: Name: .note.AARCH64-PAUTH-ABI-tag -# LLVM-LONG-NEXT: Offset: 0x40 -# LLVM-LONG-NEXT: Size: 0x28 -# LLVM-LONG-NEXT: Note { -# LLVM-LONG-NEXT: Owner: ARM -# LLVM-LONG-NEXT: Data size: 0x18 -# LLVM-LONG-NEXT: Type: NT_ARM_TYPE_PAUTH_ABI_TAG -# LLVM-LONG-NEXT: Platform: 42 -# LLVM-LONG-NEXT: Version: 1 -# LLVM-LONG-NEXT: Additional info: EFCDAB8967452301 -# LLVM-LONG-NEXT: } -# LLVM-LONG-NEXT: } -# LLVM-LONG-NEXT: ] - -#--- abi-tag.s - -.section ".note.AARCH64-PAUTH-ABI-tag", "a" -.long 4 -.long 16 -.long 1 -.asciz "ARM" - -.quad 42 // platform -.quad 1 // version - -#--- abi-tag-short.s - -.section ".note.AARCH64-PAUTH-ABI-tag", "a" -.long 4 -.long 12 -.long 1 -.asciz "ARM" - -.quad 42 -.word 1 - -#--- abi-tag-long.s - -.section ".note.AARCH64-PAUTH-ABI-tag", "a" -.long 4 -.long 24 -.long 1 -.asciz "ARM" - -.quad 42 // platform -.quad 1 // version -.quad 0x0123456789ABCDEF // extra data +#--- gnu-42-1.s + +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 
0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 16 // Data size + .quad 42 // PAuth ABI platform + .quad 1 // PAuth ABI version + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-42-1.s -o gnu-42-1.o +# RUN: llvm-readelf --notes gnu-42-1.o | \ +# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x2a (unknown)" -DVERSION=0x1 %s +# RUN: llvm-readobj --notes gnu-42-1.o | \ +# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x2a (unknown)" -DVERSION=0x1 %s + +# ELF: Displaying notes found in: .note.gnu.property +# ELF-NEXT: Owner Data size Description +# ELF-NEXT: GNU 0x00000018 NT_GNU_PROPERTY_TYPE_0 (property note) +# ELF-NEXT: AArch64 PAuth ABI core info: platform [[PLATFORM]], version [[VERSION]] + +# OBJ: Notes [ +# OBJ-NEXT: NoteSection { +# OBJ-NEXT: Name: .note.gnu.property +# OBJ-NEXT: Offset: 0x40 +# OBJ-NEXT: Size: 0x28 +# OBJ-NEXT: Note { +# OBJ-NEXT: Owner: GNU +# OBJ-NEXT: Data size: 0x18 +# OBJ-NEXT: Type: NT_GNU_PROPERTY_TYPE_0 (property note) +# OBJ-NEXT: Property [ +# OBJ-NEXT: AArch64 PAuth ABI core info: platform [[PLATFORM]], version [[VERSION]] +# OBJ-NEXT: ] +# OBJ-NEXT: } +# OBJ-NEXT: } +# OBJ-NEXT: ] + +#--- gnu-0-0.s + +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 16 // Data size + .quad 0 // PAuth ABI platform + .quad 0 // PAuth ABI version + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-0-0.s -o gnu-0-0.o +# RUN: llvm-readelf --notes gnu-0-0.o | \ +# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x0 (invalid)" -DVERSION=0x0 %s +# RUN: llvm-readobj --notes gnu-0-0.o | \ +# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x0 (invalid)" -DVERSION=0x0 %s + +#--- gnu-1-0.s + +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 16 // Data size + .quad 1 // PAuth ABI platform + .quad 0 // PAuth ABI version + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-1-0.s -o gnu-1-0.o +# RUN: llvm-readelf --notes gnu-1-0.o | \ +# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x1 (baremetal)" -DVERSION=0x0 %s +# RUN: llvm-readobj --notes gnu-1-0.o | \ +# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x1 (baremetal)" -DVERSION=0x0 %s + +#--- gnu-0x10000002-85.s + +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 16 // Data size + .quad 0x10000002 // PAuth ABI platform + .quad 85 // PAuth ABI version + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-0x10000002-85.s -o gnu-0x10000002-85.o +# RUN: llvm-readelf --notes gnu-0x10000002-85.o | \ +# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x10000002 (llvm_linux)" \ +# RUN: -DVERSION="0x55 (PointerAuthIntrinsics, !PointerAuthCalls, PointerAuthReturns, 
!PointerAuthAuthTraps, PointerAuthVTPtrAddressDiscrimination, !PointerAuthVTPtrTypeDiscrimination, PointerAuthInitFini)" %s +# RUN: llvm-readobj --notes gnu-0x10000002-85.o | \ +# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x10000002 (llvm_linux)" \ +# RUN: -DVERSION="0x55 (PointerAuthIntrinsics, !PointerAuthCalls, PointerAuthReturns, !PointerAuthAuthTraps, PointerAuthVTPtrAddressDiscrimination, !PointerAuthVTPtrTypeDiscrimination, PointerAuthInitFini)" %s + +#--- gnu-0x10000002-128.s + +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 16 // Data size + .quad 0x10000002 // PAuth ABI platform + .quad 128 // PAuth ABI version + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-0x10000002-128.s -o gnu-0x10000002-128.o +# RUN: llvm-readelf --notes gnu-0x10000002-128.o | \ +# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x10000002 (llvm_linux)" -DVERSION="0x80 (unknown)" %s +# RUN: llvm-readobj --notes gnu-0x10000002-128.o | \ +# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x10000002 (llvm_linux)" -DVERSION="0x80 (unknown)" %s + +#--- gnu-short.s + +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 12 // Data size + .quad 42 // PAuth ABI platform + .word 1 // PAuth ABI version + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-short.s -o gnu-short.o +# RUN: llvm-readelf --notes gnu-short.o | \ +# RUN: FileCheck --check-prefix=ELF-ERR -DSIZE=28 -DDATASIZE=18 \ +# RUN: -DERR="" %s +# RUN: llvm-readobj --notes gnu-short.o | \ +# RUN: FileCheck --check-prefix=OBJ-ERR -DSIZE=28 -DDATASIZE=18 \ +# RUN: -DERR="" %s + +# ELF-ERR: Displaying notes found in: .note.gnu.property +# ELF-ERR-NEXT: Owner Data size Description +# ELF-ERR-NEXT: GNU 0x000000[[DATASIZE]] NT_GNU_PROPERTY_TYPE_0 (property note) +# ELF-ERR-NEXT: AArch64 PAuth ABI core info: [[ERR]] + +# OBJ-ERR: Notes [ +# OBJ-ERR-NEXT: NoteSection { +# OBJ-ERR-NEXT: Name: .note.gnu.property +# OBJ-ERR-NEXT: Offset: 0x40 +# OBJ-ERR-NEXT: Size: 0x[[SIZE]] +# OBJ-ERR-NEXT: Note { +# OBJ-ERR-NEXT: Owner: GNU +# OBJ-ERR-NEXT: Data size: 0x[[DATASIZE]] +# OBJ-ERR-NEXT: Type: NT_GNU_PROPERTY_TYPE_0 (property note) +# OBJ-ERR-NEXT: Property [ +# OBJ-ERR-NEXT: AArch64 PAuth ABI core info: [[ERR]] +# OBJ-ERR-NEXT: ] +# OBJ-ERR-NEXT: } +# OBJ-ERR-NEXT: } +# OBJ-ERR-NEXT: ] + +#--- gnu-long.s + +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 24 // Data size + .quad 42 // PAuth ABI platform + .quad 1 // PAuth ABI version + .quad 0x0123456789ABCDEF + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-long.s -o gnu-long.o +# RUN: llvm-readelf --notes gnu-long.o | \ +# RUN: FileCheck --check-prefix=ELF-ERR -DSIZE=30 -DDATASIZE=20 \ +# RUN: 
-DERR="" %s +# RUN: llvm-readobj --notes gnu-long.o | \ +# RUN: FileCheck --check-prefix=OBJ-ERR -DSIZE=30 -DDATASIZE=20 \ +# RUN: -DERR="" %s diff --git a/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-note-gnu-property.s b/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-note-gnu-property.s index 377e6f9..b517f0b 100644 --- a/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-note-gnu-property.s +++ b/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-note-gnu-property.s @@ -1,3 +1,5 @@ +// See tests for GNU_PROPERTY_AARCH64_FEATURE_PAUTH in aarch64-feature-pauth.s + // RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu %s -o %t // RUN: llvm-readelf --notes %t | FileCheck %s --check-prefix=GNU // RUN: llvm-readobj --notes %t | FileCheck %s --check-prefix=LLVM diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 4b406ef..29a0325 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -61,6 +61,7 @@ #include "llvm/Support/SystemZ/zOSSupport.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -5105,6 +5106,73 @@ template void GNUELFDumper::printAddrsig() { } } +template +static bool printAArch64PAuthABICoreInfo(raw_ostream &OS, uint32_t DataSize, + ArrayRef Desc) { + OS << " AArch64 PAuth ABI core info: "; + // DataSize - size without padding, Desc.size() - size with padding + if (DataSize != 16) { + OS << format("", DataSize); + return false; + } + + uint64_t Platform = + support::endian::read64(Desc.data() + 0); + uint64_t Version = support::endian::read64(Desc.data() + 8); + + const char *PlatformDesc = [Platform]() { + switch (Platform) { + case AARCH64_PAUTH_PLATFORM_INVALID: + return "invalid"; + case AARCH64_PAUTH_PLATFORM_BAREMETAL: + return "baremetal"; + case AARCH64_PAUTH_PLATFORM_LLVM_LINUX: + return "llvm_linux"; + default: + return "unknown"; + } + }(); + + std::string VersionDesc = [Platform, Version]() -> std::string { + if (Platform != AARCH64_PAUTH_PLATFORM_LLVM_LINUX) + return ""; + if (Version >= (1 << (AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_LAST + 1))) + return "unknown"; + + std::array + Flags; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INTRINSICS] = "Intrinsics"; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_CALLS] = "Calls"; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_RETURNS] = "Returns"; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_AUTHTRAPS] = "AuthTraps"; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRADDRDISCR] = + "VTPtrAddressDiscrimination"; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRTYPEDISCR] = + "VTPtrTypeDiscrimination"; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI] = "InitFini"; + + static_assert(AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI == + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_LAST, + "Update when new enum items are defined"); + + std::string Desc; + for (uint32_t I = 0; I < Flags.size(); ++I) { + if (!(Version & (1 << I))) + Desc += '!'; + Desc += Twine("PointerAuth" + Flags[I] + ", ").str(); + } + Desc.resize(Desc.size() - 2); // Trim last ", " + return Desc; + }(); + + OS << format("platform 0x%x (%s), version 0x%x", Platform, PlatformDesc, + Version); + if (!VersionDesc.empty()) + OS << format(" (%s)", VersionDesc.c_str()); + + return true; +} + template static std::string getGNUProperty(uint32_t Type, uint32_t DataSize, ArrayRef Data) { @@ -5162,6 +5230,9 @@ static std::string getGNUProperty(uint32_t Type, uint32_t DataSize, if (PrData) OS << format("", 
PrData); return OS.str(); + case GNU_PROPERTY_AARCH64_FEATURE_PAUTH: + printAArch64PAuthABICoreInfo(OS, DataSize, Data); + return OS.str(); case GNU_PROPERTY_X86_FEATURE_2_NEEDED: case GNU_PROPERTY_X86_FEATURE_2_USED: OS << "x86 feature " @@ -5364,29 +5435,6 @@ static bool printAndroidNote(raw_ostream &OS, uint32_t NoteType, } template -static bool printAArch64Note(raw_ostream &OS, uint32_t NoteType, - ArrayRef Desc) { - if (NoteType != NT_ARM_TYPE_PAUTH_ABI_TAG) - return false; - - OS << " AArch64 PAuth ABI tag: "; - if (Desc.size() < 16) { - OS << format("", Desc.size()); - return false; - } - - uint64_t Platform = endian::read64(Desc.data() + 0); - uint64_t Version = endian::read64(Desc.data() + 8); - OS << format("platform 0x%" PRIx64 ", version 0x%" PRIx64, Platform, Version); - - if (Desc.size() > 16) - OS << ", additional info 0x" - << toHex(ArrayRef(Desc.data() + 16, Desc.size() - 16)); - - return true; -} - -template void GNUELFDumper::printMemtag( const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc, @@ -5783,10 +5831,6 @@ const NoteType AndroidNoteTypes[] = { "NT_ANDROID_TYPE_MEMTAG (Android memory tagging information)"}, }; -const NoteType ARMNoteTypes[] = { - {ELF::NT_ARM_TYPE_PAUTH_ABI_TAG, "NT_ARM_TYPE_PAUTH_ABI_TAG"}, -}; - const NoteType CoreNoteTypes[] = { {ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"}, {ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"}, @@ -5905,8 +5949,6 @@ StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) { return FindNote(LLVMOMPOFFLOADNoteTypes); if (Name == "Android") return FindNote(AndroidNoteTypes); - if (Name == "ARM") - return FindNote(ARMNoteTypes); if (ELFType == ELF::ET_CORE) return FindNote(CoreNoteTypes); @@ -6062,9 +6104,6 @@ template void GNUELFDumper::printNotes() { } else if (Name == "Android") { if (printAndroidNote(OS, Type, Descriptor)) return Error::success(); - } else if (Name == "ARM") { - if (printAArch64Note(OS, Type, Descriptor)) - return Error::success(); } if (!Descriptor.empty()) { OS << " description data:"; @@ -7703,27 +7742,6 @@ static bool printAndroidNoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, } template -static bool printAarch64NoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, - ScopedPrinter &W) { - if (NoteType != NT_ARM_TYPE_PAUTH_ABI_TAG) - return false; - - if (Desc.size() < 16) - return false; - - uint64_t platform = endian::read64(Desc.data() + 0); - uint64_t version = endian::read64(Desc.data() + 8); - W.printNumber("Platform", platform); - W.printNumber("Version", version); - - if (Desc.size() > 16) - W.printString("Additional info", - toHex(ArrayRef(Desc.data() + 16, Desc.size() - 16))); - - return true; -} - -template void LLVMELFDumper::printMemtag( const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc, @@ -7859,9 +7877,6 @@ template void LLVMELFDumper::printNotes() { } else if (Name == "Android") { if (printAndroidNoteLLVMStyle(Type, Descriptor, W)) return Error::success(); - } else if (Name == "ARM") { - if (printAarch64NoteLLVMStyle(Type, Descriptor, W)) - return Error::success(); } if (!Descriptor.empty()) { W.printBinaryBlock("Description data", Descriptor); -- cgit v1.1 From 2cf8118e3aa60f406ec41e88bdd4304f39744e89 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 2 Apr 2024 16:36:36 -0500 Subject: [Libomptarget] Add RPC-based `printf` implementation for OpenMP (#85638) Summary: This patch adds an implementation of `printf` that's provided by the GPU C library runtime. 
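On the device, a `printf` call inside a target region is rewritten by clang's `CGGPUBuiltin` into a call to `__llvm_omp_vprintf(format, args, size)`, with the arguments packed into a single buffer; with this patch that buffer is simply forwarded to the GPU libc's `rpc_fprintf`. Morally, the lowering is equivalent to the hand-written sketch below (`demo` and the struct layout are purely illustrative, the compiler decides the real packing, and the `rpc_fprintf` declaration is copied from the device runtime change in this patch):

```c++
#include <cstdint>
#include <cstdio>

// Declaration as it appears in the device runtime below; the definition lives
// in the GPU C library (libc/src/gpu/rpc_fprintf.cpp), so this sketch is not
// linkable on its own.
int32_t rpc_fprintf(FILE *, const char *, void *, uint64_t);

// Hand-written equivalent of what printf("%d%c%.1f\n", 1, 'c', 1.0) becomes on
// the device: the variadic arguments travel as one packed struct plus its size.
int demo() {
  struct {
    uint32_t x = 1;
    char c = 'c';
    double f = 1.0;
  } args;
  return rpc_fprintf(stdout, "%d%c%.1f\n", &args, sizeof(args));
}
```

The host-side RPC server then walks the format string with the libc `printf` parser and pulls successive values out of that buffer (via the `StructArgList` helper added in the libc patch above).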
This `printf` is currently implemented using the same wrapper handling
that OpenMP sets up. This will be removed once we have proper varargs
support. This `printf` differs from the one CUDA offers in that it is
synchronous and uses a finite size. Additionally we support pretty much
every format specifier except the `%n` option.

Depends on https://github.com/llvm/llvm-project/pull/85331
---
 openmp/libomptarget/DeviceRTL/CMakeLists.txt |  5 ++++
 openmp/libomptarget/DeviceRTL/src/LibC.cpp   | 13 ++++++++++
 openmp/libomptarget/test/libc/printf.c       | 36 ++++++++++++++++++++++++++++
 3 files changed, 54 insertions(+)
 create mode 100644 openmp/libomptarget/test/libc/printf.c

diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
index 2509f12..2e7f28d 100644
--- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt
+++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt
@@ -122,6 +122,11 @@ set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512
 set(link_opt_flags -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false )
 set(link_export_flag -passes=internalize -internalize-public-api-file=${source_directory}/exports)
 
+# If the user built with the GPU C library enabled we will use that instead.
+if(${LIBOMPTARGET_GPU_LIBC_SUPPORT})
+  list(APPEND clang_opt_flags -DOMPTARGET_HAS_LIBC)
+endif()
+
 # Prepend -I to each list element
 set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS}")
 list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I")
diff --git a/openmp/libomptarget/DeviceRTL/src/LibC.cpp b/openmp/libomptarget/DeviceRTL/src/LibC.cpp
index af675b9..33fec81 100644
--- a/openmp/libomptarget/DeviceRTL/src/LibC.cpp
+++ b/openmp/libomptarget/DeviceRTL/src/LibC.cpp
@@ -53,10 +53,23 @@ void memset(void *dst, int C, size_t count) {
     dstc[I] = C;
 }
 
+// If the user built with the GPU C library enabled we will assume that we can
+// call it.
+#ifdef OMPTARGET_HAS_LIBC
+
+// TODO: Remove this handling once we have varargs support.
+extern struct FILE *stdout; +int32_t rpc_fprintf(FILE *, const char *, void *, uint64_t); + +int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) { + return rpc_fprintf(stdout, Format, Arguments, Size); +} +#else /// printf() calls are rewritten by CGGPUBuiltin to __llvm_omp_vprintf int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) { return impl::omp_vprintf(Format, Arguments, Size); } +#endif } #pragma omp end declare target diff --git a/openmp/libomptarget/test/libc/printf.c b/openmp/libomptarget/test/libc/printf.c new file mode 100644 index 0000000..64cdd80 --- /dev/null +++ b/openmp/libomptarget/test/libc/printf.c @@ -0,0 +1,36 @@ +// RUN: %libomptarget-compile-run-and-check-generic + +// REQUIRES: libc + +#include + +int main() { + // CHECK: PASS +#pragma omp target + { printf("PASS\n"); } + + // CHECK: PASS +#pragma omp target + { printf("%s\n", "PASS"); } + + // CHECK: PASS + // CHECK: PASS + // CHECK: PASS + // CHECK: PASS + // CHECK: PASS + // CHECK: PASS + // CHECK: PASS + // CHECK: PASS +#pragma omp target teams num_teams(4) +#pragma omp parallel num_threads(2) + { printf("PASS\n"); } + + // CHECK: PASS + char str[] = {'P', 'A', 'S', 'S', '\0'}; +#pragma omp target map(to : str) + { printf("%s\n", str); } + + // CHECK: 11111111111 +#pragma omp target + { printf("%s%-.0f%4b%c%ld\n", "1111", 1.0, 0xf, '1', 1lu); } +} -- cgit v1.1 From a7f4576ff4e296ff42b16d9d91aadf82b5ea325c Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Tue, 2 Apr 2024 14:48:14 -0700 Subject: [clang-format] Fix a regression in annotating TrailingReturnArrow (#86624) Fixes #86559. --- clang/lib/Format/TokenAnnotator.cpp | 2 ++ clang/unittests/Format/TokenAnnotatorTest.cpp | 4 ++++ 2 files changed, 6 insertions(+) diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index a405a34..3e9988d 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -3889,6 +3889,8 @@ void TokenAnnotator::calculateFormattingInformation(AnnotatedLine &Line) const { } } else if (ClosingParen) { for (auto *Tok = ClosingParen->Next; Tok; Tok = Tok->Next) { + if (Tok->is(TT_CtorInitializerColon)) + break; if (Tok->is(tok::arrow)) { Tok->setType(TT_TrailingReturnArrow); break; diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index 2539d3d..9425647d 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -1916,6 +1916,10 @@ TEST_F(TokenAnnotatorTest, UnderstandsTrailingReturnArrow) { ASSERT_EQ(Tokens.size(), 12u) << Tokens; EXPECT_TOKEN(Tokens[7], tok::arrow, TT_Unknown); + Tokens = annotate("__attribute__((cold)) C() : Base(obj->func()) {}"); + ASSERT_EQ(Tokens.size(), 21u) << Tokens; + EXPECT_TOKEN(Tokens[13], tok::arrow, TT_Unknown); + // Mixed Tokens = annotate("auto f() -> int { auto a = b()->c; }"); ASSERT_EQ(Tokens.size(), 18u) << Tokens; -- cgit v1.1 From e381586f259568bf244fcd857ce91fc5cb38b959 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 2 Apr 2024 17:06:03 -0500 Subject: [libc] Work around lack of '__has_builtin' for GPU server (#87417) Summary: The RPC server build for the GPU support needs to be build from the "projects" phase of the LLVM build. That means it is built with the same compile that LLVM supports, which currently is GCC 7.4 in most cases. 
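The workaround itself is the conventional guard for feature-detection macros; a small illustration is below (the `#ifndef` block is the same guard the diff at the end of this message adds to `config.h`, while `IMPL_TRAP` is an invented name used only to show the effect): a compiler that lacks `__has_builtin` now sees every builtin reported as unavailable instead of failing on the preprocessor condition.

```c++
// Fallback: compilers without __has_builtin (e.g. GCC 7) report every builtin
// as unavailable rather than producing a preprocessor error.
#ifndef __has_builtin
#define __has_builtin(b) 0
#endif

// Illustrative use only; IMPL_TRAP is a made-up macro for this sketch.
#if __has_builtin(__builtin_trap)
#define IMPL_TRAP() __builtin_trap()
#else
#include <cstdlib>
#define IMPL_TRAP() std::abort()
#endif
```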
A previous patch removed the `LIBC_HAS_BUILTIN` indirection we used, which regressed the case where we used the `libc` source externally. The files that we need to use here are `converter.cpp` and `writer.cpp` which currently are compatible with C++17, so there aren't issues with the code itself. However, older GCC does not have this builtin which makes the checks fail. This patch just adds in a simple wrapper that allows it to correctly ignore everything if using a compiler that doesn't support it. --- libc/src/__support/macros/config.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/libc/src/__support/macros/config.h b/libc/src/__support/macros/config.h index 3f200f0..6390c79 100644 --- a/libc/src/__support/macros/config.h +++ b/libc/src/__support/macros/config.h @@ -13,6 +13,12 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_MACROS_CONFIG_H #define LLVM_LIBC_SRC___SUPPORT_MACROS_CONFIG_H +// Workaround for compilers that do not support builtin detection. +// FIXME: This is only required for the GPU portion which should be moved. +#ifndef __has_builtin +#define __has_builtin(b) 0 +#endif + // Compiler feature-detection. // clang.llvm.org/docs/LanguageExtensions.html#has-feature-and-has-extension #ifdef __has_feature -- cgit v1.1 From 68217a52fb9fec8a88623f97a90899b8d27eefb3 Mon Sep 17 00:00:00 2001 From: Haowei Date: Tue, 2 Apr 2024 15:11:17 -0700 Subject: [Fuchsia] Add SWIG flags to Fuchsia Clang stage2 build (#87421) This patch adds SWIG cmake flags to the stage2 build in Fuchsia Clang configuration. --- clang/cmake/caches/Fuchsia.cmake | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/cmake/caches/Fuchsia.cmake b/clang/cmake/caches/Fuchsia.cmake index df69d7d..393d97a 100644 --- a/clang/cmake/caches/Fuchsia.cmake +++ b/clang/cmake/caches/Fuchsia.cmake @@ -71,6 +71,8 @@ set(_FUCHSIA_BOOTSTRAP_PASSTHROUGH Python3_LIBRARIES Python3_INCLUDE_DIRS Python3_RPATH + SWIG_DIR + SWIG_EXECUTABLE CMAKE_FIND_PACKAGE_PREFER_CONFIG CMAKE_SYSROOT CMAKE_MODULE_LINKER_FLAGS -- cgit v1.1 From b4adb42151bbfa80be4cf6d076cbe5edf680693e Mon Sep 17 00:00:00 2001 From: Jon Chesterfield Date: Tue, 2 Apr 2024 22:14:03 +0100 Subject: Use setup_host_tool for clang-ast-dump, fixes 76707 --- clang/lib/Tooling/CMakeLists.txt | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/clang/lib/Tooling/CMakeLists.txt b/clang/lib/Tooling/CMakeLists.txt index 91e6cbdc..8b4ab0e 100644 --- a/clang/lib/Tooling/CMakeLists.txt +++ b/clang/lib/Tooling/CMakeLists.txt @@ -53,14 +53,16 @@ else() list(APPEND implicitDirs -I ${implicitDir}) endforeach() + setup_host_tool(clang-ast-dump CLANG_AST_DUMP clang_ast_dump_exe clang_ast_dump_target) + include(GetClangResourceDir) get_clang_resource_dir(resource_dir PREFIX ${LLVM_BINARY_DIR}) add_custom_command( COMMENT Generate ASTNodeAPI.json OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/ASTNodeAPI.json - DEPENDS clang-ast-dump clang-resource-headers + DEPENDS ${clang_ast_dump_target} clang-resource-headers COMMAND - $ + ${clang_ast_dump_exe} # Skip this in debug mode because parsing AST.h is too slow --skip-processing=${skip_expensive_processing} -I ${resource_dir}/include -- cgit v1.1 From 633bc3bfda71c55bc38d5a3bfdb426bab61ff101 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Tue, 2 Apr 2024 15:45:03 -0700 Subject: [CodeGen][NFC] Make an opt<> static --- clang/lib/CodeGen/BackendUtil.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 1220c57..c8b2a93 100644 --- 
a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -100,8 +100,8 @@ using namespace llvm; namespace llvm { extern cl::opt PrintPipelinePasses; -cl::opt ClRemoveTraps("clang-remove-traps", cl::Optional, - cl::desc("Insert remove-traps pass.")); +static cl::opt ClRemoveTraps("clang-remove-traps", cl::Optional, + cl::desc("Insert remove-traps pass.")); // Experiment to move sanitizers earlier. static cl::opt ClSanitizeOnOptimizerEarlyEP( -- cgit v1.1 From 12c7371296e59c22debdd906f632c5e6574e3a44 Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Tue, 2 Apr 2024 15:51:28 -0700 Subject: AMDGPU: Use PseudoInstr instead of Pseudo Mnemonic for SIMCInstr, NFC (#87420) Pseudo Mnemonic could be of other uses. --- llvm/lib/Target/AMDGPU/DSInstructions.td | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/DSInstructions.td b/llvm/lib/Target/AMDGPU/DSInstructions.td index e944dde..0773ef7 100644 --- a/llvm/lib/Target/AMDGPU/DSInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSInstructions.td @@ -1192,7 +1192,7 @@ def : GCNPat < class Base_DS_Real_gfx6_gfx7_gfx10_gfx11_gfx12 op, DS_Pseudo ps, int ef, string opName = ps.Mnemonic, bit hasGDS = true> - : DS_Real, SIMCInstr { + : DS_Real, SIMCInstr { let Inst{7-0} = !if(ps.has_offset0, offset0, 0); let Inst{15-8} = !if(ps.has_offset1, offset1, 0); @@ -1557,7 +1557,7 @@ defm DS_MAX_SRC2_F64 : DS_Real_gfx6_gfx7_gfx10<0x0d3>; class DS_Real_vi op, DS_Pseudo ps> : DS_Real , - SIMCInstr { + SIMCInstr { let AssemblerPredicate = isGFX8GFX9; let DecoderNamespace = "GFX8"; -- cgit v1.1 From 84ae8cb4af9abafe9f45e69744607aadb38d649a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20Kokem=C3=BCller?= Date: Wed, 3 Apr 2024 01:09:26 +0200 Subject: [libc++] `std::ranges::advance`: avoid unneeded bounds checks when advancing iterator (#84126) Currently, the bounds check in `std::ranges::advance(it, n, s)` is done _before_ `n` is checked. This results in one extra, unneeded bounds check. Thus, `std::ranges::advance(it, 1, s)` currently is _not_ simply equivalent to: ```c++ if (it != s) { ++it; } ``` This difference in behavior matters when the check involves some "expensive" logic. For example, the `==` operator of `std::istreambuf_iterator` may actually have to read the underlying `streambuf`. Swapping around the checks in the `while` results in the expected behavior. --- libcxx/include/__iterator/advance.h | 4 +- .../iterator_count_sentinel.pass.cpp | 76 +++++++++++++++++----- libcxx/test/support/test_iterators.h | 22 ++++++- 3 files changed, 80 insertions(+), 22 deletions(-) diff --git a/libcxx/include/__iterator/advance.h b/libcxx/include/__iterator/advance.h index 7959bde..296db1a 100644 --- a/libcxx/include/__iterator/advance.h +++ b/libcxx/include/__iterator/advance.h @@ -170,14 +170,14 @@ public: } else { // Otherwise, if `n` is non-negative, while `bool(i != bound_sentinel)` is true, increments `i` but at // most `n` times. - while (__i != __bound_sentinel && __n > 0) { + while (__n > 0 && __i != __bound_sentinel) { ++__i; --__n; } // Otherwise, while `bool(i != bound_sentinel)` is true, decrements `i` but at most `-n` times. 
if constexpr (bidirectional_iterator<_Ip> && same_as<_Ip, _Sp>) { - while (__i != __bound_sentinel && __n < 0) { + while (__n < 0 && __i != __bound_sentinel) { --__i; ++__n; } diff --git a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp index a1c1564..76439ef 100644 --- a/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp +++ b/libcxx/test/std/iterators/iterator.primitives/range.iter.ops/range.iter.ops.advance/iterator_count_sentinel.pass.cpp @@ -21,9 +21,12 @@ #include "../types.h" template -constexpr void check_forward(int* first, int* last, std::iter_difference_t n, int* expected) { +constexpr void +check_forward(int* first, int* last, std::iter_difference_t n, int* expected, int expected_equals_count = -1) { using Difference = std::iter_difference_t; Difference const M = (expected - first); // expected travel distance + // `expected_equals_count` is only relevant when `Count` is true. + assert(Count || expected_equals_count == -1); { It it(first); @@ -42,6 +45,7 @@ constexpr void check_forward(int* first, int* last, std::iter_difference_t n // regardless of the iterator category. assert(it.stride_count() == M); assert(it.stride_displacement() == M); + assert(it.equals_count() == expected_equals_count); } } @@ -74,9 +78,20 @@ constexpr void check_forward_sized_sentinel(int* first, int* last, std::iter_dif } } -template -constexpr void check_backward(int* first, int* last, std::iter_difference_t n, int* expected) { - static_assert(std::random_access_iterator, "This test doesn't support non random access iterators"); +struct Expected { + int stride_count; + int stride_displacement; + int equals_count; +}; + +template +constexpr void +check_backward(int* first, int* last, std::iter_difference_t n, int* expected, Expected expected_counts) { + // Check preconditions for `advance` when called with negative `n` + // (see [range.iter.op.advance]). In addition, allow `n == 0`. + assert(n <= 0); + static_assert(std::bidirectional_iterator); + using Difference = std::iter_difference_t; Difference const M = (expected - last); // expected travel distance (which is negative) @@ -92,9 +107,14 @@ constexpr void check_backward(int* first, int* last, std::iter_difference_t { auto it = stride_counting_iterator(It(last)); auto sent = stride_counting_iterator(It(first)); + static_assert(std::bidirectional_iterator>); + static_assert(Count == !std::sized_sentinel_for); + (void)std::ranges::advance(it, n, sent); - assert(it.stride_count() <= 1); - assert(it.stride_displacement() <= 1); + + assert(it.stride_count() == expected_counts.stride_count); + assert(it.stride_displacement() == expected_counts.stride_displacement); + assert(it.equals_count() == expected_counts.equals_count); } } @@ -171,13 +191,17 @@ constexpr bool test() { { int* expected = n > size ? range + size : range + n; + int equals_count = n > size ? 
size + 1 : n; + + // clang-format off check_forward>( range, range+size, n, expected); check_forward>( range, range+size, n, expected); - check_forward>( range, range+size, n, expected); - check_forward>(range, range+size, n, expected); - check_forward>(range, range+size, n, expected); - check_forward>( range, range+size, n, expected); - check_forward( range, range+size, n, expected); + check_forward>( range, range+size, n, expected, equals_count); + check_forward>(range, range+size, n, expected, equals_count); + check_forward>(range, range+size, n, expected, equals_count); + check_forward>( range, range+size, n, expected, equals_count); + check_forward( range, range+size, n, expected, equals_count); + // clang-format on check_forward_sized_sentinel>( range, range+size, n, expected); check_forward_sized_sentinel>( range, range+size, n, expected); @@ -188,14 +212,32 @@ constexpr bool test() { check_forward_sized_sentinel( range, range+size, n, expected); } + // Input and forward iterators are not tested as the backwards case does + // not apply for them. { - // Note that we can only test ranges::advance with a negative n for iterators that - // are sized sentinels for themselves, because ranges::advance is UB otherwise. - // In particular, that excludes bidirectional_iterators since those are not sized sentinels. int* expected = n > size ? range : range + size - n; - check_backward>(range, range+size, -n, expected); - check_backward>( range, range+size, -n, expected); - check_backward( range, range+size, -n, expected); + { + Expected expected_counts = { + .stride_count = static_cast(range + size - expected), + .stride_displacement = -expected_counts.stride_count, + .equals_count = n > size ? size + 1 : n, + }; + + check_backward>(range, range + size, -n, expected, expected_counts); + } + { + Expected expected_counts = { + // If `n >= size`, the algorithm can just do `it = std::move(sent);` + // instead of doing iterator arithmetic. + .stride_count = (n >= size) ? 0 : 1, + .stride_displacement = (n >= size) ? 0 : 1, + .equals_count = 0, + }; + + check_backward>(range, range + size, -n, expected, expected_counts); + check_backward>(range, range + size, -n, expected, expected_counts); + check_backward(range, range + size, -n, expected, expected_counts); + } } } } diff --git a/libcxx/test/support/test_iterators.h b/libcxx/test/support/test_iterators.h index c92ce37..7ffb749 100644 --- a/libcxx/test/support/test_iterators.h +++ b/libcxx/test/support/test_iterators.h @@ -725,11 +725,14 @@ struct common_input_iterator { # endif // TEST_STD_VER >= 20 // Iterator adaptor that counts the number of times the iterator has had a successor/predecessor -// operation called. Has two recorders: +// operation or an equality comparison operation called. Has three recorders: // * `stride_count`, which records the total number of calls to an op++, op--, op+=, or op-=. // * `stride_displacement`, which records the displacement of the calls. This means that both // op++/op+= will increase the displacement counter by 1, and op--/op-= will decrease the // displacement counter by 1. +// * `equals_count`, which records the total number of calls to an op== or op!=. If compared +// against a sentinel object, that sentinel object must call the `record_equality_comparison` +// function so that the comparison is counted correctly. 
template class stride_counting_iterator { public: @@ -754,6 +757,8 @@ public: constexpr difference_type stride_displacement() const { return stride_displacement_; } + constexpr difference_type equals_count() const { return equals_count_; } + constexpr decltype(auto) operator*() const { return *It(base_); } constexpr decltype(auto) operator[](difference_type n) const { return It(base_)[n]; } @@ -838,10 +843,13 @@ public: return base(x) - base(y); } + constexpr void record_equality_comparison() const { ++equals_count_; } + constexpr bool operator==(stride_counting_iterator const& other) const requires std::sentinel_for { - return It(base_) == It(other.base_); + record_equality_comparison(); + return It(base_) == It(other.base_); } friend constexpr bool operator<(stride_counting_iterator const& x, stride_counting_iterator const& y) @@ -875,6 +883,7 @@ private: decltype(base(std::declval())) base_; difference_type stride_count_ = 0; difference_type stride_displacement_ = 0; + mutable difference_type equals_count_ = 0; }; template stride_counting_iterator(It) -> stride_counting_iterator; @@ -887,7 +896,14 @@ class sentinel_wrapper { public: explicit sentinel_wrapper() = default; constexpr explicit sentinel_wrapper(const It& it) : base_(base(it)) {} - constexpr bool operator==(const It& other) const { return base_ == base(other); } + constexpr bool operator==(const It& other) const { + // If supported, record statistics about the equality operator call + // inside `other`. + if constexpr (requires { other.record_equality_comparison(); }) { + other.record_equality_comparison(); + } + return base_ == base(other); + } friend constexpr It base(const sentinel_wrapper& s) { return It(s.base_); } private: decltype(base(std::declval())) base_; -- cgit v1.1 From e61d6b74ddf28df196484f6251271f543ae902ab Mon Sep 17 00:00:00 2001 From: Michael Buch Date: Wed, 3 Apr 2024 00:28:12 +0100 Subject: [lldb][SymbolFileDWARFDebugMap] Introduce enum to indicate whether to continue iteration of object files (#87344) This patch introduces a new `IterationMarker` enum (happy to take alternative name suggestions), which callbacks, like the one in `SymbolFileDWARFDebugMap::ForEachSymbolFile`, can return in order to indicate whether the caller should continue iterating or bail. For now this patch just changes the `ForEachSymbolFile` callback to use this new enum. In the future we could change the various `DWARFIndex::GetXXX` callbacks to do the same. This makes the callbacks easier to read and hopefully reduces the chance of bugs like https://github.com/llvm/llvm-project/pull/87177. --- lldb/include/lldb/lldb-private-enumerations.h | 7 ++ .../SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp | 75 +++++++++++----------- .../SymbolFile/DWARF/SymbolFileDWARFDebugMap.h | 10 +-- 3 files changed, 52 insertions(+), 40 deletions(-) diff --git a/lldb/include/lldb/lldb-private-enumerations.h b/lldb/include/lldb/lldb-private-enumerations.h index b8f5045..68e060f 100644 --- a/lldb/include/lldb/lldb-private-enumerations.h +++ b/lldb/include/lldb/lldb-private-enumerations.h @@ -240,6 +240,13 @@ enum LoadDependentFiles { eLoadDependentsNo, }; +/// Useful for callbacks whose return type indicates +/// whether to continue iteration or short-circuit. 
+enum class IterationAction { + Continue = 0, + Stop, +}; + inline std::string GetStatDescription(lldb_private::StatisticKind K) { switch (K) { case StatisticKind::ExpressionSuccessful: diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp index 4bc2cfd..1de5858 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.cpp @@ -37,6 +37,7 @@ #include "LogChannelDWARF.h" #include "SymbolFileDWARF.h" +#include "lldb/lldb-private-enumerations.h" #include #include @@ -803,13 +804,13 @@ SymbolFileDWARFDebugMap::GetDynamicArrayInfoForUID( bool SymbolFileDWARFDebugMap::CompleteType(CompilerType &compiler_type) { bool success = false; if (compiler_type) { - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { if (oso_dwarf->HasForwardDeclForCompilerType(compiler_type)) { oso_dwarf->CompleteType(compiler_type); success = true; - return true; + return IterationAction::Stop; } - return false; + return IterationAction::Continue; }); } return success; @@ -915,7 +916,7 @@ void SymbolFileDWARFDebugMap::FindGlobalVariables( std::lock_guard guard(GetModuleMutex()); uint32_t total_matches = 0; - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { const uint32_t old_size = variables.GetSize(); oso_dwarf->FindGlobalVariables(name, parent_decl_ctx, max_matches, variables); @@ -925,18 +926,18 @@ void SymbolFileDWARFDebugMap::FindGlobalVariables( // Are we getting all matches? if (max_matches == UINT32_MAX) - return false; // Yep, continue getting everything + return IterationAction::Continue; // Yep, continue getting everything // If we have found enough matches, lets get out if (max_matches >= total_matches) - return true; + return IterationAction::Stop; // Update the max matches for any subsequent calls to find globals in any // other object files with DWARF max_matches -= oso_matches; } - return false; + return IterationAction::Continue; }); } @@ -945,7 +946,7 @@ void SymbolFileDWARFDebugMap::FindGlobalVariables( VariableList &variables) { std::lock_guard guard(GetModuleMutex()); uint32_t total_matches = 0; - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { const uint32_t old_size = variables.GetSize(); oso_dwarf->FindGlobalVariables(regex, max_matches, variables); @@ -955,18 +956,18 @@ void SymbolFileDWARFDebugMap::FindGlobalVariables( // Are we getting all matches? 
if (max_matches == UINT32_MAX) - return false; // Yep, continue getting everything + return IterationAction::Continue; // Yep, continue getting everything // If we have found enough matches, lets get out if (max_matches >= total_matches) - return true; + return IterationAction::Stop; // Update the max matches for any subsequent calls to find globals in any // other object files with DWARF max_matches -= oso_matches; } - return false; + return IterationAction::Continue; }); } @@ -1071,7 +1072,7 @@ void SymbolFileDWARFDebugMap::FindFunctions( LLDB_SCOPED_TIMERF("SymbolFileDWARFDebugMap::FindFunctions (name = %s)", lookup_info.GetLookupName().GetCString()); - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { uint32_t sc_idx = sc_list.GetSize(); oso_dwarf->FindFunctions(lookup_info, parent_decl_ctx, include_inlines, sc_list); @@ -1079,7 +1080,7 @@ void SymbolFileDWARFDebugMap::FindFunctions( RemoveFunctionsWithModuleNotEqualTo(m_objfile_sp->GetModule(), sc_list, sc_idx); } - return false; + return IterationAction::Continue; }); } @@ -1090,7 +1091,7 @@ void SymbolFileDWARFDebugMap::FindFunctions(const RegularExpression ®ex, LLDB_SCOPED_TIMERF("SymbolFileDWARFDebugMap::FindFunctions (regex = '%s')", regex.GetText().str().c_str()); - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { uint32_t sc_idx = sc_list.GetSize(); oso_dwarf->FindFunctions(regex, include_inlines, sc_list); @@ -1098,7 +1099,7 @@ void SymbolFileDWARFDebugMap::FindFunctions(const RegularExpression ®ex, RemoveFunctionsWithModuleNotEqualTo(m_objfile_sp->GetModule(), sc_list, sc_idx); } - return false; + return IterationAction::Continue; }); } @@ -1121,9 +1122,9 @@ void SymbolFileDWARFDebugMap::GetTypes(SymbolContextScope *sc_scope, oso_dwarf->GetTypes(sc_scope, type_mask, type_list); } } else { - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { oso_dwarf->GetTypes(sc_scope, type_mask, type_list); - return false; + return IterationAction::Continue; }); } } @@ -1141,9 +1142,9 @@ SymbolFileDWARFDebugMap::ParseCallEdgesInFunction( TypeSP SymbolFileDWARFDebugMap::FindDefinitionTypeForDWARFDeclContext( const DWARFDIE &die) { TypeSP type_sp; - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { type_sp = oso_dwarf->FindDefinitionTypeForDWARFDeclContext(die); - return ((bool)type_sp); + return type_sp ? 
IterationAction::Stop : IterationAction::Continue; }); return type_sp; } @@ -1152,13 +1153,13 @@ bool SymbolFileDWARFDebugMap::Supports_DW_AT_APPLE_objc_complete_type( SymbolFileDWARF *skip_dwarf_oso) { if (m_supports_DW_AT_APPLE_objc_complete_type == eLazyBoolCalculate) { m_supports_DW_AT_APPLE_objc_complete_type = eLazyBoolNo; - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { if (skip_dwarf_oso != oso_dwarf && oso_dwarf->Supports_DW_AT_APPLE_objc_complete_type(nullptr)) { m_supports_DW_AT_APPLE_objc_complete_type = eLazyBoolYes; - return true; + return IterationAction::Stop; } - return false; + return IterationAction::Continue; }); } return m_supports_DW_AT_APPLE_objc_complete_type == eLazyBoolYes; @@ -1217,10 +1218,10 @@ TypeSP SymbolFileDWARFDebugMap::FindCompleteObjCDefinitionTypeForDIE( if (!must_be_implementation) { TypeSP type_sp; - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { type_sp = oso_dwarf->FindCompleteObjCDefinitionTypeForDIE( die, type_name, must_be_implementation); - return (bool)type_sp; + return type_sp ? IterationAction::Stop : IterationAction::Continue; }); return type_sp; @@ -1231,9 +1232,10 @@ TypeSP SymbolFileDWARFDebugMap::FindCompleteObjCDefinitionTypeForDIE( void SymbolFileDWARFDebugMap::FindTypes(const TypeQuery &query, TypeResults &results) { std::lock_guard guard(GetModuleMutex()); - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { oso_dwarf->FindTypes(query, results); - return results.Done(query); // Keep iterating if we aren't done. + return results.Done(query) ? IterationAction::Stop + : IterationAction::Continue; }); } @@ -1243,23 +1245,24 @@ CompilerDeclContext SymbolFileDWARFDebugMap::FindNamespace( std::lock_guard guard(GetModuleMutex()); CompilerDeclContext matching_namespace; - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { matching_namespace = oso_dwarf->FindNamespace(name, parent_decl_ctx, only_root_namespaces); - return (bool)matching_namespace; + return matching_namespace ? IterationAction::Stop + : IterationAction::Continue; }); return matching_namespace; } void SymbolFileDWARFDebugMap::DumpClangAST(Stream &s) { - ForEachSymbolFile([&s](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&s](SymbolFileDWARF *oso_dwarf) { oso_dwarf->DumpClangAST(s); // The underlying assumption is that DumpClangAST(...) will obtain the // AST from the underlying TypeSystem and therefore we only need to do // this once and can stop after the first iteration hence we return true. 
- return true; + return IterationAction::Stop; }); } @@ -1389,9 +1392,9 @@ SymbolFileDWARFDebugMap::GetCompilerContextForUID(lldb::user_id_t type_uid) { void SymbolFileDWARFDebugMap::ParseDeclsForContext( lldb_private::CompilerDeclContext decl_ctx) { - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { oso_dwarf->ParseDeclsForContext(decl_ctx); - return false; // Keep iterating + return IterationAction::Continue; }); } @@ -1519,14 +1522,14 @@ SymbolFileDWARFDebugMap::AddOSOARanges(SymbolFileDWARF *dwarf2Data, ModuleList SymbolFileDWARFDebugMap::GetDebugInfoModules() { ModuleList oso_modules; - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { ObjectFile *oso_objfile = oso_dwarf->GetObjectFile(); if (oso_objfile) { ModuleSP module_sp = oso_objfile->GetModule(); if (module_sp) oso_modules.Append(module_sp); } - return false; // Keep iterating + return IterationAction::Continue; }); return oso_modules; } @@ -1579,8 +1582,8 @@ Status SymbolFileDWARFDebugMap::CalculateFrameVariableError(StackFrame &frame) { void SymbolFileDWARFDebugMap::GetCompileOptions( std::unordered_map &args) { - ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) -> bool { + ForEachSymbolFile([&](SymbolFileDWARF *oso_dwarf) { oso_dwarf->GetCompileOptions(args); - return false; + return IterationAction::Continue; }); } diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h index d639ee50..de22dd6 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARFDebugMap.h @@ -20,6 +20,7 @@ #include "UniqueDWARFASTType.h" #include "lldb/Utility/StructuredData.h" +#include "lldb/lldb-private-enumerations.h" class DWARFASTParserClang; @@ -233,13 +234,14 @@ protected: SymbolFileDWARF *GetSymbolFileByOSOIndex(uint32_t oso_idx); - // If closure returns "false", iteration continues. If it returns - // "true", iteration terminates. - void ForEachSymbolFile(std::function closure) { + /// If closure returns \ref IterationAction::Continue, iteration + /// continues. Otherwise, iteration terminates. + void + ForEachSymbolFile(std::function closure) { for (uint32_t oso_idx = 0, num_oso_idxs = m_compile_unit_infos.size(); oso_idx < num_oso_idxs; ++oso_idx) { if (SymbolFileDWARF *oso_dwarf = GetSymbolFileByOSOIndex(oso_idx)) { - if (closure(oso_dwarf)) + if (closure(oso_dwarf) == IterationAction::Stop) return; } } -- cgit v1.1 From 0a94d35bfb81cb0bef60ebe60513d191661da0bd Mon Sep 17 00:00:00 2001 From: Spenser Bauman Date: Tue, 2 Apr 2024 19:45:27 -0400 Subject: [mlir][tosa] Fix tosa-infer-shapes crash (#87234) The tosa-infer-shapes pass inserts tensor.cast operations to mediate refined result types with consumers whose types cannot be refined. This process interferes with how types are refined in tosa.while_loop body regions, where types are propagated speculatively (to determine the types of the tosa.yield terminator) and then reverted. The new tensor.cast operations result in a crash due to not having types associated to them for the reversion process. This change modifies the shape propagation behavior so that the introduction of tensor.cast operations behaves better with this type reversion process. The new behavior is to only introduce tensor.cast operations once we wish to commit the newly computed types to the IR.
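For intuition, here is a minimal standalone C++ sketch of the record/revert/commit pattern that the new `TypeModificationState` class in the diff below implements. It deliberately uses toy `Value`/`Type` stand-ins rather than the real MLIR API, so every name in it is illustrative only; the example that actually triggers the crash follows right after this sketch.

```c++
// Toy analogue of TypeModificationState: record speculative type changes so
// they can either be rolled back (while-loop fixed-point search) or committed
// once, when casts for non-inferrable users would be materialized.
#include <cassert>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

using Type = std::string;

struct Value {
  std::string name;
  Type type;
};

class TypeModificationState {
public:
  ~TypeModificationState() {
    assert(oldTypes.empty() && "unhandled type modifications");
  }

  // Speculatively update a value's type, remembering the previous one.
  void setType(Value &value, Type type) {
    if (value.type != type) {
      oldTypes.emplace_back(&value, value.type);
      value.type = std::move(type);
    }
  }

  // Undo every recorded change (used after each speculative pass over the
  // while-loop body).
  void revert() {
    for (auto &[value, type] : oldTypes)
      value->type = type;
    oldTypes.clear();
  }

  // Keep the new types. The real pass would only now insert tensor.cast ops
  // for users that cannot accept the refined type.
  void commit() {
    for (auto &[value, oldType] : oldTypes)
      std::cout << value->name << ": " << oldType << " -> " << value->type
                << " (cast materialized for non-inferrable users)\n";
    oldTypes.clear();
  }

private:
  std::vector<std::pair<Value *, Type>> oldTypes;
};

int main() {
  Value v{"%3", "tensor<*xi32>"};

  {
    // Speculative iteration: refine, then roll back.
    TypeModificationState speculative;
    speculative.setType(v, "tensor<10xi32>");
    speculative.revert(); // v is tensor<*xi32> again
  }

  // Final propagation: refine and commit; casts are only created at this point.
  TypeModificationState state;
  state.setType(v, "tensor<10xi32>");
  state.commit();
  return 0;
}
```

In the actual pass, `commit()` additionally walks the uses of each re-typed SSA value and wraps users that cannot accept the refined type in `tensor.cast`; the previous implementation created those casts during the speculative while-body propagation, which is what led to the crash described above.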
This is an example causing the crash: ```mlir func.func @while_dont_crash(%arg0 : tensor) -> (tensor<*xi32>) { %0 = tosa.add %arg0, %arg0 : (tensor, tensor) -> tensor<*xi32> %1 = tosa.while_loop (%arg1 = %0) : (tensor<*xi32>) -> tensor<*xi32> { %2 = "tosa.const"() <{value = dense<3> : tensor}> : () -> tensor %3 = tosa.greater_equal %2, %arg1 : (tensor, tensor<*xi32>) -> tensor<*xi1> tosa.yield %3 : tensor<*xi1> } do { ^bb0(%arg1: tensor<*xi32>): // Inferrable operation whose type will refine to tensor %3 = tosa.add %arg1, %arg1 : (tensor<*xi32>, tensor<*xi32>) -> tensor<*xi32> // Non-inferrable use site, will require the cast: // tensor.cast %3 : tensor to tensor<*xi32> // // The new cast operation will result in accessing undefined memory through // originalTypeMap in the C++ code. "use"(%3) : (tensor<*xi32>) -> () tosa.yield %3 : tensor<*xi32> } return %1 : tensor<*xi32> } ``` The `tensor.cast` operation inserted in the loop body causes a failure in the code which resets the types after propagation through the loop body: ```c++ // The types inferred in the block assume the operand types specified for // this iteration. We need to restore the original types to ensure that // future iterations only use the already specified types, not possible // types from previous iterations. for (auto &block : bodyRegion) { for (auto arg : block.getArguments()) arg.setType(originalTypeMap[arg]); for (auto &op : block) for (auto result : op.getResults()) result.setType(originalTypeMap[result]); // problematic access } ``` --------- Co-authored-by: Spenser Bauman --- .../Dialect/Tosa/Transforms/TosaInferShapes.cpp | 198 +++++++++++---------- mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir | 93 +++++++++- 2 files changed, 195 insertions(+), 96 deletions(-) diff --git a/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp b/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp index ad28c56..8614559 100644 --- a/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp +++ b/mlir/lib/Dialect/Tosa/Transforms/TosaInferShapes.cpp @@ -18,14 +18,9 @@ #include "mlir/Dialect/Tosa/IR/TosaOps.h" #include "mlir/Dialect/Tosa/Utils/ShapeUtils.h" #include "mlir/IR/Builders.h" -#include "mlir/IR/BuiltinOps.h" -#include "mlir/IR/IRMapping.h" -#include "mlir/IR/Matchers.h" #include "mlir/Interfaces/InferTypeOpInterface.h" #include "mlir/Pass/Pass.h" #include "mlir/Transforms/DialectConversion.h" -#include "mlir/Transforms/GreedyPatternRewriteDriver.h" -#include "llvm/Support/FormatVariadic.h" namespace mlir { namespace tosa { @@ -39,9 +34,87 @@ using namespace mlir::tosa; namespace { -void propagateShapesInRegion(Region ®ion); +// Check whether this use case is replaceable. We define an op as +// being replaceable if it is used by a TosaOp, or an op with a +// type-inference related interface. +// When a non-replaceable use is encountered, the value is wrapped in a +// cast back to the original type after inference. +bool isReplaceableUser(Operation *user) { + // Handle unregistered dialects. + if (!user->getDialect()) + return false; + + return user->getDialect()->getNamespace() == + TosaDialect::getDialectNamespace() || + isa(user); +} + +// During type propagation, the types of values in the operator graph are +// updated. For the tosa.while_loop operation, types are speculatively updated +// within the body region to determine the output type of the while_loop. This +// process is performed until a fixed point is reached, then the types are +// reverted. 
+// +// This class encapsulates the state information needed to perform the reversion +// process or to commit to the final changes. +class TypeModificationState { +public: + TypeModificationState() = default; + + ~TypeModificationState() { + // Ensure the recorded modifications are either committed or reverted. + assert(oldTypes.empty() && "unhandled type modifications"); + } + + // Update the state of the value and record the old type. + void setType(Value value, Type type) { + if (value.getType() != type) { + oldTypes.emplace_back(value, value.getType()); + value.setType(type); + } + } -void propagateShapesToTosaIf(Operation &op) { + // Revert changes made to the types in the IR by setting all the affected + // values to their old types. + void revert() { + // Otherwise revert the changes. + for (auto [value, type] : oldTypes) + value.setType(type); + + oldTypes.clear(); + } + + // Commit the changes to the types in the IR. + // This requires inserting tensor.cast operations to mediate the newly + // inferred result types with users that do not support type inference. + void commit() { + // For each use whose type changed, cast the value with the new type back to + // the old type. + for (auto [value, oldType] : oldTypes) { + for (auto &use : value.getUses()) { + if (isReplaceableUser(use.getOwner())) + continue; + + OpBuilder builder(value.getContext()); + builder.setInsertionPoint(use.getOwner()); + + Location loc = value.getLoc(); + use.set(builder.create(loc, oldType, value)); + } + } + + oldTypes.clear(); + } + +private: + // A record of each value whose type was updated along with that value's + // previous type. + llvm::SmallVector> oldTypes; +}; + +void propagateShapesInRegion(Region ®ion, TypeModificationState &state); + +void propagateShapesToTosaIf(Operation &op, TypeModificationState &state) { IfOp ifOp = dyn_cast(op); if (!ifOp) return; @@ -58,7 +131,7 @@ void propagateShapesToTosaIf(Operation &op) { if (inferredTy.hasRank()) { Type newType = oldType.clone(inferredTy.getShape()); - blockArg.setType(newType); + state.setType(blockArg, newType); } } @@ -71,14 +144,14 @@ void propagateShapesToTosaIf(Operation &op) { ValueKnowledge::join(operandKnowledge, blockKnowledge); if (!joinedKnowledge) continue; - frontBlock.getArgument(i).setType(joinedKnowledge.getType()); + state.setType(frontBlock.getArgument(i), joinedKnowledge.getType()); } - propagateShapesInRegion(region); + propagateShapesInRegion(region, state); } } -void propagateShapesToTosaWhile(Operation &op) { +void propagateShapesToTosaWhile(Operation &op, TypeModificationState &state) { WhileOp whileOp = dyn_cast(op); if (!whileOp) return; @@ -86,49 +159,29 @@ void propagateShapesToTosaWhile(Operation &op) { // Determine what the expected argument types are to the cond/body blocks. // The expected arguments should be compatible with ever iteration of the // loop body / condition for tosa.while. - llvm::SmallVector argTypes; - for (auto operand : op.getOperands()) { - auto operandTy = cast(operand.getType()); - if (operandTy.hasRank()) { - auto newTy = operandTy.clone(operandTy.getShape()); - argTypes.push_back(newTy); - } else { - argTypes.push_back(operand.getType()); - } - } - - // Save out the type information so we can restore at the end. 
- llvm::DenseMap originalTypeMap; - for (auto &block : op.getRegion(1)) { - for (auto arg : block.getArguments()) - originalTypeMap[arg] = arg.getType(); - for (auto &op : block) - for (auto result : op.getResults()) - originalTypeMap[result] = result.getType(); - } + SmallVector argTypes = llvm::to_vector(op.getOperandTypes()); bool hasNewTypes = true; while (hasNewTypes) { + TypeModificationState localState; // Set types on the block args. Region &bodyRegion = op.getRegion(1); Block &block = bodyRegion.front(); for (int i = 0, s = argTypes.size(); i < s; i++) { - block.getArgument(i).setType(argTypes[i]); + localState.setType(block.getArgument(i), argTypes[i]); } // Propagate to the end. - propagateShapesInRegion(bodyRegion); + propagateShapesInRegion(bodyRegion, localState); - // Find all the tosa yield types and verify there is atleast one. + // Find all the tosa yield types and verify there is a single one. llvm::SmallVector yieldOps; for (auto &block : bodyRegion) if (auto yieldOp = dyn_cast(block.getTerminator())) yieldOps.push_back(yieldOp); - if (yieldOps.empty()) - return; - + assert(yieldOps.size() == 1 && "missing or non-unique yield op"); // Using the new tosa.yield operand types, infer the new subtypes. llvm::SmallVector yieldTypeInfo; for (auto ty : argTypes) { @@ -158,17 +211,8 @@ void propagateShapesToTosaWhile(Operation &op) { argTypes[i] = newType; } - // The types inferred in the block assume the operand types specified for - // this iteration. We need to restore the original types to ensure that - // future iterations only use the already specified types, not possible - // types from previous iterations. - for (auto &block : bodyRegion) { - for (auto arg : block.getArguments()) - arg.setType(originalTypeMap[arg]); - for (auto &op : block) - for (auto result : op.getResults()) - result.setType(originalTypeMap[result]); - } + // Revert all changes made during the speculative part of the algorithm. + localState.revert(); } // We now set the block arguments according to the most recent shape @@ -176,41 +220,22 @@ void propagateShapesToTosaWhile(Operation &op) { // iteration. for (auto ®ion : op.getRegions()) { for (unsigned int i = 0, s = argTypes.size(); i < s; i++) { - region.front().getArgument(i).setType(argTypes[i]); + state.setType(region.front().getArgument(i), argTypes[i]); } - propagateShapesInRegion(region); + propagateShapesInRegion(region, state); } } -// Track the old type for each operand whose type was updated -// during inference. This information is used to introduce casts -// back to the type expected by the operand after inference. -struct TypeRewriteInfo { - OpOperand *operand; - Type oldType; -}; - -void propagateShapesInRegion(Region ®ion) { - // Check whether this use case is replaceable. We define an op as - // being replaceable if it is used by a TosaOp, or an op with a - // type-inference related interface. - // When a non-replaceable use is encountered, the value is wrapped in a - // cast back to the original type after inference. 
- auto isReplaceableUser = [](Operation *user) -> bool { - return user->getDialect()->getNamespace() == - TosaDialect::getDialectNamespace() || - isa(user); - }; - - llvm::SmallVector requiresUpdate; +void propagateShapesInRegion(Region ®ion, TypeModificationState &state) { for (auto &block : region) { for (Operation &op : block) { - if (op.getDialect()->getNamespace() != TosaDialect::getDialectNamespace()) + if (!op.getDialect() || + op.getDialect()->getNamespace() != TosaDialect::getDialectNamespace()) continue; - propagateShapesToTosaIf(op); - propagateShapesToTosaWhile(op); + propagateShapesToTosaIf(op, state); + propagateShapesToTosaWhile(op, state); InferShapedTypeOpInterface shapeInterface = dyn_cast(op); @@ -252,30 +277,11 @@ void propagateShapesInRegion(Region ®ion) { continue; // Set new type - result.setType(newKnowledge.getType()); - - // Collect all uses of the operation which require update. - for (auto &user : result.getUses()) { - if (!isReplaceableUser(user.getOwner())) - requiresUpdate.push_back({&user, resultTy}); - } + state.setType(result, newKnowledge.getType()); } } } } - - // For each use whose type changed, cast the value with the new type back to - // the old type. - IRRewriter rewriter(region.getContext()); - for (auto [operand, oldType] : requiresUpdate) { - rewriter.setInsertionPoint(operand->getOwner()); - - auto oldValue = operand->get(); - - auto loc = oldValue.getLoc(); - auto castOp = rewriter.create(loc, oldType, oldValue); - operand->set(castOp); - } } /// Pass that performs shape propagation across TOSA operations. This includes @@ -285,7 +291,9 @@ struct TosaInferShapes public: void runOnOperation() override { func::FuncOp func = getOperation(); - propagateShapesInRegion(func.getBody()); + TypeModificationState state; + propagateShapesInRegion(func.getBody(), state); + state.commit(); } }; } // namespace diff --git a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir index 1f0cfaf..2be1204 100644 --- a/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir +++ b/mlir/test/Dialect/Tosa/tosa-infer-shapes.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt --split-input-file --tosa-infer-shapes %s | FileCheck %s +// RUN: mlir-opt --split-input-file --tosa-infer-shapes --allow-unregistered-dialect %s | FileCheck %s // CHECK-LABEL: @test_return func.func @test_return(%arg0 : tensor<4xf32>) -> tensor<*xf32> { @@ -1177,6 +1177,97 @@ func.func @while_test(%arg0 : tensor, %arg1 : tensor<1xi32>) -> () { // ----- +// This test locks down a fix for a crash in the type inference process. +// The relevant pattern is a while loop whose body contains a TOSA operation which is +// consumed by a non-inferrable user in the same body. +// Previously, this would trigger a crash due to how types are cached and then +// reapplied to the operations in the loops body. 
+ +// CHECK-LABEL: @while_dont_crash +func.func @while_dont_crash(%arg0 : tensor) -> (tensor<*xi32>) { + %0 = tosa.add %arg0, %arg0 : (tensor, tensor) -> tensor<*xi32> + // CHECK: tosa.while_loop + // CHECK-SAME: (tensor) -> tensor + %1 = tosa.while_loop (%arg1 = %0) : (tensor<*xi32>) -> tensor<*xi32> { + %2 = "tosa.const"() <{value = dense<3> : tensor}> : () -> tensor + // CHECK: tosa.greater_equal + // CHECK-SAME: (tensor, tensor) -> tensor + %3 = tosa.greater_equal %2, %arg1 : (tensor, tensor<*xi32>) -> tensor<*xi1> + tosa.yield %3 : tensor<*xi1> + } do { + // CHECK: ^bb0 + // CHECK-SAME: tensor + ^bb0(%arg1: tensor<*xi32>): + // CHECK: tosa.add + // CHECK-SAME: (tensor, tensor) -> tensor + %3 = tosa.add %arg1, %arg1 : (tensor<*xi32>, tensor<*xi32>) -> tensor<*xi32> + // CHECK: %[[CAST:.+]] = tensor.cast %{{.*}} : tensor to tensor<*xi32> + // CHECK: "use"(%[[CAST]]) : (tensor<*xi32>) -> () + "use"(%3) : (tensor<*xi32>) -> () + tosa.yield %3 : tensor<*xi32> + } + // CHECK: tensor.cast + return %1 : tensor<*xi32> +} + +// ----- + +// This test locks down a fix for a crash in the type inference process. +// The relevant pattern is a while loop whose body contains a TOSA operation which is +// consumed by a non-inferrable user in the same body. + +// CHECK-LABEL: @while_dont_crash_nested +func.func @while_dont_crash_nested(%arg0 : tensor) -> (tensor<*xi32>) { + %0 = tosa.add %arg0, %arg0 : (tensor, tensor) -> tensor<*xi32> + // CHECK: tosa.while_loop + // CHECK-SAME: (tensor) -> tensor + %1 = tosa.while_loop (%arg1 = %0) : (tensor<*xi32>) -> tensor<*xi32> { + %2 = "tosa.const"() <{value = dense<3> : tensor}> : () -> tensor + // CHECK: tosa.greater_equal + // CHECK-SAME: (tensor, tensor) -> tensor + %3 = tosa.greater_equal %2, %arg1 : (tensor, tensor<*xi32>) -> tensor<*xi1> + // CHECK: tosa.yield + // CHECK-SAME: tensor + tosa.yield %3 : tensor<*xi1> + } do { + // CHECK: ^bb0 + // CHECK-SAME: tensor + ^bb0(%arg1: tensor<*xi32>): + // CHECK: tosa.while_loop + // CHECK-SAME: (tensor) -> tensor + %1 = tosa.while_loop (%arg2 = %arg1) : (tensor<*xi32>) -> tensor<*xi32> { + %2 = "tosa.const"() <{value = dense<3> : tensor}> : () -> tensor + // CHECK: tosa.greater_equal + // CHECK-SAME: (tensor, tensor) -> tensor + %4 = tosa.greater_equal %2, %arg2 : (tensor, tensor<*xi32>) -> tensor<*xi1> + // CHECK: tosa.yield + // CHECK-SAME: tensor + tosa.yield %4 : tensor<*xi1> + } do { + // CHECK: ^bb0 + // CHECK-SAME: tensor + ^bb0(%arg2: tensor<*xi32>): + // CHECK: tosa.add + // CHECK-SAME: (tensor, tensor) -> tensor + %4 = tosa.add %arg2, %arg2 : (tensor<*xi32>, tensor<*xi32>) -> tensor<*xi32> + // CHECK: %[[CAST:.+]] = tensor.cast %{{.*}} : tensor to tensor<*xi32> + // CHECK: "use"(%[[CAST]]) : (tensor<*xi32>) -> () + "use"(%4) : (tensor<*xi32>) -> () + // CHECK: tosa.yield + // CHECK-SAME: tensor + tosa.yield %4 : tensor<*xi32> + } + // CHECK: tosa.yield + // CHECK-SAME: tensor + tosa.yield %1 : tensor<*xi32> + } + + // CHECK: tensor.cast + return %1 : tensor<*xi32> +} + +// ----- + // CHECK-LABEL: @test_static_rfft2d func.func @test_static_rfft2d(%arg0: tensor<5x2x8xf32>) -> () { // CHECK: -> (tensor<5x2x5xf32>, tensor<5x2x5xf32>) -- cgit v1.1 From 0492e1e79568eaad3b693b4c1031139437b7e3f8 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 2 Apr 2024 19:15:06 -0500 Subject: [libc] Include 'config.h' from the printf structs for builtins --- libc/src/stdio/printf_core/core_structs.h | 1 + 1 file changed, 1 insertion(+) diff --git a/libc/src/stdio/printf_core/core_structs.h 
b/libc/src/stdio/printf_core/core_structs.h index 1e78f19..b77b304 100644 --- a/libc/src/stdio/printf_core/core_structs.h +++ b/libc/src/stdio/printf_core/core_structs.h @@ -12,6 +12,7 @@ #include "src/__support/CPP/string_view.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/FPUtil/FPBits.h" +#include "src/__support/macros/config.h" #include "src/stdio/printf_core/printf_config.h" #include -- cgit v1.1 From c45861f4375c0c4525f14db00062a8e4bc00065c Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Wed, 3 Apr 2024 03:15:47 +0300 Subject: Revert "[PAC][llvm-readobj][AArch64][ELF] Support `GNU_PROPERTY_AARCH64_FEATURE_PAUTH`" (#87434) Reverts llvm/llvm-project#85231 See build failure https://lab.llvm.org/buildbot/#/builders/186/builds/15631 --- llvm/include/llvm/BinaryFormat/ELF.h | 26 +- .../ELF/AArch64/aarch64-feature-pauth.s | 305 +++++++-------------- .../ELF/AArch64/aarch64-note-gnu-property.s | 2 - llvm/tools/llvm-readobj/ELFDumper.cpp | 127 ++++----- 4 files changed, 157 insertions(+), 303 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index ed267c1..877f3f7 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -1712,6 +1712,11 @@ enum { NT_ANDROID_TYPE_MEMTAG = 4, }; +// ARM note types. +enum { + NT_ARM_TYPE_PAUTH_ABI_TAG = 1, +}; + // Memory tagging values used in NT_ANDROID_TYPE_MEMTAG notes. enum { // Enumeration to determine the tagging mode. In Android-land, 'SYNC' means @@ -1735,7 +1740,6 @@ enum : unsigned { GNU_PROPERTY_STACK_SIZE = 1, GNU_PROPERTY_NO_COPY_ON_PROTECTED = 2, GNU_PROPERTY_AARCH64_FEATURE_1_AND = 0xc0000000, - GNU_PROPERTY_AARCH64_FEATURE_PAUTH = 0xc0000001, GNU_PROPERTY_X86_FEATURE_1_AND = 0xc0000002, GNU_PROPERTY_X86_UINT32_OR_LO = 0xc0008000, @@ -1754,26 +1758,6 @@ enum : unsigned { GNU_PROPERTY_AARCH64_FEATURE_1_GCS = 1 << 2, }; -// aarch64 PAuth platforms. -enum : unsigned { - AARCH64_PAUTH_PLATFORM_INVALID = 0x0, - AARCH64_PAUTH_PLATFORM_BAREMETAL = 0x1, - AARCH64_PAUTH_PLATFORM_LLVM_LINUX = 0x10000002, -}; - -// Bit positions of version flags for AARCH64_PAUTH_PLATFORM_LLVM_LINUX. -enum : unsigned { - AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INTRINSICS = 0, - AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_CALLS = 1, - AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_RETURNS = 2, - AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_AUTHTRAPS = 3, - AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRADDRDISCR = 4, - AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRTYPEDISCR = 5, - AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI = 6, - AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_LAST = - AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI, -}; - // x86 processor feature bits. 
enum : unsigned { GNU_PROPERTY_X86_FEATURE_1_IBT = 1 << 0, diff --git a/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-feature-pauth.s b/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-feature-pauth.s index 24918e8..f28d92e 100644 --- a/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-feature-pauth.s +++ b/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-feature-pauth.s @@ -1,211 +1,98 @@ # RUN: rm -rf %t && split-file %s %t && cd %t -#--- gnu-42-1.s - -.section ".note.gnu.property", "a" - .long 4 // Name length is always 4 ("GNU") - .long end - begin // Data length - .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 - .asciz "GNU" // Name - .p2align 3 -begin: - # PAuth ABI property note - .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH - .long 16 // Data size - .quad 42 // PAuth ABI platform - .quad 1 // PAuth ABI version - .p2align 3 // Align to 8 byte for 64 bit -end: - -# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-42-1.s -o gnu-42-1.o -# RUN: llvm-readelf --notes gnu-42-1.o | \ -# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x2a (unknown)" -DVERSION=0x1 %s -# RUN: llvm-readobj --notes gnu-42-1.o | \ -# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x2a (unknown)" -DVERSION=0x1 %s - -# ELF: Displaying notes found in: .note.gnu.property -# ELF-NEXT: Owner Data size Description -# ELF-NEXT: GNU 0x00000018 NT_GNU_PROPERTY_TYPE_0 (property note) -# ELF-NEXT: AArch64 PAuth ABI core info: platform [[PLATFORM]], version [[VERSION]] - -# OBJ: Notes [ -# OBJ-NEXT: NoteSection { -# OBJ-NEXT: Name: .note.gnu.property -# OBJ-NEXT: Offset: 0x40 -# OBJ-NEXT: Size: 0x28 -# OBJ-NEXT: Note { -# OBJ-NEXT: Owner: GNU -# OBJ-NEXT: Data size: 0x18 -# OBJ-NEXT: Type: NT_GNU_PROPERTY_TYPE_0 (property note) -# OBJ-NEXT: Property [ -# OBJ-NEXT: AArch64 PAuth ABI core info: platform [[PLATFORM]], version [[VERSION]] -# OBJ-NEXT: ] -# OBJ-NEXT: } -# OBJ-NEXT: } -# OBJ-NEXT: ] - -#--- gnu-0-0.s - -.section ".note.gnu.property", "a" - .long 4 // Name length is always 4 ("GNU") - .long end - begin // Data length - .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 - .asciz "GNU" // Name - .p2align 3 -begin: - # PAuth ABI property note - .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH - .long 16 // Data size - .quad 0 // PAuth ABI platform - .quad 0 // PAuth ABI version - .p2align 3 // Align to 8 byte for 64 bit -end: - -# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-0-0.s -o gnu-0-0.o -# RUN: llvm-readelf --notes gnu-0-0.o | \ -# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x0 (invalid)" -DVERSION=0x0 %s -# RUN: llvm-readobj --notes gnu-0-0.o | \ -# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x0 (invalid)" -DVERSION=0x0 %s - -#--- gnu-1-0.s - -.section ".note.gnu.property", "a" - .long 4 // Name length is always 4 ("GNU") - .long end - begin // Data length - .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 - .asciz "GNU" // Name - .p2align 3 -begin: - # PAuth ABI property note - .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH - .long 16 // Data size - .quad 1 // PAuth ABI platform - .quad 0 // PAuth ABI version - .p2align 3 // Align to 8 byte for 64 bit -end: - -# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-1-0.s -o gnu-1-0.o -# RUN: llvm-readelf --notes gnu-1-0.o | \ -# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x1 (baremetal)" -DVERSION=0x0 %s -# RUN: llvm-readobj --notes gnu-1-0.o | \ -# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x1 (baremetal)" -DVERSION=0x0 %s - -#--- gnu-0x10000002-85.s - -.section ".note.gnu.property", "a" - .long 4 // Name 
length is always 4 ("GNU") - .long end - begin // Data length - .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 - .asciz "GNU" // Name - .p2align 3 -begin: - # PAuth ABI property note - .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH - .long 16 // Data size - .quad 0x10000002 // PAuth ABI platform - .quad 85 // PAuth ABI version - .p2align 3 // Align to 8 byte for 64 bit -end: - -# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-0x10000002-85.s -o gnu-0x10000002-85.o -# RUN: llvm-readelf --notes gnu-0x10000002-85.o | \ -# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x10000002 (llvm_linux)" \ -# RUN: -DVERSION="0x55 (PointerAuthIntrinsics, !PointerAuthCalls, PointerAuthReturns, !PointerAuthAuthTraps, PointerAuthVTPtrAddressDiscrimination, !PointerAuthVTPtrTypeDiscrimination, PointerAuthInitFini)" %s -# RUN: llvm-readobj --notes gnu-0x10000002-85.o | \ -# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x10000002 (llvm_linux)" \ -# RUN: -DVERSION="0x55 (PointerAuthIntrinsics, !PointerAuthCalls, PointerAuthReturns, !PointerAuthAuthTraps, PointerAuthVTPtrAddressDiscrimination, !PointerAuthVTPtrTypeDiscrimination, PointerAuthInitFini)" %s - -#--- gnu-0x10000002-128.s - -.section ".note.gnu.property", "a" - .long 4 // Name length is always 4 ("GNU") - .long end - begin // Data length - .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 - .asciz "GNU" // Name - .p2align 3 -begin: - # PAuth ABI property note - .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH - .long 16 // Data size - .quad 0x10000002 // PAuth ABI platform - .quad 128 // PAuth ABI version - .p2align 3 // Align to 8 byte for 64 bit -end: - -# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-0x10000002-128.s -o gnu-0x10000002-128.o -# RUN: llvm-readelf --notes gnu-0x10000002-128.o | \ -# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x10000002 (llvm_linux)" -DVERSION="0x80 (unknown)" %s -# RUN: llvm-readobj --notes gnu-0x10000002-128.o | \ -# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x10000002 (llvm_linux)" -DVERSION="0x80 (unknown)" %s - -#--- gnu-short.s - -.section ".note.gnu.property", "a" - .long 4 // Name length is always 4 ("GNU") - .long end - begin // Data length - .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 - .asciz "GNU" // Name - .p2align 3 -begin: - # PAuth ABI property note - .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH - .long 12 // Data size - .quad 42 // PAuth ABI platform - .word 1 // PAuth ABI version - .p2align 3 // Align to 8 byte for 64 bit -end: - -# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-short.s -o gnu-short.o -# RUN: llvm-readelf --notes gnu-short.o | \ -# RUN: FileCheck --check-prefix=ELF-ERR -DSIZE=28 -DDATASIZE=18 \ -# RUN: -DERR="" %s -# RUN: llvm-readobj --notes gnu-short.o | \ -# RUN: FileCheck --check-prefix=OBJ-ERR -DSIZE=28 -DDATASIZE=18 \ -# RUN: -DERR="" %s - -# ELF-ERR: Displaying notes found in: .note.gnu.property -# ELF-ERR-NEXT: Owner Data size Description -# ELF-ERR-NEXT: GNU 0x000000[[DATASIZE]] NT_GNU_PROPERTY_TYPE_0 (property note) -# ELF-ERR-NEXT: AArch64 PAuth ABI core info: [[ERR]] - -# OBJ-ERR: Notes [ -# OBJ-ERR-NEXT: NoteSection { -# OBJ-ERR-NEXT: Name: .note.gnu.property -# OBJ-ERR-NEXT: Offset: 0x40 -# OBJ-ERR-NEXT: Size: 0x[[SIZE]] -# OBJ-ERR-NEXT: Note { -# OBJ-ERR-NEXT: Owner: GNU -# OBJ-ERR-NEXT: Data size: 0x[[DATASIZE]] -# OBJ-ERR-NEXT: Type: NT_GNU_PROPERTY_TYPE_0 (property note) -# OBJ-ERR-NEXT: Property [ -# OBJ-ERR-NEXT: AArch64 PAuth ABI core info: [[ERR]] -# OBJ-ERR-NEXT: ] -# OBJ-ERR-NEXT: } -# OBJ-ERR-NEXT: } -# 
OBJ-ERR-NEXT: ] - -#--- gnu-long.s - -.section ".note.gnu.property", "a" - .long 4 // Name length is always 4 ("GNU") - .long end - begin // Data length - .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 - .asciz "GNU" // Name - .p2align 3 -begin: - # PAuth ABI property note - .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH - .long 24 // Data size - .quad 42 // PAuth ABI platform - .quad 1 // PAuth ABI version - .quad 0x0123456789ABCDEF - .p2align 3 // Align to 8 byte for 64 bit -end: - -# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-long.s -o gnu-long.o -# RUN: llvm-readelf --notes gnu-long.o | \ -# RUN: FileCheck --check-prefix=ELF-ERR -DSIZE=30 -DDATASIZE=20 \ -# RUN: -DERR="" %s -# RUN: llvm-readobj --notes gnu-long.o | \ -# RUN: FileCheck --check-prefix=OBJ-ERR -DSIZE=30 -DDATASIZE=20 \ -# RUN: -DERR="" %s +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag.s -o tag.o +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag-short.s -o tag-short.o +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag-long.s -o tag-long.o + +# RUN: llvm-readelf --notes tag.o | FileCheck --check-prefix NORMAL %s +# RUN: llvm-readelf --notes tag-short.o | FileCheck --check-prefix SHORT %s +# RUN: llvm-readelf --notes tag-long.o | FileCheck --check-prefix LONG %s + +# NORMAL: AArch64 PAuth ABI tag: platform 0x2a, version 0x1 +# SHORT: AArch64 PAuth ABI tag: +# LONG: AArch64 PAuth ABI tag: platform 0x2a, version 0x1, additional info 0xEFCDAB8967452301 + +# RUN: llvm-readobj --notes tag.o | FileCheck --check-prefix LLVM-NORMAL %s +# RUN: llvm-readobj --notes tag-short.o | FileCheck --check-prefix LLVM-SHORT %s +# RUN: llvm-readobj --notes tag-long.o | FileCheck --check-prefix LLVM-LONG %s + +# LLVM-SHORT: Notes [ +# LLVM-SHORT-NEXT: NoteSection { +# LLVM-SHORT-NEXT: Name: .note.AARCH64-PAUTH-ABI-tag +# LLVM-SHORT-NEXT: Offset: 0x40 +# LLVM-SHORT-NEXT: Size: 0x1C +# LLVM-SHORT-NEXT: Note { +# LLVM-SHORT-NEXT: Owner: ARM +# LLVM-SHORT-NEXT: Data size: 0xC +# LLVM-SHORT-NEXT: Type: NT_ARM_TYPE_PAUTH_ABI_TAG +# LLVM-SHORT-NEXT: Description data ( +# LLVM-SHORT-NEXT: 0000: 2A000000 00000000 01000000 +# LLVM-SHORT-NEXT: ) +# LLVM-SHORT-NEXT: } +# LLVM-SHORT-NEXT: } +# LLVM-SHORT-NEXT: ] + +# LLVM-NORMAL: Notes [ +# LLVM-NORMAL-NEXT: NoteSection { +# LLVM-NORMAL-NEXT: Name: .note.AARCH64-PAUTH-ABI-tag +# LLVM-NORMAL-NEXT: Offset: 0x40 +# LLVM-NORMAL-NEXT: Size: 0x20 +# LLVM-NORMAL-NEXT: Note { +# LLVM-NORMAL-NEXT: Owner: ARM +# LLVM-NORMAL-NEXT: Data size: 0x10 +# LLVM-NORMAL-NEXT: Type: NT_ARM_TYPE_PAUTH_ABI_TAG +# LLVM-NORMAL-NEXT: Platform: 42 +# LLVM-NORMAL-NEXT: Version: 1 +# LLVM-NORMAL-NEXT: } +# LLVM-NORMAL-NEXT: } +# LLVM-NORMAL-NEXT: ] + +# LLVM-LONG: Notes [ +# LLVM-LONG-NEXT: NoteSection { +# LLVM-LONG-NEXT: Name: .note.AARCH64-PAUTH-ABI-tag +# LLVM-LONG-NEXT: Offset: 0x40 +# LLVM-LONG-NEXT: Size: 0x28 +# LLVM-LONG-NEXT: Note { +# LLVM-LONG-NEXT: Owner: ARM +# LLVM-LONG-NEXT: Data size: 0x18 +# LLVM-LONG-NEXT: Type: NT_ARM_TYPE_PAUTH_ABI_TAG +# LLVM-LONG-NEXT: Platform: 42 +# LLVM-LONG-NEXT: Version: 1 +# LLVM-LONG-NEXT: Additional info: EFCDAB8967452301 +# LLVM-LONG-NEXT: } +# LLVM-LONG-NEXT: } +# LLVM-LONG-NEXT: ] + +#--- abi-tag.s + +.section ".note.AARCH64-PAUTH-ABI-tag", "a" +.long 4 +.long 16 +.long 1 +.asciz "ARM" + +.quad 42 // platform +.quad 1 // version + +#--- abi-tag-short.s + +.section ".note.AARCH64-PAUTH-ABI-tag", "a" +.long 4 +.long 12 +.long 1 +.asciz "ARM" + +.quad 42 +.word 1 + +#--- abi-tag-long.s + +.section 
".note.AARCH64-PAUTH-ABI-tag", "a" +.long 4 +.long 24 +.long 1 +.asciz "ARM" + +.quad 42 // platform +.quad 1 // version +.quad 0x0123456789ABCDEF // extra data diff --git a/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-note-gnu-property.s b/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-note-gnu-property.s index b517f0b..377e6f9 100644 --- a/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-note-gnu-property.s +++ b/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-note-gnu-property.s @@ -1,5 +1,3 @@ -// See tests for GNU_PROPERTY_AARCH64_FEATURE_PAUTH in aarch64-feature-pauth.s - // RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu %s -o %t // RUN: llvm-readelf --notes %t | FileCheck %s --check-prefix=GNU // RUN: llvm-readobj --notes %t | FileCheck %s --check-prefix=LLVM diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 29a0325..4b406ef 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -61,7 +61,6 @@ #include "llvm/Support/SystemZ/zOSSupport.h" #include "llvm/Support/raw_ostream.h" #include -#include #include #include #include @@ -5106,73 +5105,6 @@ template void GNUELFDumper::printAddrsig() { } } -template -static bool printAArch64PAuthABICoreInfo(raw_ostream &OS, uint32_t DataSize, - ArrayRef Desc) { - OS << " AArch64 PAuth ABI core info: "; - // DataSize - size without padding, Desc.size() - size with padding - if (DataSize != 16) { - OS << format("", DataSize); - return false; - } - - uint64_t Platform = - support::endian::read64(Desc.data() + 0); - uint64_t Version = support::endian::read64(Desc.data() + 8); - - const char *PlatformDesc = [Platform]() { - switch (Platform) { - case AARCH64_PAUTH_PLATFORM_INVALID: - return "invalid"; - case AARCH64_PAUTH_PLATFORM_BAREMETAL: - return "baremetal"; - case AARCH64_PAUTH_PLATFORM_LLVM_LINUX: - return "llvm_linux"; - default: - return "unknown"; - } - }(); - - std::string VersionDesc = [Platform, Version]() -> std::string { - if (Platform != AARCH64_PAUTH_PLATFORM_LLVM_LINUX) - return ""; - if (Version >= (1 << (AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_LAST + 1))) - return "unknown"; - - std::array - Flags; - Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INTRINSICS] = "Intrinsics"; - Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_CALLS] = "Calls"; - Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_RETURNS] = "Returns"; - Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_AUTHTRAPS] = "AuthTraps"; - Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRADDRDISCR] = - "VTPtrAddressDiscrimination"; - Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRTYPEDISCR] = - "VTPtrTypeDiscrimination"; - Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI] = "InitFini"; - - static_assert(AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI == - AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_LAST, - "Update when new enum items are defined"); - - std::string Desc; - for (uint32_t I = 0; I < Flags.size(); ++I) { - if (!(Version & (1 << I))) - Desc += '!'; - Desc += Twine("PointerAuth" + Flags[I] + ", ").str(); - } - Desc.resize(Desc.size() - 2); // Trim last ", " - return Desc; - }(); - - OS << format("platform 0x%x (%s), version 0x%x", Platform, PlatformDesc, - Version); - if (!VersionDesc.empty()) - OS << format(" (%s)", VersionDesc.c_str()); - - return true; -} - template static std::string getGNUProperty(uint32_t Type, uint32_t DataSize, ArrayRef Data) { @@ -5230,9 +5162,6 @@ static std::string getGNUProperty(uint32_t Type, uint32_t DataSize, if (PrData) 
OS << format("", PrData); return OS.str(); - case GNU_PROPERTY_AARCH64_FEATURE_PAUTH: - printAArch64PAuthABICoreInfo(OS, DataSize, Data); - return OS.str(); case GNU_PROPERTY_X86_FEATURE_2_NEEDED: case GNU_PROPERTY_X86_FEATURE_2_USED: OS << "x86 feature " @@ -5435,6 +5364,29 @@ static bool printAndroidNote(raw_ostream &OS, uint32_t NoteType, } template +static bool printAArch64Note(raw_ostream &OS, uint32_t NoteType, + ArrayRef Desc) { + if (NoteType != NT_ARM_TYPE_PAUTH_ABI_TAG) + return false; + + OS << " AArch64 PAuth ABI tag: "; + if (Desc.size() < 16) { + OS << format("", Desc.size()); + return false; + } + + uint64_t Platform = endian::read64(Desc.data() + 0); + uint64_t Version = endian::read64(Desc.data() + 8); + OS << format("platform 0x%" PRIx64 ", version 0x%" PRIx64, Platform, Version); + + if (Desc.size() > 16) + OS << ", additional info 0x" + << toHex(ArrayRef(Desc.data() + 16, Desc.size() - 16)); + + return true; +} + +template void GNUELFDumper::printMemtag( const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc, @@ -5831,6 +5783,10 @@ const NoteType AndroidNoteTypes[] = { "NT_ANDROID_TYPE_MEMTAG (Android memory tagging information)"}, }; +const NoteType ARMNoteTypes[] = { + {ELF::NT_ARM_TYPE_PAUTH_ABI_TAG, "NT_ARM_TYPE_PAUTH_ABI_TAG"}, +}; + const NoteType CoreNoteTypes[] = { {ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"}, {ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"}, @@ -5949,6 +5905,8 @@ StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) { return FindNote(LLVMOMPOFFLOADNoteTypes); if (Name == "Android") return FindNote(AndroidNoteTypes); + if (Name == "ARM") + return FindNote(ARMNoteTypes); if (ELFType == ELF::ET_CORE) return FindNote(CoreNoteTypes); @@ -6104,6 +6062,9 @@ template void GNUELFDumper::printNotes() { } else if (Name == "Android") { if (printAndroidNote(OS, Type, Descriptor)) return Error::success(); + } else if (Name == "ARM") { + if (printAArch64Note(OS, Type, Descriptor)) + return Error::success(); } if (!Descriptor.empty()) { OS << " description data:"; @@ -7742,6 +7703,27 @@ static bool printAndroidNoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, } template +static bool printAarch64NoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, + ScopedPrinter &W) { + if (NoteType != NT_ARM_TYPE_PAUTH_ABI_TAG) + return false; + + if (Desc.size() < 16) + return false; + + uint64_t platform = endian::read64(Desc.data() + 0); + uint64_t version = endian::read64(Desc.data() + 8); + W.printNumber("Platform", platform); + W.printNumber("Version", version); + + if (Desc.size() > 16) + W.printString("Additional info", + toHex(ArrayRef(Desc.data() + 16, Desc.size() - 16))); + + return true; +} + +template void LLVMELFDumper::printMemtag( const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc, @@ -7877,6 +7859,9 @@ template void LLVMELFDumper::printNotes() { } else if (Name == "Android") { if (printAndroidNoteLLVMStyle(Type, Descriptor, W)) return Error::success(); + } else if (Name == "ARM") { + if (printAarch64NoteLLVMStyle(Type, Descriptor, W)) + return Error::success(); } if (!Descriptor.empty()) { W.printBinaryBlock("Description data", Descriptor); -- cgit v1.1 From 04dbf7ad44dbe099f8265ad1db38cbf9a0767a82 Mon Sep 17 00:00:00 2001 From: "A. Jiang" Date: Wed, 3 Apr 2024 08:21:15 +0800 Subject: [libc++][ranges] Avoid using `distance` in `ranges::contains_subrange` (#87155) Both `std::distance` or `ranges::distance` are inefficient for non-sized ranges. 
Also, calculating the range using `int` type is seriously problematic. This patch avoids using `distance` and calculation of the length of non-sized ranges. Fixes #86833. --- libcxx/include/__algorithm/ranges_contains_subrange.h | 14 ++++++-------- .../alg.contains/ranges.contains_subrange.pass.cpp | 4 ++++ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/libcxx/include/__algorithm/ranges_contains_subrange.h b/libcxx/include/__algorithm/ranges_contains_subrange.h index 4cd03cb..bc5a86c 100644 --- a/libcxx/include/__algorithm/ranges_contains_subrange.h +++ b/libcxx/include/__algorithm/ranges_contains_subrange.h @@ -15,11 +15,11 @@ #include <__functional/ranges_operations.h> #include <__functional/reference_wrapper.h> #include <__iterator/concepts.h> -#include <__iterator/distance.h> #include <__iterator/indirectly_comparable.h> #include <__iterator/projected.h> #include <__ranges/access.h> #include <__ranges/concepts.h> +#include <__ranges/size.h> #include <__ranges/subrange.h> #include <__utility/move.h> @@ -53,8 +53,7 @@ struct __fn { _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { - auto __n2 = ranges::distance(__first2, __last2); - if (__n2 == 0) + if (__first2 == __last2) return true; auto __ret = ranges::search( @@ -70,14 +69,13 @@ struct __fn { requires indirectly_comparable, iterator_t<_Range2>, _Pred, _Proj1, _Proj2> _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr bool static operator()(_Range1&& __range1, _Range2&& __range2, _Pred __pred = {}, _Proj1 __proj1 = {}, _Proj2 __proj2 = {}) { - auto __n2 = 0; if constexpr (sized_range<_Range2>) { - __n2 = ranges::size(__range2); + if (ranges::size(__range2) == 0) + return true; } else { - __n2 = std::distance(cbegin(__range2), cend(__range2)); + if (ranges::begin(__range2) == ranges::end(__range2)) + return true; } - if (__n2 == 0) - return true; auto __ret = ranges::search(__range1, __range2, __pred, std::ref(__proj1), std::ref(__proj2)); return __ret.empty() == false; diff --git a/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp b/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp index d48ee9e..761691c 100644 --- a/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp +++ b/libcxx/test/std/algorithms/alg.nonmodifying/alg.contains/ranges.contains_subrange.pass.cpp @@ -309,6 +309,10 @@ constexpr bool test() { }); }); + assert(std::ranges::contains_subrange( + std::views::iota(0, 5), std::views::iota(0, 5) | std::views::filter([](int) { return true; }))); + assert(!std::ranges::contains_subrange(std::views::iota(0ULL, 42ULL), std::views::iota(0ULL, 1ULL << 32))); + return true; } -- cgit v1.1 From ea4a11926b53be5d308a8b40eb7353d3f59eb5f5 Mon Sep 17 00:00:00 2001 From: Ryotaro KASUGA Date: Wed, 3 Apr 2024 09:28:09 +0900 Subject: =?UTF-8?q?Reapply=20"[CodeGen]=20Fix=20register=20pressure=20comp?= =?UTF-8?q?utation=20in=20MachinePipeli=E2=80=A6=20(#87312)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ner (#87030)" Fix broken test. This reverts commit b8ead2198f27924f91b90b6c104c1234ccc8972e. 
--- llvm/lib/CodeGen/MachinePipeliner.cpp | 2 +- llvm/test/CodeGen/AArch64/sms-regpress.mir | 160 +++++++++++++++++++++++++ llvm/test/CodeGen/PowerPC/sms-regpress.mir | 186 ++++------------------------- 3 files changed, 182 insertions(+), 166 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/sms-regpress.mir diff --git a/llvm/lib/CodeGen/MachinePipeliner.cpp b/llvm/lib/CodeGen/MachinePipeliner.cpp index eb42a78..b9c6765 100644 --- a/llvm/lib/CodeGen/MachinePipeliner.cpp +++ b/llvm/lib/CodeGen/MachinePipeliner.cpp @@ -1268,7 +1268,7 @@ private: // Calculate the upper limit of each pressure set void computePressureSetLimit(const RegisterClassInfo &RCI) { for (unsigned PSet = 0; PSet < PSetNum; PSet++) - PressureSetLimit[PSet] = RCI.getRegPressureSetLimit(PSet); + PressureSetLimit[PSet] = TRI->getRegPressureSetLimit(MF, PSet); // We assume fixed registers, such as stack pointer, are already in use. // Therefore subtracting the weight of the fixed registers from the limit of diff --git a/llvm/test/CodeGen/AArch64/sms-regpress.mir b/llvm/test/CodeGen/AArch64/sms-regpress.mir new file mode 100644 index 0000000..c75eba5 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sms-regpress.mir @@ -0,0 +1,160 @@ +# RUN: llc --verify-machineinstrs -mtriple=aarch64 -o - %s -run-pass pipeliner -aarch64-enable-pipeliner -pipeliner-max-mii=40 -pipeliner-register-pressure -pipeliner-ii-search-range=30 -debug-only=pipeliner 2>&1 | FileCheck %s + +# REQUIRES: asserts + +# Check that if the register pressure is too high, the schedule is rejected, II is incremented, and scheduling continues. +# The specific value of II is not important. + +# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} +# CHECK: {{^ *}}Rejected the schedule because of too high register pressure{{$}} +# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} +# CHECK: {{^ *}}Schedule Found? 
1 (II={{[0-9]+}}){{$}} + +--- | + target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128" + + define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef %n) local_unnamed_addr { + entry: + %0 = load double, ptr %a, align 8 + %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 8 + %1 = load double, ptr %arrayidx1, align 8 + %cmp133 = icmp sgt i32 %n, 0 + br i1 %cmp133, label %for.body.preheader, label %for.cond.cleanup + + for.body.preheader: ; preds = %entry + %wide.trip.count = zext nneg i32 %n to i64 + br label %for.body + + for.cond.cleanup: ; preds = %for.body, %entry + %res.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %add54, %for.body ] + ret double %res.0.lcssa + + for.body: ; preds = %for.body.preheader, %for.body + %lsr.iv137 = phi i64 [ %wide.trip.count, %for.body.preheader ], [ %lsr.iv.next, %for.body ] + %lsr.iv = phi ptr [ %b, %for.body.preheader ], [ %scevgep, %for.body ] + %res.0135 = phi double [ 0.000000e+00, %for.body.preheader ], [ %add54, %for.body ] + %2 = load double, ptr %lsr.iv, align 8 + %3 = tail call double @llvm.fmuladd.f64(double %0, double %2, double %0) + %4 = tail call double @llvm.fmuladd.f64(double %3, double %2, double %3) + %5 = tail call double @llvm.fmuladd.f64(double %4, double %2, double %4) + %6 = tail call double @llvm.fmuladd.f64(double %5, double %2, double %5) + %7 = tail call double @llvm.fmuladd.f64(double %6, double %2, double %6) + %8 = tail call double @llvm.fmuladd.f64(double %7, double %2, double %7) + %9 = tail call double @llvm.fmuladd.f64(double %8, double %2, double %8) + %10 = tail call double @llvm.fmuladd.f64(double %9, double %2, double %9) + %11 = tail call double @llvm.fmuladd.f64(double %10, double %2, double %10) + %12 = tail call double @llvm.fmuladd.f64(double %11, double %2, double %11) + %13 = tail call double @llvm.fmuladd.f64(double %12, double %2, double %12) + %14 = tail call double @llvm.fmuladd.f64(double %13, double %2, double %13) + %15 = tail call double @llvm.fmuladd.f64(double %14, double %2, double %14) + %16 = tail call double @llvm.fmuladd.f64(double %15, double %2, double %15) + %17 = tail call double @llvm.fmuladd.f64(double %16, double %2, double %16) + %18 = tail call double @llvm.fmuladd.f64(double %17, double %2, double %17) + %add = fadd double %17, %18 + %19 = tail call double @llvm.fmuladd.f64(double %18, double %2, double %add) + %add35 = fadd double %10, %19 + %20 = tail call double @llvm.fmuladd.f64(double %3, double %2, double %add35) + %add38 = fadd double %11, %20 + %21 = tail call double @llvm.fmuladd.f64(double %4, double %2, double %add38) + %add41 = fadd double %12, %21 + %22 = tail call double @llvm.fmuladd.f64(double %5, double %2, double %add41) + %add44 = fadd double %14, %15 + %add45 = fadd double %13, %add44 + %add46 = fadd double %add45, %22 + %23 = tail call double @llvm.fmuladd.f64(double %6, double %2, double %add46) + %mul = fmul double %2, %7 + %mul51 = fmul double %1, %mul + %24 = tail call double @llvm.fmuladd.f64(double %mul51, double %9, double %23) + %25 = tail call double @llvm.fmuladd.f64(double %8, double %1, double %24) + %add54 = fadd double %res.0135, %25 + %scevgep = getelementptr i8, ptr %lsr.iv, i64 8 + %lsr.iv.next = add nsw i64 %lsr.iv137, -1 + %exitcond.not = icmp eq i64 %lsr.iv.next, 0 + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + } + + declare double @llvm.fmuladd.f64(double, double, double) + +... 
+--- +name: kernel +tracksRegLiveness: true +liveins: + - { reg: '$x0', virtual-reg: '%10' } + - { reg: '$x1', virtual-reg: '%11' } + - { reg: '$w2', virtual-reg: '%12' } +body: | + bb.0.entry: + successors: %bb.1, %bb.4 + liveins: $x0, $x1, $w2 + + %12:gpr32common = COPY $w2 + %11:gpr64 = COPY $x1 + %10:gpr64common = COPY $x0 + dead $wzr = SUBSWri %12, 1, 0, implicit-def $nzcv + Bcc 10, %bb.1, implicit $nzcv + + bb.4: + %13:fpr64 = FMOVD0 + B %bb.2 + + bb.1.for.body.preheader: + %0:fpr64 = LDRDui %10, 0 :: (load (s64) from %ir.a) + %1:fpr64 = LDRDui %10, 1 :: (load (s64) from %ir.arrayidx1) + %16:gpr32 = ORRWrs $wzr, %12, 0 + %2:gpr64all = SUBREG_TO_REG 0, killed %16, %subreg.sub_32 + %15:fpr64 = FMOVD0 + B %bb.3 + + bb.2.for.cond.cleanup: + %3:fpr64 = PHI %13, %bb.4, %7, %bb.3 + $d0 = COPY %3 + RET_ReallyLR implicit $d0 + + bb.3.for.body: + successors: %bb.2, %bb.3 + + %4:gpr64sp = PHI %2, %bb.1, %9, %bb.3 + %5:gpr64sp = PHI %11, %bb.1, %8, %bb.3 + %6:fpr64 = PHI %15, %bb.1, %7, %bb.3 + early-clobber %17:gpr64sp, %18:fpr64 = LDRDpost %5, 8 :: (load (s64) from %ir.lsr.iv) + %19:fpr64 = nofpexcept FMADDDrrr %0, %18, %0, implicit $fpcr + %20:fpr64 = nofpexcept FMADDDrrr %19, %18, %19, implicit $fpcr + %21:fpr64 = nofpexcept FMADDDrrr %20, %18, %20, implicit $fpcr + %22:fpr64 = nofpexcept FMADDDrrr %21, %18, %21, implicit $fpcr + %23:fpr64 = nofpexcept FMADDDrrr %22, %18, %22, implicit $fpcr + %24:fpr64 = nofpexcept FMADDDrrr %23, %18, %23, implicit $fpcr + %25:fpr64 = nofpexcept FMADDDrrr %24, %18, %24, implicit $fpcr + %26:fpr64 = nofpexcept FMADDDrrr %25, %18, %25, implicit $fpcr + %27:fpr64 = nofpexcept FMADDDrrr %26, %18, %26, implicit $fpcr + %28:fpr64 = nofpexcept FMADDDrrr %27, %18, %27, implicit $fpcr + %29:fpr64 = nofpexcept FMADDDrrr %28, %18, %28, implicit $fpcr + %30:fpr64 = nofpexcept FMADDDrrr %29, %18, %29, implicit $fpcr + %31:fpr64 = nofpexcept FMADDDrrr %30, %18, %30, implicit $fpcr + %32:fpr64 = nofpexcept FMADDDrrr %31, %18, %31, implicit $fpcr + %33:fpr64 = nofpexcept FMADDDrrr %32, %18, %32, implicit $fpcr + %34:fpr64 = nofpexcept FMADDDrrr %33, %18, %33, implicit $fpcr + %35:fpr64 = nofpexcept FADDDrr %33, %34, implicit $fpcr + %36:fpr64 = nofpexcept FMADDDrrr %34, %18, killed %35, implicit $fpcr + %37:fpr64 = nofpexcept FADDDrr %26, killed %36, implicit $fpcr + %38:fpr64 = nofpexcept FMADDDrrr %19, %18, killed %37, implicit $fpcr + %39:fpr64 = nofpexcept FADDDrr %27, killed %38, implicit $fpcr + %40:fpr64 = nofpexcept FMADDDrrr %20, %18, killed %39, implicit $fpcr + %41:fpr64 = nofpexcept FADDDrr %28, killed %40, implicit $fpcr + %42:fpr64 = nofpexcept FMADDDrrr %21, %18, killed %41, implicit $fpcr + %43:fpr64 = nofpexcept FADDDrr %30, %31, implicit $fpcr + %44:fpr64 = nofpexcept FADDDrr %29, killed %43, implicit $fpcr + %45:fpr64 = nofpexcept FADDDrr killed %44, killed %42, implicit $fpcr + %46:fpr64 = nofpexcept FMADDDrrr %22, %18, killed %45, implicit $fpcr + %47:fpr64 = nofpexcept FMULDrr %18, %23, implicit $fpcr + %48:fpr64 = nofpexcept FMULDrr %1, killed %47, implicit $fpcr + %49:fpr64 = nofpexcept FMADDDrrr killed %48, %25, killed %46, implicit $fpcr + %50:fpr64 = nofpexcept FMADDDrrr %24, %1, killed %49, implicit $fpcr + %7:fpr64 = nofpexcept FADDDrr %6, killed %50, implicit $fpcr + %8:gpr64all = COPY %17 + %51:gpr64 = nsw SUBSXri %4, 1, 0, implicit-def $nzcv + %9:gpr64all = COPY %51 + Bcc 0, %bb.2, implicit $nzcv + B %bb.3 + +... 
diff --git a/llvm/test/CodeGen/PowerPC/sms-regpress.mir b/llvm/test/CodeGen/PowerPC/sms-regpress.mir index cebd78a..b01115c 100644 --- a/llvm/test/CodeGen/PowerPC/sms-regpress.mir +++ b/llvm/test/CodeGen/PowerPC/sms-regpress.mir @@ -1,41 +1,30 @@ -# RUN: llc --verify-machineinstrs -mcpu=pwr9 -o - %s -run-pass=pipeliner -ppc-enable-pipeliner -pipeliner-register-pressure -pipeliner-max-mii=50 -pipeliner-ii-search-range=30 -pipeliner-max-stages=10 -debug-only=pipeliner 2>&1 | FileCheck %s +# RUN: llc --verify-machineinstrs -mcpu=pwr9 -o - %s -run-pass=pipeliner -ppc-enable-pipeliner -pipeliner-register-pressure -pipeliner-max-mii=50 -pipeliner-ii-search-range=30 -pipeliner-max-stages=10 -debug-only=pipeliner 2>&1 | FileCheck %s # REQUIRES: asserts # Check that if the register pressure is too high, the schedule is rejected, II is incremented, and scheduling continues. # The specific value of II is not important. -# CHECK: Try to schedule with 21 -# CHECK: Can't schedule -# CHECK: Try to schedule with 22 -# CHECK: Can't schedule -# CHECK: Try to schedule with 23 -# CHECK: Rejected the schedule because of too high register pressure -# CHECK: Try to schedule with 24 -# CHECK: Rejected the schedule because of too high register pressure -# CHECK: Try to schedule with 25 -# CHECK: Rejected the schedule because of too high register pressure -# CHECK: Try to schedule with 26 -# CHECK: Schedule Found? 1 (II=26) +# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} +# CHECK: {{^ *}}Rejected the schedule because of too high register pressure{{$}} +# CHECK: {{^ *}}Try to schedule with {{[0-9]+$}} +# CHECK: {{^ *}}Schedule Found? 1 (II={{[0-9]+}}){{$}} --- | - ; ModuleID = 'a.ll' - source_filename = "a.c" target datalayout = "e-m:e-Fn32-i64:64-n32:64" target triple = "ppc64le" - ; Function Attrs: nofree nosync nounwind memory(argmem: read) uwtable - define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr #0 { + define dso_local double @kernel(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr { entry: - %0 = load double, ptr %a, align 8, !tbaa !3 - %arrayidx1 = getelementptr inbounds double, ptr %a, i64 1 - %1 = load double, ptr %arrayidx1, align 8, !tbaa !3 + %0 = load double, ptr %a, align 8 + %arrayidx1 = getelementptr inbounds i8, ptr %a, i64 8 + %1 = load double, ptr %arrayidx1, align 8 %cmp163 = icmp sgt i32 %n, 0 br i1 %cmp163, label %for.body.preheader, label %for.cond.cleanup for.body.preheader: ; preds = %entry - %wide.trip.count = zext i32 %n to i64 - %scevgep1 = getelementptr i8, ptr %b, i64 -8 + %wide.trip.count = zext nneg i32 %n to i64 + %scevgep167 = getelementptr i8, ptr %b, i64 -8 call void @llvm.set.loop.iterations.i64(i64 %wide.trip.count) br label %for.body @@ -43,11 +32,11 @@ %res.0.lcssa = phi double [ 0.000000e+00, %entry ], [ %30, %for.body ] ret double %res.0.lcssa - for.body: ; preds = %for.body, %for.body.preheader + for.body: ; preds = %for.body.preheader, %for.body %res.0165 = phi double [ 0.000000e+00, %for.body.preheader ], [ %30, %for.body ] - %2 = phi ptr [ %scevgep1, %for.body.preheader ], [ %3, %for.body ] + %2 = phi ptr [ %scevgep167, %for.body.preheader ], [ %3, %for.body ] %3 = getelementptr i8, ptr %2, i64 8 - %4 = load double, ptr %3, align 8, !tbaa !3 + %4 = load double, ptr %3, align 8 %5 = tail call double @llvm.fmuladd.f64(double %0, double %4, double %0) %6 = tail call double @llvm.fmuladd.f64(double %5, double %4, 
double %5) %7 = tail call double @llvm.fmuladd.f64(double %6, double %4, double %6) @@ -92,152 +81,23 @@ %mul66 = fmul double %12, %mul65 %30 = tail call double @llvm.fmuladd.f64(double %mul66, double %10, double %res.0165) %31 = call i1 @llvm.loop.decrement.i64(i64 1) - br i1 %31, label %for.body, label %for.cond.cleanup, !llvm.loop !7 + br i1 %31, label %for.body, label %for.cond.cleanup } - ; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) - declare double @llvm.fmuladd.f64(double, double, double) #1 + declare double @llvm.fmuladd.f64(double, double, double) - ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn - declare void @llvm.set.loop.iterations.i64(i64) #2 + declare void @llvm.set.loop.iterations.i64(i64) - ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn - declare i1 @llvm.loop.decrement.i64(i64) #2 + declare i1 @llvm.loop.decrement.i64(i64) - attributes #0 = { nofree nosync nounwind memory(argmem: read) uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+htm,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+power8-vector,+power9-vector,+quadword-atomics,+vsx,-aix-small-local-exec-tls,-privileged,-rop-protect,-spe" } - attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } - attributes #2 = { nocallback noduplicate nofree nosync nounwind willreturn } - - !llvm.module.flags = !{!0, !1} - !llvm.ident = !{!2} - - !0 = !{i32 1, !"wchar_size", i32 4} - !1 = !{i32 7, !"uwtable", i32 2} - !2 = !{!"clang version 18.0.0 (https://miratech-soft@dev.azure.com/miratech-soft/llvm/_git/llvm c8d01fb665fc5d9378100a6d92ebcd3be49be655)"} - !3 = !{!4, !4, i64 0} - !4 = !{!"double", !5, i64 0} - !5 = !{!"omnipotent char", !6, i64 0} - !6 = !{!"Simple C/C++ TBAA"} - !7 = distinct !{!7, !8, !9} - !8 = !{!"llvm.loop.mustprogress"} - !9 = !{!"llvm.loop.unroll.disable"} - ... 
--- name: kernel -alignment: 16 -exposesReturnsTwice: false -legalized: false -regBankSelected: false -selected: false -failedISel: false tracksRegLiveness: true -hasWinCFI: false -callsEHReturn: false -callsUnwindInit: false -hasEHCatchret: false -hasEHScopes: false -hasEHFunclets: false -isOutlined: false -debugInstrRef: false -failsVerification: false -tracksDebugUserValues: false -registers: - - { id: 0, class: vsfrc, preferred-register: '' } - - { id: 1, class: vsfrc, preferred-register: '' } - - { id: 2, class: g8rc, preferred-register: '' } - - { id: 3, class: vsfrc, preferred-register: '' } - - { id: 4, class: vsfrc, preferred-register: '' } - - { id: 5, class: g8rc_and_g8rc_nox0, preferred-register: '' } - - { id: 6, class: g8rc, preferred-register: '' } - - { id: 7, class: vsfrc, preferred-register: '' } - - { id: 8, class: g8rc_and_g8rc_nox0, preferred-register: '' } - - { id: 9, class: g8rc_and_g8rc_nox0, preferred-register: '' } - - { id: 10, class: g8rc, preferred-register: '' } - - { id: 11, class: gprc, preferred-register: '' } - - { id: 12, class: vsfrc, preferred-register: '' } - - { id: 13, class: crrc, preferred-register: '' } - - { id: 14, class: vsfrc, preferred-register: '' } - - { id: 15, class: g8rc, preferred-register: '' } - - { id: 16, class: g8rc, preferred-register: '' } - - { id: 17, class: g8rc, preferred-register: '' } - - { id: 18, class: f8rc, preferred-register: '' } - - { id: 19, class: g8rc_and_g8rc_nox0, preferred-register: '' } - - { id: 20, class: vsfrc, preferred-register: '' } - - { id: 21, class: vsfrc, preferred-register: '' } - - { id: 22, class: vsfrc, preferred-register: '' } - - { id: 23, class: vsfrc, preferred-register: '' } - - { id: 24, class: vsfrc, preferred-register: '' } - - { id: 25, class: vsfrc, preferred-register: '' } - - { id: 26, class: vsfrc, preferred-register: '' } - - { id: 27, class: vsfrc, preferred-register: '' } - - { id: 28, class: vsfrc, preferred-register: '' } - - { id: 29, class: vsfrc, preferred-register: '' } - - { id: 30, class: vsfrc, preferred-register: '' } - - { id: 31, class: vsfrc, preferred-register: '' } - - { id: 32, class: vsfrc, preferred-register: '' } - - { id: 33, class: vsfrc, preferred-register: '' } - - { id: 34, class: vsfrc, preferred-register: '' } - - { id: 35, class: vsfrc, preferred-register: '' } - - { id: 36, class: vsfrc, preferred-register: '' } - - { id: 37, class: vsfrc, preferred-register: '' } - - { id: 38, class: vsfrc, preferred-register: '' } - - { id: 39, class: vsfrc, preferred-register: '' } - - { id: 40, class: vsfrc, preferred-register: '' } - - { id: 41, class: vsfrc, preferred-register: '' } - - { id: 42, class: vsfrc, preferred-register: '' } - - { id: 43, class: vsfrc, preferred-register: '' } - - { id: 44, class: vsfrc, preferred-register: '' } - - { id: 45, class: vsfrc, preferred-register: '' } - - { id: 46, class: vsfrc, preferred-register: '' } - - { id: 47, class: vsfrc, preferred-register: '' } - - { id: 48, class: vsfrc, preferred-register: '' } - - { id: 49, class: vsfrc, preferred-register: '' } - - { id: 50, class: vsfrc, preferred-register: '' } - - { id: 51, class: vsfrc, preferred-register: '' } - - { id: 52, class: vsfrc, preferred-register: '' } - - { id: 53, class: vsfrc, preferred-register: '' } - - { id: 54, class: vsfrc, preferred-register: '' } - - { id: 55, class: vsfrc, preferred-register: '' } - - { id: 56, class: vsfrc, preferred-register: '' } - - { id: 57, class: vsfrc, preferred-register: '' } - - { id: 58, class: vsfrc, preferred-register: 
'' } - - { id: 59, class: vsfrc, preferred-register: '' } - - { id: 60, class: vsfrc, preferred-register: '' } - - { id: 61, class: vsfrc, preferred-register: '' } - - { id: 62, class: crbitrc, preferred-register: '' } liveins: - { reg: '$x3', virtual-reg: '%8' } - { reg: '$x4', virtual-reg: '%9' } - { reg: '$x5', virtual-reg: '%10' } -frameInfo: - isFrameAddressTaken: false - isReturnAddressTaken: false - hasStackMap: false - hasPatchPoint: false - stackSize: 0 - offsetAdjustment: 0 - maxAlignment: 1 - adjustsStack: false - hasCalls: false - stackProtector: '' - functionContext: '' - maxCallFrameSize: 4294967295 - cvBytesOfCalleeSavedRegisters: 0 - hasOpaqueSPAdjustment: false - hasVAStart: false - hasMustTailInVarArgFunc: false - hasTailCall: false - localFrameSize: 0 - savePoint: '' - restorePoint: '' -fixedStack: [] -stack: [] -entry_values: [] -callSites: [] -debugValueSubstitutions: [] -constants: [] -machineFunctionInfo: {} body: | bb.0.entry: successors: %bb.2(0x50000000), %bb.1(0x30000000) @@ -251,16 +111,12 @@ body: | BCC 44, killed %13, %bb.2 bb.1: - successors: %bb.3(0x80000000) - %12:vsfrc = XXLXORdpz B %bb.3 bb.2.for.body.preheader: - successors: %bb.4(0x80000000) - - %0:vsfrc = DFLOADf64 0, %8 :: (load (s64) from %ir.a, !tbaa !3) - %1:vsfrc = DFLOADf64 8, killed %8 :: (load (s64) from %ir.arrayidx1, !tbaa !3) + %0:vsfrc = DFLOADf64 0, %8 :: (load (s64) from %ir.a) + %1:vsfrc = DFLOADf64 8, killed %8 :: (load (s64) from %ir.arrayidx1) %16:g8rc = IMPLICIT_DEF %15:g8rc = INSERT_SUBREG killed %16, killed %11, %subreg.sub_32 %17:g8rc = RLDICL killed %15, 0, 32 @@ -279,7 +135,7 @@ body: | %4:vsfrc = PHI %14, %bb.2, %7, %bb.4 %5:g8rc_and_g8rc_nox0 = PHI %2, %bb.2, %6, %bb.4 - %18:f8rc, %19:g8rc_and_g8rc_nox0 = LFDU 8, killed %5 :: (load (s64) from %ir.3, !tbaa !3) + %18:f8rc, %19:g8rc_and_g8rc_nox0 = LFDU 8, killed %5 :: (load (s64) from %ir.3) %6:g8rc = COPY killed %19 %20:vsfrc = nofpexcept XSMADDADP %0, %0, %18, implicit $rm %21:vsfrc = nofpexcept XSMADDADP %20, %20, %18, implicit $rm -- cgit v1.1 From 3ae5c77e976c02ce9e575870e4316af51fe97075 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 2 Apr 2024 19:44:49 -0500 Subject: [libc] Move include so it covers the other files Summary: This is more hacky, but I want to get the bot green before we work on a better solution. --- libc/src/stdio/printf_core/core_structs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libc/src/stdio/printf_core/core_structs.h b/libc/src/stdio/printf_core/core_structs.h index b77b304..bfe362b 100644 --- a/libc/src/stdio/printf_core/core_structs.h +++ b/libc/src/stdio/printf_core/core_structs.h @@ -9,10 +9,11 @@ #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CORE_STRUCTS_H #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CORE_STRUCTS_H +#include "src/__support/macros/config.h" + #include "src/__support/CPP/string_view.h" #include "src/__support/CPP/type_traits.h" #include "src/__support/FPUtil/FPBits.h" -#include "src/__support/macros/config.h" #include "src/stdio/printf_core/printf_config.h" #include -- cgit v1.1 From a27d886ce4cc8be8f67a8331c400d6fe2a273ebd Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Wed, 3 Apr 2024 09:57:25 +0900 Subject: [mlir][linalg][bufferize] Fix element-wise access optimization for sparse tensors (#87305) `linalg.generic` ops with sparse tensors do not necessarily bufferize to element-wise access, because insertions into a sparse tensor may change the layout of (or reallocate) the underlying sparse data structures. 
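As a loose, self-contained analogy in plain C++ (standard-library code only, deliberately not MLIR or the bufferization API), the reasoning above is the familiar container-reallocation hazard: an insertion into a dynamically sized buffer may move its storage, so any per-element handle taken beforehand cannot be trusted afterwards. Insertions into a sparse tensor's compressed buffers behave the same way, which is why the change below conservatively stops treating such ops as element-wise accesses.

  #include <cstdio>
  #include <vector>

  int main() {
    std::vector<int> values = {1, 2, 3}; // stand-in for a compressed value buffer
    int *slot = &values[1];              // "element-wise" handle taken up front
    values.push_back(4);                 // an insertion may reallocate the buffer
    (void)slot; // `slot` may now dangle: the underlying storage/layout changed,
                // so element-wise access planned before the insertion is unsafe.
    std::printf("size after insertion: %zu\n", values.size());
    return 0;
  }
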
--- .../Transforms/BufferizableOpInterfaceImpl.cpp | 5 +++ .../one_shot_bufferize_tensor_copy_insertion.mlir | 36 ++++++++++++++++++++++ 2 files changed, 41 insertions(+) diff --git a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp index 58fb2e9..899b8c8 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/BufferizableOpInterfaceImpl.cpp @@ -11,6 +11,7 @@ #include "mlir/Dialect/Bufferization/IR/Bufferization.h" #include "mlir/Dialect/Bufferization/IR/DstBufferizableOpInterfaceImpl.h" #include "mlir/Dialect/Linalg/IR/Linalg.h" +#include "mlir/Dialect/SparseTensor/IR/SparseTensor.h" #include "mlir/Dialect/Tensor/IR/Tensor.h" #include "mlir/IR/Dialect.h" #include "mlir/IR/Operation.h" @@ -110,6 +111,10 @@ struct LinalgOpInterface ArrayRef opOperands) const { auto linalgOp = cast(op); + // Accesses into sparse data structures are not necessarily elementwise. + if (sparse_tensor::hasAnySparseOperand(linalgOp)) + return false; + // All loops must be parallel. if (linalgOp.getNumLoops() != linalgOp.getNumParallelLoops()) return false; diff --git a/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir b/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir index 6c2292b..b769acd 100644 --- a/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir +++ b/mlir/test/Dialect/SparseTensor/one_shot_bufferize_tensor_copy_insertion.mlir @@ -70,3 +70,39 @@ func.func @update_notinplace(%argb: tensor<10xf32>, %arga: tensor<10xf32, #SV>) } -> tensor<10xf32> return %0, %argb : tensor<10xf32>, tensor<10xf32> } + +#map = affine_map<(d0, d1) -> (d0, d1)> +#map1 = affine_map<(d0, d1, d2) -> (d0, d2)> +#map2 = affine_map<(d0, d1, d2) -> (d2, d1)> +#map3 = affine_map<(d0, d1, d2) -> (d0, d1)> +#sparse = #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : dense, d1 : compressed), posWidth = 64, crdWidth = 64 }> + +// linalg.generic with sparse tensors does not necessarily bufferize to +// element-wise access into the underlying sparse data structures. 
+ +// CHECK-LABEL: func @sparse_non_elementwise( +func.func @sparse_non_elementwise(%arg0: tensor<64x64xf32, #sparse>, %arg1: tensor<64x64xf32>, %arg2: tensor<64x64xf32>) -> tensor<64x64xf32> { + %cst = arith.constant 0.000000e+00 : f32 + // CHECK: %[[alloc0:.*]] = bufferization.alloc_tensor() + // CHECK: %[[alloc1:.*]] = bufferization.alloc_tensor() + %0 = bufferization.alloc_tensor() : tensor<64x64xf32> + // CHECK: %[[generic0:.*]] = linalg.generic {{.*}} outs(%[[alloc1]] : {{.*}}) + %1 = linalg.generic {indexing_maps = [#map], iterator_types = ["parallel", "parallel"]} outs(%0 : tensor<64x64xf32>) { + ^bb0(%out: f32): + linalg.yield %cst : f32 + } -> tensor<64x64xf32> + // CHECK: linalg.generic {{.*}} outs(%[[generic0]] : {{.*}}) + %2 = linalg.generic {indexing_maps = [#map1, #map2, #map3], iterator_types = ["parallel", "parallel", "reduction"]} ins(%arg2, %arg2 : tensor<64x64xf32>, tensor<64x64xf32>) outs(%1 : tensor<64x64xf32>) { + ^bb0(%in: f32, %in_0: f32, %out: f32): + %4 = arith.mulf %in, %in_0 : f32 + %5 = arith.addf %out, %4 : f32 + linalg.yield %5 : f32 + } -> tensor<64x64xf32> + // CHECK: linalg.generic {{.*}} outs(%[[alloc0]] : {{.*}}) + %3 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%arg0, %2 : tensor<64x64xf32, #sparse>, tensor<64x64xf32>) outs(%0 : tensor<64x64xf32>) attrs = {sorted = true} { + ^bb0(%in: f32, %in_0: f32, %out: f32): + %4 = arith.mulf %in, %in_0 : f32 + linalg.yield %4 : f32 + } -> tensor<64x64xf32> + return %3 : tensor<64x64xf32> +} -- cgit v1.1 From 3b19cd7f80d8464d5f1bd8b2a0adf925d10556c4 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 2 Apr 2024 18:34:03 -0700 Subject: [RISCV] Slightly simplify RVVArgDispatcher::constructArgInfos. NFC (#87308) Use a single insert for the non-mask case instead of a push_back followed by an insert that may contain 0 registers. --- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index ee83f9d..279d8a4 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -21115,12 +21115,10 @@ void RVVArgDispatcher::constructArgInfos(ArrayRef TypeList) { RegisterVT.getVectorElementType() == MVT::i1) { RVVArgInfos.push_back({1, RegisterVT, true}); FirstVMaskAssigned = true; - } else { - RVVArgInfos.push_back({1, RegisterVT, false}); + --NumRegs; } - RVVArgInfos.insert(RVVArgInfos.end(), --NumRegs, - {1, RegisterVT, false}); + RVVArgInfos.insert(RVVArgInfos.end(), NumRegs, {1, RegisterVT, false}); } } } -- cgit v1.1 From c925c1646dd248d15ae93c6b3cbd04bb86b9775f Mon Sep 17 00:00:00 2001 From: Jan Svoboda Date: Tue, 2 Apr 2024 18:36:53 -0700 Subject: [clang][modules] NFCI: Pragma diagnostic mappings: write/read `FileID` instead of `SourceLocation` (#87427) For pragma diagnostic mappings, we always write/read `SourceLocation` with offset 0. This is equivalent to just writing a `FileID`, which is exactly what this patch starts doing. 
Originally reviewed here: https://reviews.llvm.org/D137213 --- clang/include/clang/Serialization/ASTBitCodes.h | 2 +- clang/lib/Serialization/ASTReader.cpp | 10 +++++----- clang/lib/Serialization/ASTWriter.cpp | 4 +--- 3 files changed, 7 insertions(+), 9 deletions(-) diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index f31efa5..f762116 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ namespace serialization { /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. -const unsigned VERSION_MAJOR = 29; +const unsigned VERSION_MAJOR = 30; /// AST file minor version number supported by this version of /// Clang. diff --git a/clang/lib/Serialization/ASTReader.cpp b/clang/lib/Serialization/ASTReader.cpp index 004859e..9a39e7d 100644 --- a/clang/lib/Serialization/ASTReader.cpp +++ b/clang/lib/Serialization/ASTReader.cpp @@ -6622,17 +6622,17 @@ void ASTReader::ReadPragmaDiagnosticMappings(DiagnosticsEngine &Diag) { while (NumLocations--) { assert(Idx < Record.size() && "Invalid data, missing pragma diagnostic states"); - SourceLocation Loc = ReadSourceLocation(F, Record[Idx++]); - auto IDAndOffset = SourceMgr.getDecomposedLoc(Loc); - assert(IDAndOffset.first.isValid() && "invalid FileID for transition"); - assert(IDAndOffset.second == 0 && "not a start location for a FileID"); + FileID FID = ReadFileID(F, Record, Idx); + assert(FID.isValid() && "invalid FileID for transition"); + // FIXME: Remove this once we don't need the side-effects. + (void)SourceMgr.getSLocEntryOrNull(FID); unsigned Transitions = Record[Idx++]; // Note that we don't need to set up Parent/ParentOffset here, because // we won't be changing the diagnostic state within imported FileIDs // (other than perhaps appending to the main source file, which has no // parent). - auto &F = Diag.DiagStatesByLoc.Files[IDAndOffset.first]; + auto &F = Diag.DiagStatesByLoc.Files[FID]; F.StateTransitions.reserve(F.StateTransitions.size() + Transitions); for (unsigned I = 0; I != Transitions; ++I) { unsigned Offset = Record[Idx++]; diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index a2668e6..0148eb4 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -3131,9 +3131,7 @@ void ASTWriter::WritePragmaDiagnosticMappings(const DiagnosticsEngine &Diag, continue; ++NumLocations; - SourceLocation Loc = Diag.SourceMgr->getComposedLoc(FileIDAndFile.first, 0); - assert(!Loc.isInvalid() && "start loc for valid FileID is invalid"); - AddSourceLocation(Loc, Record); + AddFileID(FileIDAndFile.first, Record); Record.push_back(FileIDAndFile.second.StateTransitions.size()); for (auto &StatePoint : FileIDAndFile.second.StateTransitions) { -- cgit v1.1 From 01e227487f4674e2627d3db4f357ee83fa04c7d6 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 2 Apr 2024 18:47:49 -0700 Subject: [ELF] Sort IRELATIVE by offset Improve the test gnu-ifunc-nonpreemptible.s to check IRELATIVE offsets. Ensure that IRELATIVE offsets are ordered to improve locality. 
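The fix hinges on the guarantee that differs between the two algorithms: `std::partition` may reorder elements within each group, while `std::stable_partition` keeps each group's original relative order, so IRELATIVE entries that were already emitted in ascending offset order stay sorted by offset. A minimal, self-contained C++ sketch of that distinction (the `Reloc` struct, the offsets, and the helpers below are illustrative stand-ins, not lld's types):

  #include <algorithm>
  #include <cstdio>
  #include <vector>

  struct Reloc {
    int type;        // 0 = ordinary relocation, 1 = "IRELATIVE" stand-in
    unsigned offset; // already produced in ascending offset order
  };

  static void dump(const char *tag, const std::vector<Reloc> &v) {
    std::printf("%s:", tag);
    for (const Reloc &r : v)
      std::printf(" (%d,%#x)", r.type, r.offset);
    std::printf("\n");
  }

  int main() {
    std::vector<Reloc> a = {{1, 0x10}, {0, 0x18}, {1, 0x20}, {0, 0x28}, {1, 0x30}};
    std::vector<Reloc> b = a;

    // Same predicate shape as in the code below: non-IRELATIVE entries first.
    auto notIrelative = [](const Reloc &r) { return r.type != 1; };

    // May reorder elements arbitrarily within each group.
    std::partition(a.begin(), a.end(), notIrelative);
    // Preserves each group's original (here: offset) order.
    std::stable_partition(b.begin(), b.end(), notIrelative);

    dump("partition       ", a);
    dump("stable_partition", b);
    return 0;
  }
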
--- lld/ELF/SyntheticSections.cpp | 2 +- lld/test/ELF/gnu-ifunc-nonpreemptible.s | 75 +++++++++++++++++++++------------ lld/test/ELF/gnu-ifunc-relative.s | 25 ----------- 3 files changed, 48 insertions(+), 54 deletions(-) delete mode 100644 lld/test/ELF/gnu-ifunc-relative.s diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 8708bfe..3494352 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1666,7 +1666,7 @@ void RelocationBaseSection::computeRels() { parallelForEach(relocs, [symTab](DynamicReloc &rel) { rel.computeRaw(symTab); }); - auto irelative = std::partition( + auto irelative = std::stable_partition( relocs.begin() + numRelativeRelocs, relocs.end(), [t = target->iRelativeRel](auto &r) { return r.type != t; }); diff --git a/lld/test/ELF/gnu-ifunc-nonpreemptible.s b/lld/test/ELF/gnu-ifunc-nonpreemptible.s index b209b0c..e03429d 100644 --- a/lld/test/ELF/gnu-ifunc-nonpreemptible.s +++ b/lld/test/ELF/gnu-ifunc-nonpreemptible.s @@ -1,62 +1,75 @@ # REQUIRES: x86 -# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t.o -# RUN: ld.lld %t.o -o %t -# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn %t | FileCheck %s --check-prefix=DISASM -# RUN: llvm-readelf -r -s %t | FileCheck %s +# RUN: rm -rf %t && split-file %s %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 a.s -o a.o +# RUN: llvm-mc -filetype=obj -triple=x86_64 b.s -o b.o +# RUN: ld.lld -shared -soname=b.so b.o -o b.so -# RUN: ld.lld --export-dynamic %t.o -o %t -# RUN: llvm-readelf -r -s %t | FileCheck %s +# RUN: ld.lld a.o -o a +# RUN: llvm-objdump --no-print-imm-hex -d --no-show-raw-insn a | FileCheck %s --check-prefix=DISASM +# RUN: llvm-readelf -r -s a | FileCheck %s -# CHECK: Relocation section '.rela.dyn' at offset {{.*}} contains 2 entries: +# CHECK: Relocation section '.rela.dyn' at offset {{.*}} contains 3 entries: # CHECK-NEXT: Type -# CHECK-NEXT: R_X86_64_IRELATIVE -# CHECK-NEXT: R_X86_64_IRELATIVE +# CHECK-NEXT: {{0*}}[[#%x,O:]] [[#%x,]] R_X86_64_IRELATIVE [[#%x,QUX:]] +# CHECK-NEXT: {{0*}}[[#O+8]] [[#%x,]] R_X86_64_IRELATIVE +# CHECK-NEXT: {{0*}}[[#O+16]] [[#%x,]] R_X86_64_IRELATIVE -# CHECK: 0 NOTYPE LOCAL HIDDEN [[#]] __rela_iplt_start -# CHECK-NEXT: 0 NOTYPE LOCAL HIDDEN [[#]] __rela_iplt_end +# CHECK: 0 NOTYPE LOCAL HIDDEN [[#]] __rela_iplt_start +# CHECK-NEXT: 0 NOTYPE LOCAL HIDDEN [[#]] __rela_iplt_end +# CHECK-NEXT: {{0*}}[[#QUX]] 0 IFUNC GLOBAL DEFAULT [[#]] qux -# RUN: ld.lld -pie %t.o -o %t1 -# RUN: llvm-readelf -s %t1 | FileCheck %s --check-prefix=PIC -# RUN: ld.lld -shared %t.o -o %t2 -# RUN: llvm-readelf -s %t2 | FileCheck %s --check-prefix=PIC +# RUN: ld.lld -pie a.o b.so -o a1 +# RUN: llvm-readelf -rs a1 | FileCheck %s --check-prefixes=PIC,PIE +# RUN: ld.lld -shared a.o b.so -o a2 +# RUN: llvm-readelf -rs a2 | FileCheck %s --check-prefix=PIC + +# PIC: R_X86_64_GLOB_DAT 0000000000000000 ext + 0 +# PIC-NEXT: {{0*}}[[#%x,O:]] [[#%x,]] R_X86_64_64 0000000000000000 __rela_iplt_start + 0 +# PIC-NEXT: {{0*}}[[#O+8]] [[#%x,]] R_X86_64_64 0000000000000000 __rela_iplt_end + 0 +# PIE-NEXT: {{0*}}[[#O+16]] [[#%x,]] R_X86_64_IRELATIVE +# PIE-NEXT: {{0*}}[[#O+24]] [[#%x,]] R_X86_64_IRELATIVE +# PIE-NEXT: {{0*}}[[#O+32]] [[#%x,]] R_X86_64_IRELATIVE # PIC: 0 NOTYPE WEAK DEFAULT UND __rela_iplt_start # PIC-NEXT: 0 NOTYPE WEAK DEFAULT UND __rela_iplt_end # DISASM: Disassembly of section .text: # DISASM-EMPTY: -# DISASM-NEXT: : +# DISASM-NEXT: : +# DISASM: : # DISASM: : # DISASM: : # DISASM: <_start>: # DISASM-NEXT: callq 0x[[#%x,foo:]] # 
DISASM-NEXT: callq 0x[[#%x,bar:]] +# DISASM-NEXT: callq 0x[[#%x,qux:]] # DISASM-EMPTY: # DISASM-NEXT: Disassembly of section .iplt: # DISASM-EMPTY: # DISASM-NEXT: <.iplt>: -# DISASM-NEXT: [[#foo]]: jmpq *{{.*}}(%rip) +# DISASM-NEXT: [[#qux]]: jmpq *{{.*}}(%rip) # DISASM-NEXT: pushq $0 # DISASM-NEXT: jmp 0x0 -# DISASM-NEXT: [[#bar]]: jmpq *{{.*}}(%rip) +# DISASM-NEXT: [[#foo]]: jmpq *{{.*}}(%rip) # DISASM-NEXT: pushq $1 # DISASM-NEXT: jmp 0x0 +# DISASM-NEXT: [[#bar]]: jmpq *{{.*}}(%rip) +# DISASM-NEXT: pushq $2 +# DISASM-NEXT: jmp 0x0 -.text +#--- a.s +.globl qux, foo, bar +.type qux, @gnu_indirect_function .type foo STT_GNU_IFUNC -.globl foo -foo: - ret - .type bar STT_GNU_IFUNC -.globl bar -bar: - ret +qux: ret +foo: ret +bar: ret .type unused, @gnu_indirect_function .globl unused -unused: - ret +.weak ext +unused: mov ext@gotpcrel(%rip), %rax .weak __rela_iplt_start .weak __rela_iplt_end @@ -65,7 +78,13 @@ unused: _start: call foo call bar + call qux .data .quad __rela_iplt_start .quad __rela_iplt_end + +#--- b.s +.globl ext +ext: + ret diff --git a/lld/test/ELF/gnu-ifunc-relative.s b/lld/test/ELF/gnu-ifunc-relative.s deleted file mode 100644 index 278bc50..0000000 --- a/lld/test/ELF/gnu-ifunc-relative.s +++ /dev/null @@ -1,25 +0,0 @@ -// REQUIRES: x86 -// RUN: llvm-mc -filetype=obj -triple=x86_64-pc-linux %s -o %t.o -// RUN: ld.lld --strip-all %t.o -o %t -// RUN: llvm-readobj -r %t | FileCheck %s -// RUN: ld.lld %t.o -o %t -// RUN: llvm-readobj -r --symbols %t | FileCheck %s --check-prefixes=CHECK,SYM - -.type foo STT_GNU_IFUNC -.globl foo -foo: - ret - -.globl _start -_start: - call foo - -// CHECK: Section ({{.*}}) .rela.dyn { -// CHECK-NEXT: R_X86_64_IRELATIVE - 0x[[ADDR:.*]] -// CHECK-NEXT: } - -// SYM: Name: foo -// SYM-NEXT: Value: 0x[[ADDR]] -// SYM-NEXT: Size: 0 -// SYM-NEXT: Binding: Global -// SYM-NEXT: Type: GNU_IFunc -- cgit v1.1 From 943f39d29e1ec0d005977e6c3e85390119b8cb4e Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Tue, 2 Apr 2024 20:53:35 -0500 Subject: Revert "[Libomptarget] Add RPC-based `printf` implementation for OpenMP (#85638)" This reverts commit 2cf8118e3aa60f406ec41e88bdd4304f39744e89. Failing tests, revert until I can fix it --- openmp/libomptarget/DeviceRTL/CMakeLists.txt | 5 ---- openmp/libomptarget/DeviceRTL/src/LibC.cpp | 13 ---------- openmp/libomptarget/test/libc/printf.c | 36 ---------------------------- 3 files changed, 54 deletions(-) delete mode 100644 openmp/libomptarget/test/libc/printf.c diff --git a/openmp/libomptarget/DeviceRTL/CMakeLists.txt b/openmp/libomptarget/DeviceRTL/CMakeLists.txt index 2e7f28d..2509f12 100644 --- a/openmp/libomptarget/DeviceRTL/CMakeLists.txt +++ b/openmp/libomptarget/DeviceRTL/CMakeLists.txt @@ -122,11 +122,6 @@ set(clang_opt_flags -O3 -mllvm -openmp-opt-disable -DSHARED_SCRATCHPAD_SIZE=512 set(link_opt_flags -O3 -openmp-opt-disable -attributor-enable=module -vectorize-slp=false ) set(link_export_flag -passes=internalize -internalize-public-api-file=${source_directory}/exports) -# If the user built with the GPU C library enabled we will use that instead. 
-if(${LIBOMPTARGET_GPU_LIBC_SUPPORT}) - list(APPEND clang_opt_flags -DOMPTARGET_HAS_LIBC) -endif() - # Prepend -I to each list element set (LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL "${LIBOMPTARGET_LLVM_INCLUDE_DIRS}") list(TRANSFORM LIBOMPTARGET_LLVM_INCLUDE_DIRS_DEVICERTL PREPEND "-I") diff --git a/openmp/libomptarget/DeviceRTL/src/LibC.cpp b/openmp/libomptarget/DeviceRTL/src/LibC.cpp index 33fec81..af675b9 100644 --- a/openmp/libomptarget/DeviceRTL/src/LibC.cpp +++ b/openmp/libomptarget/DeviceRTL/src/LibC.cpp @@ -53,23 +53,10 @@ void memset(void *dst, int C, size_t count) { dstc[I] = C; } -// If the user built with the GPU C library enabled we will assume that we can -// call it. -#ifdef OMPTARGET_HAS_LIBC - -// TODO: Remove this handling once we have varargs support. -extern struct FILE *stdout; -int32_t rpc_fprintf(FILE *, const char *, void *, uint64_t); - -int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) { - return rpc_fprintf(stdout, Format, Arguments, Size); -} -#else /// printf() calls are rewritten by CGGPUBuiltin to __llvm_omp_vprintf int32_t __llvm_omp_vprintf(const char *Format, void *Arguments, uint32_t Size) { return impl::omp_vprintf(Format, Arguments, Size); } -#endif } #pragma omp end declare target diff --git a/openmp/libomptarget/test/libc/printf.c b/openmp/libomptarget/test/libc/printf.c deleted file mode 100644 index 64cdd80..0000000 --- a/openmp/libomptarget/test/libc/printf.c +++ /dev/null @@ -1,36 +0,0 @@ -// RUN: %libomptarget-compile-run-and-check-generic - -// REQUIRES: libc - -#include - -int main() { - // CHECK: PASS -#pragma omp target - { printf("PASS\n"); } - - // CHECK: PASS -#pragma omp target - { printf("%s\n", "PASS"); } - - // CHECK: PASS - // CHECK: PASS - // CHECK: PASS - // CHECK: PASS - // CHECK: PASS - // CHECK: PASS - // CHECK: PASS - // CHECK: PASS -#pragma omp target teams num_teams(4) -#pragma omp parallel num_threads(2) - { printf("PASS\n"); } - - // CHECK: PASS - char str[] = {'P', 'A', 'S', 'S', '\0'}; -#pragma omp target map(to : str) - { printf("%s\n", str); } - - // CHECK: 11111111111 -#pragma omp target - { printf("%s%-.0f%4b%c%ld\n", "1111", 1.0, 0xf, '1', 1lu); } -} -- cgit v1.1 From 8b859c6e4a8e9ab9969582267bbdc04ed6bfa535 Mon Sep 17 00:00:00 2001 From: Cinhi Young Date: Wed, 3 Apr 2024 10:14:02 +0800 Subject: [MIPS] Fix the opcode of max.fmt and mina.fmt (#85609) - The opcode of the mina.fmt and max.fmt is documented wrong, the object code compiled from the same assembly with LLVM behaves differently than one compiled with GCC and Binutils. - Modify the opcodes to match Binutils. The actual opcodes are as follows: {5,3} | bits {2,0} of func | ... | 100 | 101 | 110 | 111 -----+-----+-----+-----+-----+----- 010 | ... 
| min | mina | max | maxa --- llvm/lib/Target/Mips/Mips32r6InstrInfo.td | 8 ++++---- llvm/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt | 8 ++++---- llvm/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt | 8 ++++---- llvm/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt | 8 ++++---- llvm/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt | 8 ++++---- llvm/test/MC/Mips/mips32r6/valid.s | 8 ++++---- llvm/test/MC/Mips/mips64r6/valid.s | 8 ++++---- 7 files changed, 28 insertions(+), 28 deletions(-) diff --git a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td index 9c29acb..bef7607 100644 --- a/llvm/lib/Target/Mips/Mips32r6InstrInfo.td +++ b/llvm/lib/Target/Mips/Mips32r6InstrInfo.td @@ -153,15 +153,15 @@ class SELNEZ_ENC : SPECIAL_3R_FM<0b00000, 0b110111>; class LWPC_ENC : PCREL19_FM; -class MAX_S_ENC : COP1_3R_FM<0b011101, FIELD_FMT_S>; -class MAX_D_ENC : COP1_3R_FM<0b011101, FIELD_FMT_D>; +class MAX_S_ENC : COP1_3R_FM<0b011110, FIELD_FMT_S>; +class MAX_D_ENC : COP1_3R_FM<0b011110, FIELD_FMT_D>; class MIN_S_ENC : COP1_3R_FM<0b011100, FIELD_FMT_S>; class MIN_D_ENC : COP1_3R_FM<0b011100, FIELD_FMT_D>; class MAXA_S_ENC : COP1_3R_FM<0b011111, FIELD_FMT_S>; class MAXA_D_ENC : COP1_3R_FM<0b011111, FIELD_FMT_D>; -class MINA_S_ENC : COP1_3R_FM<0b011110, FIELD_FMT_S>; -class MINA_D_ENC : COP1_3R_FM<0b011110, FIELD_FMT_D>; +class MINA_S_ENC : COP1_3R_FM<0b011101, FIELD_FMT_S>; +class MINA_D_ENC : COP1_3R_FM<0b011101, FIELD_FMT_D>; class SELEQZ_S_ENC : COP1_3R_FM<0b010100, FIELD_FMT_S>; class SELEQZ_D_ENC : COP1_3R_FM<0b010100, FIELD_FMT_D>; diff --git a/llvm/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt b/llvm/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt index 1a73178..d6f10e9 100644 --- a/llvm/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt +++ b/llvm/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6-el.txt @@ -116,14 +116,14 @@ 0x10 0x08 0x02 0x46 # CHECK: sel.s $f0, $f1, $f2 0x35 0x10 0x64 0x00 # CHECK: seleqz $2, $3, $4 0x37 0x10 0x64 0x00 # CHECK: selnez $2, $3, $4 -0x1d 0x10 0x04 0x46 # CHECK: max.s $f0, $f2, $f4 -0x1d 0x10 0x24 0x46 # CHECK: max.d $f0, $f2, $f4 0x1c 0x10 0x04 0x46 # CHECK: min.s $f0, $f2, $f4 0x1c 0x10 0x24 0x46 # CHECK: min.d $f0, $f2, $f4 +0x1d 0x10 0x04 0x46 # CHECK: mina.s $f0, $f2, $f4 +0x1d 0x10 0x24 0x46 # CHECK: mina.d $f0, $f2, $f4 +0x1e 0x10 0x04 0x46 # CHECK: max.s $f0, $f2, $f4 +0x1e 0x10 0x24 0x46 # CHECK: max.d $f0, $f2, $f4 0x1f 0x10 0x04 0x46 # CHECK: maxa.s $f0, $f2, $f4 0x1f 0x10 0x24 0x46 # CHECK: maxa.d $f0, $f2, $f4 -0x1e 0x10 0x04 0x46 # CHECK: mina.s $f0, $f2, $f4 -0x1e 0x10 0x24 0x46 # CHECK: mina.d $f0, $f2, $f4 0x04 0x00 0x42 0x34 # CHECK: ori $2, $2, 4 0x14 0x10 0x04 0x46 # CHECK: seleqz.s $f0, $f2, $f4 0x14 0x10 0x24 0x46 # CHECK: seleqz.d $f0, $f2, $f4 diff --git a/llvm/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt b/llvm/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt index 53ea025..e1ba009 100644 --- a/llvm/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt +++ b/llvm/test/MC/Disassembler/Mips/mips32r6/valid-mips32r6.txt @@ -92,8 +92,8 @@ 0x46 0x04 0x10 0x14 # CHECK: seleqz.s $f0, $f2, $f4 0x46 0x04 0x10 0x17 # CHECK: selnez.s $f0, $f2, $f4 0x46 0x04 0x10 0x1c # CHECK: min.s $f0, $f2, $f4 -0x46 0x04 0x10 0x1d # CHECK: max.s $f0, $f2, $f4 -0x46 0x04 0x10 0x1e # CHECK: mina.s $f0, $f2, $f4 +0x46 0x04 0x10 0x1d # CHECK: mina.s $f0, $f2, $f4 +0x46 0x04 0x10 0x1e # CHECK: max.s $f0, $f2, $f4 0x46 0x04 0x10 0x1f # CHECK: maxa.s $f0, $f2, $f4 
0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4 0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4 @@ -103,8 +103,8 @@ 0x46 0x24 0x10 0x14 # CHECK: seleqz.d $f0, $f2, $f4 0x46 0x24 0x10 0x17 # CHECK: selnez.d $f0, $f2, $f4 0x46 0x24 0x10 0x1c # CHECK: min.d $f0, $f2, $f4 -0x46 0x24 0x10 0x1d # CHECK: max.d $f0, $f2, $f4 -0x46 0x24 0x10 0x1e # CHECK: mina.d $f0, $f2, $f4 +0x46 0x24 0x10 0x1d # CHECK: mina.d $f0, $f2, $f4 +0x46 0x24 0x10 0x1e # CHECK: max.d $f0, $f2, $f4 0x46 0x24 0x10 0x1f # CHECK: maxa.d $f0, $f2, $f4 0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4 0x46 0x24 0x18 0x99 # CHECK: msubf.d $f2, $f3, $f4 diff --git a/llvm/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt b/llvm/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt index 9aeea45..a7dfbd2 100644 --- a/llvm/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt +++ b/llvm/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6-el.txt @@ -140,15 +140,15 @@ 0x43 0x00 0x50 0xec # CHECK: lwupc $2, 268 0x98 0x18 0x24 0x46 # CHECK: maddf.d $f2, $f3, $f4 0x98 0x18 0x04 0x46 # CHECK: maddf.s $f2, $f3, $f4 -0x1d 0x10 0x24 0x46 # CHECK: max.d $f0, $f2, $f4 -0x1d 0x10 0x04 0x46 # CHECK: max.s $f0, $f2, $f4 +0x1e 0x10 0x24 0x46 # CHECK: max.d $f0, $f2, $f4 +0x1e 0x10 0x04 0x46 # CHECK: max.s $f0, $f2, $f4 0x1f 0x10 0x24 0x46 # CHECK: maxa.d $f0, $f2, $f4 0x1f 0x10 0x04 0x46 # CHECK: maxa.s $f0, $f2, $f4 0x01 0x78 0x08 0x40 # CHECK: mfc0 $8, $15, 1 0x1c 0x10 0x24 0x46 # CHECK: min.d $f0, $f2, $f4 0x1c 0x10 0x04 0x46 # CHECK: min.s $f0, $f2, $f4 -0x1e 0x10 0x24 0x46 # CHECK: mina.d $f0, $f2, $f4 -0x1e 0x10 0x04 0x46 # CHECK: mina.s $f0, $f2, $f4 +0x1d 0x10 0x24 0x46 # CHECK: mina.d $f0, $f2, $f4 +0x1d 0x10 0x04 0x46 # CHECK: mina.s $f0, $f2, $f4 0xda 0x10 0x64 0x00 # CHECK: mod $2, $3, $4 0xdb 0x10 0x64 0x00 # CHECK: modu $2, $3, $4 0x25 0x78 0xe0 0x03 # CHECK: move $15, $ra diff --git a/llvm/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt b/llvm/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt index 32b91c6..0030e51 100644 --- a/llvm/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt +++ b/llvm/test/MC/Disassembler/Mips/mips64r6/valid-mips64r6.txt @@ -111,8 +111,8 @@ 0x46 0x04 0x10 0x14 # CHECK: seleqz.s $f0, $f2, $f4 0x46 0x04 0x10 0x17 # CHECK: selnez.s $f0, $f2, $f4 0x46 0x04 0x10 0x1c # CHECK: min.s $f0, $f2, $f4 -0x46 0x04 0x10 0x1d # CHECK: max.s $f0, $f2, $f4 -0x46 0x04 0x10 0x1e # CHECK: mina.s $f0, $f2, $f4 +0x46 0x04 0x10 0x1d # CHECK: mina.s $f0, $f2, $f4 +0x46 0x04 0x10 0x1e # CHECK: max.s $f0, $f2, $f4 0x46 0x04 0x10 0x1f # CHECK: maxa.s $f0, $f2, $f4 0x46 0x04 0x18 0x98 # CHECK: maddf.s $f2, $f3, $f4 0x46 0x04 0x18 0x99 # CHECK: msubf.s $f2, $f3, $f4 @@ -122,8 +122,8 @@ 0x46 0x24 0x10 0x14 # CHECK: seleqz.d $f0, $f2, $f4 0x46 0x24 0x10 0x17 # CHECK: selnez.d $f0, $f2, $f4 0x46 0x24 0x10 0x1c # CHECK: min.d $f0, $f2, $f4 -0x46 0x24 0x10 0x1d # CHECK: max.d $f0, $f2, $f4 -0x46 0x24 0x10 0x1e # CHECK: mina.d $f0, $f2, $f4 +0x46 0x24 0x10 0x1d # CHECK: mina.d $f0, $f2, $f4 +0x46 0x24 0x10 0x1e # CHECK: max.d $f0, $f2, $f4 0x46 0x24 0x10 0x1f # CHECK: maxa.d $f0, $f2, $f4 0x46 0x24 0x18 0x98 # CHECK: maddf.d $f2, $f3, $f4 0x46 0x24 0x18 0x99 # CHECK: msubf.d $f2, $f3, $f4 diff --git a/llvm/test/MC/Mips/mips32r6/valid.s b/llvm/test/MC/Mips/mips32r6/valid.s index 0f098a1..0d705b6 100644 --- a/llvm/test/MC/Mips/mips32r6/valid.s +++ b/llvm/test/MC/Mips/mips32r6/valid.s @@ -170,14 +170,14 @@ a: sel.s $f0,$f1,$f2 # CHECK: sel.s $f0, $f1, $f2 # encoding: [0x46,0x02,0x08,0x10] seleqz $2,$3,$4 # CHECK: 
seleqz $2, $3, $4 # encoding: [0x00,0x64,0x10,0x35] selnez $2,$3,$4 # CHECK: selnez $2, $3, $4 # encoding: [0x00,0x64,0x10,0x37] - max.s $f0, $f2, $f4 # CHECK: max.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1d] - max.d $f0, $f2, $f4 # CHECK: max.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1d] + max.s $f0, $f2, $f4 # CHECK: max.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e] + max.d $f0, $f2, $f4 # CHECK: max.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e] min.s $f0, $f2, $f4 # CHECK: min.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1c] min.d $f0, $f2, $f4 # CHECK: min.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1c] maxa.s $f0, $f2, $f4 # CHECK: maxa.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1f] maxa.d $f0, $f2, $f4 # CHECK: maxa.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1f] - mina.s $f0, $f2, $f4 # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e] - mina.d $f0, $f2, $f4 # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e] + mina.s $f0, $f2, $f4 # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1d] + mina.d $f0, $f2, $f4 # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1d] or $2, 4 # CHECK: ori $2, $2, 4 # encoding: [0x34,0x42,0x00,0x04] seleqz.s $f0, $f2, $f4 # CHECK: seleqz.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x14] seleqz.d $f0, $f2, $f4 # CHECK: seleqz.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x14] diff --git a/llvm/test/MC/Mips/mips64r6/valid.s b/llvm/test/MC/Mips/mips64r6/valid.s index c50bd9e..ff6e1d7 100644 --- a/llvm/test/MC/Mips/mips64r6/valid.s +++ b/llvm/test/MC/Mips/mips64r6/valid.s @@ -183,14 +183,14 @@ a: lwupc $2,268 # CHECK: lwupc $2, 268 # encoding: [0xec,0x50,0x00,0x43] maddf.d $f2,$f3,$f4 # CHECK: maddf.d $f2, $f3, $f4 # encoding: [0x46,0x24,0x18,0x98] maddf.s $f2,$f3,$f4 # CHECK: maddf.s $f2, $f3, $f4 # encoding: [0x46,0x04,0x18,0x98] - max.d $f0, $f2, $f4 # CHECK: max.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1d] - max.s $f0, $f2, $f4 # CHECK: max.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1d] + max.d $f0, $f2, $f4 # CHECK: max.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e] + max.s $f0, $f2, $f4 # CHECK: max.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e] maxa.d $f0, $f2, $f4 # CHECK: maxa.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1f] maxa.s $f0, $f2, $f4 # CHECK: maxa.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1f] min.d $f0, $f2, $f4 # CHECK: min.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1c] min.s $f0, $f2, $f4 # CHECK: min.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1c] - mina.d $f0, $f2, $f4 # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1e] - mina.s $f0, $f2, $f4 # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1e] + mina.d $f0, $f2, $f4 # CHECK: mina.d $f0, $f2, $f4 # encoding: [0x46,0x24,0x10,0x1d] + mina.s $f0, $f2, $f4 # CHECK: mina.s $f0, $f2, $f4 # encoding: [0x46,0x04,0x10,0x1d] mfc0 $8,$15,1 # CHECK: mfc0 $8, $15, 1 # encoding: [0x40,0x08,0x78,0x01] mod $2,$3,$4 # CHECK: mod $2, $3, $4 # encoding: [0x00,0x64,0x10,0xda] modu $2,$3,$4 # CHECK: modu $2, $3, $4 # encoding: [0x00,0x64,0x10,0xdb] -- cgit v1.1 From 2fb5440e76dd61f91006d9d2831cf5c9235cd109 Mon Sep 17 00:00:00 2001 From: Vinayak Dev <104419489+vinayakdsci@users.noreply.github.com> Date: Wed, 3 Apr 2024 07:53:35 +0530 Subject: [libc] Re-organize the math function tables in docs (#87412) Re-organizes the tables that listed libc's support for math functions, and adds two new columns to the tables indicating where the respective function definitions and error handling methods are 
located in the C23 standard draft WG14-N3096. --- libc/docs/math/index.rst | 728 ++++++++++++++--------------------------------- 1 file changed, 219 insertions(+), 509 deletions(-) diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index b7f1b87..15aefa9 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -101,520 +101,229 @@ Implementation Status - baremetal-riscv32 - to be added + Basic Operations ----------------- - - -+------------------+---------------------------------------+-------------------+-------------------+-------------------+-------------------+ -| | Linux | Windows | MacOS | Embedded | GPU | -| +---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| | x86_64 | aarch64 | aarch32 | riscv64 | x86_64 | aarch64 | x86_64 | aarch64 | aarch32 | riscv32 | AMD | nVidia | -+==================+=========+=========+=========+=========+=========+=========+=========+=========+=========+=========+=========+=========+ -| ceil | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ceilf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ceill | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ceilf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| canoninicalize | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| canoninicalizef | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| canoninicalizel | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -|canoninicalizef128| |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| copysign | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| copysignf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| copysignl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | 
-+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| copysignf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fabs | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fabsf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fabsl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fabsf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fdim | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fdimf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fdiml | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fdimf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| floor | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| floorf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| floorl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| floorf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fmax | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fmaxf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | 
-+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fmaxf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fmaxl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fmin | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fminf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fminf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fminl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fmod | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fmodf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fmodl | |check| | |check| | | |check| | |check| | | | |check| | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fmodf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| frexp | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| frexpf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| frexpl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| frexpf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fromfp | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fromfpf | |check| | 
| | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fromfpl | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fromfpf128 | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fromfpx | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fromfpxf | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fromfpxl | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fromfpxf128 | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ilogb | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ilogbf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ilogbl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ilogf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ldexp | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ldexpf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ldexpl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ldexpf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| llogb | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| llogbf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | 
-+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| llogbl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| llogf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| llrint | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| llrintf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| llrintl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| llrintf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| llround | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| llroundf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| llroundl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| llroundf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| logb | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| logbf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| logbl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| logf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| lrint | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | 
-+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| lrintf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| lrintl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| lrintf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| lround | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| lroundf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| lroundl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| lroundf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| modf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| modff | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| modfl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| modff128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nan | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nanf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nanl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nanf128 | |check| | |check| | | |check| | | | | | | | | | 
-+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nearbyint | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nearbyintf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nearbyintl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nextafter | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nextafterf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nextafterl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nextafterf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nextdown | |check| | |check| | |check| | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nextdownf | |check| | |check| | |check| | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nextdownl | |check| | |check| | |check| | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nextdownf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nexttoward | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nexttowardf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nexttowardl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nextup | |check| | |check| | |check| | |check| | | | | | | | | | 
-+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nextupf | |check| | |check| | |check| | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nextupl | |check| | |check| | |check| | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| nextupf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| remainder | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| remainderf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| remainderl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| remquo | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| remquof | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| remquol | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| rint | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| rintf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| rintl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| rintf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| round | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| roundf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | 
-+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| roundl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| roundf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| scalbn | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| scalbnf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| scalbnl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| trunc | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| truncf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| truncl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| truncf128 | |check| | |check| | | |check| | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ufromfp | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ufromfpf | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ufromfpl | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ufromfpf128 | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ufromfpx | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ufromfpxf | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ufromfpxl | |check| | | | | | | | | | | | | 
-+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| ufromfpxf128 | |check| | | | | | | | | | | | | -+------------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ +================ + ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| | (float) | (double) | (long double) | (float16) | (float128) | C23 Definition Section | C23 Error Handling Section | ++==================+==================+=================+========================+======================+========================+========================+============================+ +| ceil | |check| | |check| | |check| | | |check| | 7.12.9.1 | F.10.6.1 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| canonicalize | |check| | |check| | |check| | | |check| | 7.12.11.7 | F.10.8.7 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| copysign | |check| | |check| | |check| | | |check| | 7.12.11.1 | F.10.8.1 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| dadd | N/A | N/A | | N/A | | 7.12.14.1 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| ddiv | N/A | N/A | | N/A | | 7.12.14.4 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| dfma | N/A | N/A | | N/A | | 7.12.14.5 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| dmul | N/A | N/A | | N/A | | 7.12.14.3 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| dsub | N/A | N/A | | N/A | | 7.12.14.2 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| dsqrt | N/A | N/A | | N/A | | 7.12.14.6 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fabs | |check| | |check| | |check| | | |check| | 7.12.7.3 | F.10.4.3 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fadd | N/A | | | N/A | | 7.12.14.1 | F.10.11 | 
++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fdim | |check| | |check| | |check| | | |check| | 7.12.12.1 | F.10.9.1 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fdiv | N/A | | | N/A | | 7.12.14.4 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| ffma | N/A | | | N/A | | 7.12.14.5 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| floor | |check| | |check| | |check| | | |check| | 7.12.9.2 | F.10.6.2 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fmax | |check| | |check| | |check| | | |check| | 7.12.12.2 | F.10.9.2 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fmaximum | |check| | |check| | |check| | | |check| | 7.12.12.4 | F.10.9.4 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fmaximum_mag | |check| | |check| | |check| | | |check| | 7.12.12.6 | F.10.9.4 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fmaximum_mag_num | |check| | |check| | |check| | | |check| | 7.12.12.10 | F.10.9.5 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fmaximum_num | |check| | |check| | |check| | | |check| | 7.12.12.8 | F.10.9.5 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fmin | |check| | |check| | |check| | | |check| | 7.12.12.3 | F.10.9.3 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fminimum | |check| | |check| | |check| | | |check| | 7.12.12.5 | F.10.9.4 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fminimum_mag | |check| | |check| | |check| | | |check| | 7.12.12.7 | F.10.9.4 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fminimum_mag_num | |check| | |check| | |check| | | |check| | 7.12.12.11 | F.10.9.5 | 
++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fminimum_num | |check| | |check| | |check| | | |check| | 7.12.12.9 | F.10.9.5 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fmod | |check| | |check| | |check| | | |check| | 7.12.10.1 | F.10.7.1 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fmul | N/A | | | N/A | | 7.12.14.3 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| frexp | |check| | |check| | |check| | | |check| | 7.12.6.7 | F.10.3.7 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fromfp | |check| | |check| | |check| | | |check| | 7.12.9.10 | F.10.6.10 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fromfpx | |check| | |check| | |check| | | |check| | 7.12.9.11 | F.10.6.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fsub | N/A | | | N/A | | 7.12.14.2 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fsqrt | N/A | | | N/A | | 7.12.14.6 | F.10.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| ilogb | |check| | |check| | |check| | | |check| | 7.12.6.8 | F.10.3.8 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| ldexp | |check| | |check| | |check| | | |check| | 7.12.6.9 | F.10.3.9 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| llogb | |check| | |check| | |check| | | |check| | 7.12.6.10 | F.10.3.10 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| llrint | |check| | |check| | |check| | | |check| | 7.12.9.5 | F.10.6.5 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| llround | |check| | |check| | |check| | | |check| | 7.12.9.7 | F.10.6.7 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| logb | |check| | |check| | 
|check| | | |check| | 7.12.6.17 | F.10.3.17 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| lrint | |check| | |check| | |check| | | |check| | 7.12.9.5 | F.10.6.5 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| lround | |check| | |check| | |check| | | |check| | 7.12.9.7 | F.10.6.7 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| modf | |check| | |check| | |check| | | |check| | 7.12.6.18 | F.10.3.18 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| nan | |check| | |check| | |check| | | |check| | 7.12.11.2 | F.10.8.2 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| nearbyint | |check| | |check| | |check| | | | 7.12.9.3 | F.10.6.3 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| nextafter | |check| | |check| | |check| | | |check| | 7.12.11.3 | F.10.8.3 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| nextdown | |check| | |check| | |check| | | |check| | 7.12.11.6 | F.10.8.6 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| nexttoward | |check| | |check| | |check| | | N/A | 7.12.11.4 | F.10.8.4 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| nextup | |check| | |check| | |check| | | |check| | 7.12.11.5 | F.10.8.5 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| remainder | |check| | |check| | |check| | | | 7.12.10.2 | F.10.7.2 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| remquo | |check| | |check| | |check| | | | 7.12.10.3 | F.10.7.3 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| rint | |check| | |check| | |check| | | |check| | 7.12.9.4 | F.10.6.4 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| round | |check| | |check| | |check| | | |check| | 7.12.9.6 | F.10.6.6 | 
++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| roundeven | | | | | | 7.12.9.8 | F.10.6.8 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| scalbn | |check| | |check| | |check| | | | 7.12.6.19 | F.10.3.19 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| trunc | |check| | |check| | |check| | | |check| | 7.12.9.9 | F.10.6.9 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| ufromfp | |check| | |check| | |check| | | |check| | 7.12.9.10 | F.10.6.10 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| ufromfpx | |check| | |check| | |check| | | |check| | 7.12.9.11 | F.10.6.11 | ++------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ Higher Math Functions ---------------------- - -+------------+---------------------------------------+-------------------+-------------------+-------------------+-------------------+ -| | Linux | Windows | MacOS | Embedded | GPU | -| +---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| | x86_64 | aarch64 | aarch32 | riscv64 | x86_64 | aarch64 | x86_64 | aarch64 | aarch32 | riscv32 | AMD | nVidia | -+============+=========+=========+=========+=========+=========+=========+=========+=========+=========+=========+=========+=========+ -| acos | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| acosf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| acosl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| acosh | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| acoshf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| acoshl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| asin | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| asinf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | 
-+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| asinl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| asinh | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| asinhf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| asinhl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| atan | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| atanf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| atanl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| atan2 | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| atan2f | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| atan2l | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| atanh | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| atanhf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| atanhl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| cbrt | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| cbrtf | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| cbrtl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| cos | |check| | | | | |check| | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| cosf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| 
cosl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| cosh | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| coshf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| coshl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| erf | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| erff | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| erfl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| erfc | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| erfcf | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| erfcl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| exp | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| expf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| expl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| exp10 | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| exp10f | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| exp10l | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| exp2 | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| exp2f | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | 
-+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| exp2l | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| expm1 | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| expm1f | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| expm1l | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fma | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fmaf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| fmal | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| hypot | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| hypotf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| hypotl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| lgamma | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| lgammaf | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| lgammal | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| log | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| logf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| logl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| log10 | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | 
-+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| log10f | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| log10l | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| log1p | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| log1pf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| log1pl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| log2 | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| log2f | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| log2l | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| pow | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| powf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| powl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sin | |check| | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sinf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sinl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sincos | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sincosf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sincosl | | | | | | | | | | | | | 
-+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sinh | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sinhf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sinhl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sqrt | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sqrtf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sqrtl | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| sqrtf128 | |check| | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| tan | |check| | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| tanf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| tanl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| tanh | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| tanhf | |check| | |check| | |check| | |check| | |check| | | | |check| | |check| | |check| | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| tanhl | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| tgamma | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| tgammaf | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ -| tgammal | | | | | | | | | | | | | -+------------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+---------+ - - -Accuracy of Higher Math Functions -================================= - -============== ================ =============== ====================== ====================== - (float) (double) (long double) (float128) -============== ================ =============== 
====================== ====================== -acos |check| -acosh |check| -asin |check| -asinh |check| -atan |check| -atan2 |check| -atanh |check| -cos |check| large -cosh |check| -erf |check| -exp |check| |check| -exp10 |check| |check| -exp2 |check| |check| -expm1 |check| |check| -fma |check| |check| -hypot |check| |check| -log |check| |check| -log10 |check| |check| -log1p |check| |check| -log2 |check| |check| -pow |check| -sin |check| large -sincos |check| large -sinh |check| -sqrt |check| |check| |check| |check| -tan |check| -tanh |check| -============== ================ =============== ====================== ====================== +===================== ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| | (float) | (double) | (long double) | (float16) | (float128) | C23 Definition Section | C23 Error Handling Section | ++===========+==================+=================+========================+======================+========================+========================+============================+ +| acos | |check| | | | | | 7.12.4.1 | F.10.1.1 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| acosh | |check| | | | | | 7.12.5.1 | F.10.2.1 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| acospi | | | | | | 7.12.4.8 | F.10.1.8 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| asin | |check| | | | | | 7.12.4.2 | F.10.1.2 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| asinh | |check| | | | | | 7.12.5.2 | F.10.2.2 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| asinpi | | | | | | 7.12.4.9 | F.10.1.9 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| atan | |check| | | | | | 7.12.4.3 | F.10.1.3 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| atan2 | |check| | | | | | 7.12.4.4 | F.10.1.4 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| atan2pi | | | | | | 7.12.4.11 | F.10.1.11 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| atanh | |check| | | | | | 7.12.5.3 | F.10.2.3 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| atanpi | | | | | | 7.12.4.10 | F.10.1.10 | 
++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| cbrt | | | | | | 7.12.7.1 | F.10.4.1 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| compoundn | | | | | | 7.12.7.2 | F.10.4.2 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| cos | |check| | large | | | | 7.12.4.5 | F.10.1.5 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| cosh | |check| | | | | | 7.12.5.4 | F.10.2.4 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| cospi | | | | | | 7.12.4.12 | F.10.1.12 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| erf | |check| | | | | | 7.12.8.1 | F.10.5.1 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| erfc | | | | | | 7.12.8.2 | F.10.5.2 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| exp | |check| | |check| | | | | 7.12.6.1 | F.10.3.1 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| exp10 | |check| | |check| | | | | 7.12.6.2 | F.10.3.2 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| exp10m1 | | | | | | 7.12.6.3 | F.10.3.3 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| exp2 | |check| | |check| | | | | 7.12.6.4 | F.10.3.4 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| exp2m1 | | | | | | 7.12.6.5 | F.10.3.5 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| expm1 | |check| | |check| | | | | 7.12.6.6 | F.10.3.6 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fma | |check| | |check| | | | | 7.12.13.1 | F.10.10.1 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| hypot | |check| | |check| | | | | 7.12.7.4 | F.10.4.4 | 
++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| lgamma | | | | | | 7.12.8.3 | F.10.5.3 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| log | |check| | |check| | | | | 7.12.6.11 | F.10.3.11 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| log10 | |check| | |check| | | | | 7.12.6.12 | F.10.3.12 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| log10p1 | | | | | | 7.12.6.13 | F.10.3.13 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| log1p | |check| | |check| | | | | 7.12.6.14 | F.10.3.14 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| log2 | |check| | |check| | | | | 7.12.6.15 | F.10.3.15 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| log2p1 | | | | | | 7.12.6.16 | F.10.3.16 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| logp1 | | | | | | 7.12.6.14 | F.10.3.14 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| pow | |check| | | | | | 7.12.7.5 | F.10.4.5 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| pown | | | | | | 7.12.7.6 | F.10.4.6 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| powr | | | | | | 7.12.7.7 | F.10.4.7 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| rootn | | | | | | 7.12.7.8 | F.10.4.8 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| rsqrt | | | | | | 7.12.7.9 | F.10.4.9 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| sin | |check| | large | | | | 7.12.4.6 | F.10.1.6 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| sincos | |check| | large | | | | | | 
++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| sinh | |check| | | | | | 7.12.5.5 | F.10.2.5 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| sinpi | | | | | | 7.12.4.13 | F.10.1.13 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| sqrt | |check| | |check| | |check| | | |check| | 7.12.7.10 | F.10.4.10 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| tan | |check| | | | | | 7.12.4.7 | F.10.1.7 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| tanh | |check| | | | | | 7.12.5.6 | F.10.2.6 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| tanpi | | | | | | 7.12.4.14 | F.10.1.14 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| tgamma | | | | | | 7.12.8.4 | F.10.5.4 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ Legends: @@ -622,6 +331,7 @@ Legends: * CR: correctly rounded for the default rounding mode (round-to-the-nearest, tie-to-even). * x ULPs: largest errors recorded. +* N/A: Not defined in the standard or will not be added. .. TODO(lntue): Add a new page to discuss about the algorithms used in the -- cgit v1.1 From 93c16e75b8935f6a3f5f39301007f9a42a1f7da1 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Tue, 2 Apr 2024 19:34:14 -0700 Subject: [ELF] Actually sort IRELATIVE by offset The unstable partition in partitionRels might reverse IRELATIVE relocations, so stable_partition in computeRels would lead to IRELATIVE relocations ordered by decreasing offset. Use stable_partition in partitionRels to get IRELATIVE relocations ordered by increasing offset. 
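To make the ordering guarantee concrete, here is a small self-contained sketch (mock relocation records only; not lld's actual Relocation type or partitionRels itself) showing that std::stable_partition keeps the relative, increasing-offset order of the "relative" entries, while an unstable partition is free to reorder them:

// Mock relocations, already sorted by increasing offset on input.
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <vector>

struct MockReloc {
  uint32_t type;   // stand-in for R_X86_64_IRELATIVE vs. other types
  uint64_t offset; // stand-in for r_offset
};

int main() {
  const uint32_t relativeRel = 8; // hypothetical "relative" type value
  std::vector<MockReloc> relocs = {
      {8, 0x10}, {1, 0x18}, {8, 0x20}, {8, 0x28}, {1, 0x30}};

  // Stable partition: the relative relocations move to the front but keep
  // their original (increasing-offset) order.
  auto numRelative =
      std::stable_partition(relocs.begin(), relocs.end(),
                            [=](const MockReloc &r) {
                              return r.type == relativeRel;
                            }) -
      relocs.begin();

  for (const MockReloc &r : relocs)
    std::printf("type=%u offset=0x%llx\n", (unsigned)r.type,
                (unsigned long long)r.offset);
  std::printf("%lld relative relocations\n", (long long)numRelative);
  return 0;
}

With an unstable partition (llvm::partition) the three type-8 entries may come back in any order, which is exactly the decreasing-offset symptom described above.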
--- lld/ELF/SyntheticSections.cpp | 3 ++- lld/test/ELF/gnu-ifunc-nonpreemptible.s | 14 ++++++++------ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index 3494352..d4dc713 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -1633,7 +1633,8 @@ void RelocationBaseSection::partitionRels() { return; const RelType relativeRel = target->relativeRel; numRelativeRelocs = - llvm::partition(relocs, [=](auto &r) { return r.type == relativeRel; }) - + std::stable_partition(relocs.begin(), relocs.end(), + [=](auto &r) { return r.type == relativeRel; }) - relocs.begin(); } diff --git a/lld/test/ELF/gnu-ifunc-nonpreemptible.s b/lld/test/ELF/gnu-ifunc-nonpreemptible.s index e03429d..f3f9008 100644 --- a/lld/test/ELF/gnu-ifunc-nonpreemptible.s +++ b/lld/test/ELF/gnu-ifunc-nonpreemptible.s @@ -23,12 +23,13 @@ # RUN: ld.lld -shared a.o b.so -o a2 # RUN: llvm-readelf -rs a2 | FileCheck %s --check-prefix=PIC -# PIC: R_X86_64_GLOB_DAT 0000000000000000 ext + 0 -# PIC-NEXT: {{0*}}[[#%x,O:]] [[#%x,]] R_X86_64_64 0000000000000000 __rela_iplt_start + 0 -# PIC-NEXT: {{0*}}[[#O+8]] [[#%x,]] R_X86_64_64 0000000000000000 __rela_iplt_end + 0 -# PIE-NEXT: {{0*}}[[#O+16]] [[#%x,]] R_X86_64_IRELATIVE -# PIE-NEXT: {{0*}}[[#O+24]] [[#%x,]] R_X86_64_IRELATIVE -# PIE-NEXT: {{0*}}[[#O+32]] [[#%x,]] R_X86_64_IRELATIVE +# PIC: {{0*}}[[#%x,O:]] [[#%x,]] R_X86_64_RELATIVE +# PIC-NEXT: R_X86_64_GLOB_DAT 0000000000000000 ext + 0 +# PIC-NEXT: {{0*}}[[#O-16]] [[#%x,]] R_X86_64_64 0000000000000000 __rela_iplt_start + 0 +# PIC-NEXT: {{0*}}[[#O-8]] [[#%x,]] R_X86_64_64 0000000000000000 __rela_iplt_end + 0 +# PIE-NEXT: {{0*}}[[#O+8]] [[#%x,]] R_X86_64_IRELATIVE +# PIE-NEXT: {{0*}}[[#O+16]] [[#%x,]] R_X86_64_IRELATIVE +# PIE-NEXT: {{0*}}[[#O+24]] [[#%x,]] R_X86_64_IRELATIVE # PIC: 0 NOTYPE WEAK DEFAULT UND __rela_iplt_start # PIC-NEXT: 0 NOTYPE WEAK DEFAULT UND __rela_iplt_end @@ -83,6 +84,7 @@ _start: .data .quad __rela_iplt_start .quad __rela_iplt_end + .quad .data #--- b.s .globl ext -- cgit v1.1 From 986435c765eb6101e8a31faa7c53ec28260c6ad2 Mon Sep 17 00:00:00 2001 From: Vinayak Dev <104419489+vinayakdsci@users.noreply.github.com> Date: Wed, 3 Apr 2024 08:09:48 +0530 Subject: [libc] Move {f,d}sqrt to higher functions in docs (#87445) Moves the functions `fsqrt()` and `dsqrt()` from basic functions to higher math functions in math docs --- libc/docs/math/index.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 15aefa9..265261b 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -124,8 +124,6 @@ Basic Operations +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | dsub | N/A | N/A | | N/A | | 7.12.14.2 | F.10.11 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| dsqrt | N/A | N/A | | N/A | | 7.12.14.6 | F.10.11 | -+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fabs | |check| | |check| | |check| | | |check| | 7.12.7.3 | F.10.4.3 | 
+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fadd | N/A | | | N/A | | 7.12.14.1 | F.10.11 | @@ -170,8 +168,6 @@ Basic Operations +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fsub | N/A | | | N/A | | 7.12.14.2 | F.10.11 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| fsqrt | N/A | | | N/A | | 7.12.14.6 | F.10.11 | -+------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | ilogb | |check| | |check| | |check| | | |check| | 7.12.6.8 | F.10.3.8 | +------------------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | ldexp | |check| | |check| | |check| | | |check| | 7.12.6.9 | F.10.3.9 | @@ -260,6 +256,8 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | cospi | | | | | | 7.12.4.12 | F.10.1.12 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| dsqrt | N/A | N/A | | N/A | | 7.12.14.6 | F.10.11 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | erf | |check| | | | | | 7.12.8.1 | F.10.5.1 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | erfc | | | | | | 7.12.8.2 | F.10.5.2 | @@ -278,6 +276,8 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | fma | |check| | |check| | | | | 7.12.13.1 | F.10.10.1 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ +| fsqrt | N/A | | | N/A | | 7.12.14.6 | F.10.11 | ++-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | hypot | |check| | |check| | | | | 7.12.7.4 | F.10.4.4 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | lgamma | | | | | | 7.12.8.3 | F.10.5.3 | -- cgit v1.1 From ed1cfffe9b2b2d3cc9279ff83400ace156b317a2 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 3 Apr 2024 10:38:03 +0800 Subject: [NFC] [C++20] [Modules] [Reduced BMI] Make sure the size of reduced BMI is not large than full BMI Before this patch, the size of the reduced BMI may be large than the full BMI when the source codes is pretty small. 
This violates the design principles. The root cause is an oversight that we skipped something in full BMI but forgot to make it in reduced BMI. --- clang/include/clang/Serialization/ASTWriter.h | 7 ++++--- clang/lib/Frontend/PrecompiledPreamble.cpp | 3 +-- clang/lib/Serialization/GeneratePCH.cpp | 19 ++++++++++++++++--- clang/test/Modules/reduced-bmi-size.cppm | 16 ++++++++++++++++ 4 files changed, 37 insertions(+), 8 deletions(-) create mode 100644 clang/test/Modules/reduced-bmi-size.cppm diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index 3ed9803..bd310b6 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -846,7 +846,7 @@ private: /// AST and semantic-analysis consumer that generates a /// precompiled header from the parsed source code. class PCHGenerator : public SemaConsumer { - const Preprocessor &PP; + Preprocessor &PP; std::string OutputFile; std::string isysroot; Sema *SemaPtr; @@ -867,11 +867,12 @@ protected: DiagnosticsEngine &getDiagnostics() const { return SemaPtr->getDiagnostics(); } + Preprocessor &getPreprocessor() { return PP; } virtual Module *getEmittingModule(ASTContext &Ctx); public: - PCHGenerator(const Preprocessor &PP, InMemoryModuleCache &ModuleCache, + PCHGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache, StringRef OutputFile, StringRef isysroot, std::shared_ptr Buffer, ArrayRef> Extensions, @@ -893,7 +894,7 @@ protected: virtual Module *getEmittingModule(ASTContext &Ctx) override; public: - ReducedBMIGenerator(const Preprocessor &PP, InMemoryModuleCache &ModuleCache, + ReducedBMIGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache, StringRef OutputFile); void HandleTranslationUnit(ASTContext &Ctx) override; diff --git a/clang/lib/Frontend/PrecompiledPreamble.cpp b/clang/lib/Frontend/PrecompiledPreamble.cpp index 9b0ef30..fdf05c3 100644 --- a/clang/lib/Frontend/PrecompiledPreamble.cpp +++ b/clang/lib/Frontend/PrecompiledPreamble.cpp @@ -290,8 +290,7 @@ private: class PrecompilePreambleConsumer : public PCHGenerator { public: - PrecompilePreambleConsumer(PrecompilePreambleAction &Action, - const Preprocessor &PP, + PrecompilePreambleConsumer(PrecompilePreambleAction &Action, Preprocessor &PP, InMemoryModuleCache &ModuleCache, StringRef isysroot, std::shared_ptr Buffer) diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp index f54db36..fa71226 100644 --- a/clang/lib/Serialization/GeneratePCH.cpp +++ b/clang/lib/Serialization/GeneratePCH.cpp @@ -14,6 +14,7 @@ #include "clang/AST/ASTContext.h" #include "clang/Frontend/FrontendDiagnostic.h" #include "clang/Lex/HeaderSearch.h" +#include "clang/Lex/HeaderSearchOptions.h" #include "clang/Lex/Preprocessor.h" #include "clang/Sema/SemaConsumer.h" #include "clang/Serialization/ASTReader.h" @@ -23,8 +24,8 @@ using namespace clang; PCHGenerator::PCHGenerator( - const Preprocessor &PP, InMemoryModuleCache &ModuleCache, - StringRef OutputFile, StringRef isysroot, std::shared_ptr Buffer, + Preprocessor &PP, InMemoryModuleCache &ModuleCache, StringRef OutputFile, + StringRef isysroot, std::shared_ptr Buffer, ArrayRef> Extensions, bool AllowASTWithErrors, bool IncludeTimestamps, bool BuildingImplicitModule, bool ShouldCacheASTInMemory, @@ -88,7 +89,7 @@ ASTDeserializationListener *PCHGenerator::GetASTDeserializationListener() { return &Writer; } -ReducedBMIGenerator::ReducedBMIGenerator(const Preprocessor &PP, 
+ReducedBMIGenerator::ReducedBMIGenerator(Preprocessor &PP, InMemoryModuleCache &ModuleCache, StringRef OutputFile) : PCHGenerator( @@ -107,6 +108,18 @@ Module *ReducedBMIGenerator::getEmittingModule(ASTContext &Ctx) { } void ReducedBMIGenerator::HandleTranslationUnit(ASTContext &Ctx) { + // We need to do this to make sure the size of reduced BMI not to be larger + // than full BMI. + // + // FIMXE: We'd better to wrap such options to a new class ASTWriterOptions + // since this is not about searching header really. + // FIXME2: We'd better to move the class writing full BMI with reduced BMI. + HeaderSearchOptions &HSOpts = + getPreprocessor().getHeaderSearchInfo().getHeaderSearchOpts(); + HSOpts.ModulesSkipDiagnosticOptions = true; + HSOpts.ModulesSkipHeaderSearchPaths = true; + HSOpts.ModulesSkipPragmaDiagnosticMappings = true; + PCHGenerator::HandleTranslationUnit(Ctx); if (!isComplete()) diff --git a/clang/test/Modules/reduced-bmi-size.cppm b/clang/test/Modules/reduced-bmi-size.cppm new file mode 100644 index 0000000..664f45f --- /dev/null +++ b/clang/test/Modules/reduced-bmi-size.cppm @@ -0,0 +1,16 @@ +// Ensure that the size of the reduced BMI is not larger than the full BMI +// in the most simple case. + +// This test requires linux commands. +// REQUIRES: system-linux + +// RUN: rm -fr %t +// RUN: mkdir %t +// +// RUN: %clang_cc1 -std=c++20 -emit-module-interface %s -o %t/a.pcm +// RUN: %clang_cc1 -std=c++20 -emit-reduced-module-interface %s -o %t/a.reduced.pcm +// +// %s implies the current source file. So we can't use it directly. +// RUN: [ $(stat -c%\s "%t/a.pcm") -le $(stat -c%\s "%t/a.reduced.pcm") ] + +export module a; -- cgit v1.1 From 83402c301982dc672e8996e1a33e7c4abf109044 Mon Sep 17 00:00:00 2001 From: Jonas Devlieghere Date: Tue, 2 Apr 2024 20:43:28 -0700 Subject: [dsymutil] Support generating dSYMs for firmware environments (#87432) Support generating dSYM companion files for (non-Darwin) firmware environments by considering the binary component of the triple in addition to the OS component. 
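As a hedged illustration of the reasoning (a standalone sketch, assuming a recent LLVM tree where Triple is declared in llvm/TargetParser/Triple.h): a firmware-style triple has an "unknown" OS, so isOSDarwin() is false, yet its object format is still Mach-O, which the added isOSBinFormatMachO() check detects.

#include "llvm/TargetParser/Triple.h"
#include <cstdio>

int main() {
  // Same kind of triple the new test builds with: no Darwin OS, Mach-O format.
  llvm::Triple T("arm64-apple-unknown-macho");
  std::printf("isOSDarwin=%d isOSBinFormatMachO=%d\n",
              (int)T.isOSDarwin(), (int)T.isOSBinFormatMachO());
  // Expected under the assumption above: isOSDarwin=0 isOSBinFormatMachO=1
  return 0;
}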
rdar://125629792
---
 llvm/test/tools/dsymutil/ARM/firmware.test               | 11 +++++++++++
 .../tools/dsymutil/Inputs/private/tmp/firmware/test.o    | Bin 0 -> 528 bytes
 .../tools/dsymutil/Inputs/private/tmp/firmware/test.out  | Bin 0 -> 16560 bytes
 llvm/tools/dsymutil/DwarfLinkerForBinary.cpp             |  4 +++-
 4 files changed, 14 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/tools/dsymutil/ARM/firmware.test
 create mode 100644 llvm/test/tools/dsymutil/Inputs/private/tmp/firmware/test.o
 create mode 100755 llvm/test/tools/dsymutil/Inputs/private/tmp/firmware/test.out

diff --git a/llvm/test/tools/dsymutil/ARM/firmware.test b/llvm/test/tools/dsymutil/ARM/firmware.test
new file mode 100644
index 0000000..128faa5
--- /dev/null
+++ b/llvm/test/tools/dsymutil/ARM/firmware.test
@@ -0,0 +1,11 @@
+$ cat test.c
+int main() {
+  return 0;
+}
+
+$ xcrun clang -O0 -target arm64-apple-unknown-macho test.c -c -o test.o
+$ xcrun ld -arch arm64 -o test.out test.o -platform_version firmware 0 0
+
+RUN: dsymutil -oso-prepend-path %p/../Inputs %p/../Inputs/private/tmp/firmware/test.out -o %t.dSYM
+RUN: llvm-objdump -h %t.dSYM/Contents/Resources/DWARF/test.out | FileCheck %s
+CHECK: file format mach-o arm64
diff --git a/llvm/test/tools/dsymutil/Inputs/private/tmp/firmware/test.o b/llvm/test/tools/dsymutil/Inputs/private/tmp/firmware/test.o
new file mode 100644
index 0000000..3bc83ca
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/private/tmp/firmware/test.o differ
diff --git a/llvm/test/tools/dsymutil/Inputs/private/tmp/firmware/test.out b/llvm/test/tools/dsymutil/Inputs/private/tmp/firmware/test.out
new file mode 100755
index 0000000..21fe4d2
Binary files /dev/null and b/llvm/test/tools/dsymutil/Inputs/private/tmp/firmware/test.out differ
diff --git a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
index 677dfc4..7246ba4 100644
--- a/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
+++ b/llvm/tools/dsymutil/DwarfLinkerForBinary.cpp
@@ -857,7 +857,9 @@ bool DwarfLinkerForBinary::linkImpl(
       return error(toString(std::move(E)));
   }
-  if (Map.getTriple().isOSDarwin() && !Map.getBinaryPath().empty() &&
+  auto MapTriple = Map.getTriple();
+  if ((MapTriple.isOSDarwin() || MapTriple.isOSBinFormatMachO()) &&
+      !Map.getBinaryPath().empty() &&
       ObjectType == Linker::OutputFileType::Object)
     return MachOUtils::generateDsymCompanion(
         Options.VFS, Map, *Streamer->getAsmPrinter().OutStreamer, OutFile,
--
cgit v1.1

From 324436c29ffd14bcf96c94500d5e43391f2b1e51 Mon Sep 17 00:00:00 2001
From: smanna12
Date: Tue, 2 Apr 2024 20:59:48 -0700
Subject: [Clang] Fix bugs in the way we handle duplicate vs conflicting values
 with loop attribute 'code_align' (#87372)

https://github.com/llvm/llvm-project/pull/70762 added support for the new loop
attribute [[clang::code_align()]].

This patch fixes bugs for the test cases below, which were missing diagnostics
because the while loop in CheckForDuplicateLoopAttrs() was exited too early
while checking duplicate vs. conflicting code_align attribute values.
[[clang::code_align(4)]] [[clang::code_align(4)]] [[clang::code_align(8)]] for(int I=0; I<128; ++I) { bar(I); } [[clang::code_align(4)]] [[clang::code_align(4)]] [[clang::code_align(8)]] [[clang::code_align(32)]] for(int I=0; I<128; ++I) { bar(I); } --- clang/lib/Sema/SemaStmtAttr.cpp | 2 +- clang/test/Sema/code_align.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) diff --git a/clang/lib/Sema/SemaStmtAttr.cpp b/clang/lib/Sema/SemaStmtAttr.cpp index 691857e..a033927 100644 --- a/clang/lib/Sema/SemaStmtAttr.cpp +++ b/clang/lib/Sema/SemaStmtAttr.cpp @@ -406,8 +406,8 @@ static void CheckForDuplicateLoopAttrs(Sema &S, ArrayRef Attrs) { << *FirstItr; S.Diag((*FirstItr)->getLocation(), diag::note_previous_attribute); } - return; } + return; } static Attr *handleMSConstexprAttr(Sema &S, Stmt *St, const ParsedAttr &A, diff --git a/clang/test/Sema/code_align.c b/clang/test/Sema/code_align.c index d494d5e..f01f513 100644 --- a/clang/test/Sema/code_align.c +++ b/clang/test/Sema/code_align.c @@ -62,6 +62,17 @@ void foo1(int A) [[clang::code_align(64)]] // expected-error{{conflicting loop attribute 'code_align'}} for(int I=0; I<128; ++I) { bar(I); } + [[clang::code_align(4)]] // expected-note{{previous attribute is here}} + [[clang::code_align(4)]] // OK + [[clang::code_align(8)]] // expected-error{{conflicting loop attribute 'code_align'}} + for(int I=0; I<128; ++I) { bar(I); } + + [[clang::code_align(4)]] // expected-note 2{{previous attribute is here}} + [[clang::code_align(4)]] // OK + [[clang::code_align(8)]] // expected-error{{conflicting loop attribute 'code_align'}} + [[clang::code_align(64)]] // expected-error{{conflicting loop attribute 'code_align'}} + for(int I=0; I<128; ++I) { bar(I); } + // expected-error@+1{{'code_align' attribute requires an integer argument which is a constant power of two between 1 and 4096 inclusive; provided argument was 7}} [[clang::code_align(7)]] for(int I=0; I<128; ++I) { bar(I); } @@ -135,6 +146,17 @@ void code_align_dependent() { [[clang::code_align(E)]] // cpp-local-error{{conflicting loop attribute 'code_align'}} for(int I=0; I<128; ++I) { bar(I); } + [[clang::code_align(A)]] // cpp-local-note{{previous attribute is here}} + [[clang::code_align(A)]] // OK + [[clang::code_align(E)]] // cpp-local-error{{conflicting loop attribute 'code_align'}} + for(int I=0; I<128; ++I) { bar(I); } + + [[clang::code_align(A)]] // cpp-local-note 2{{previous attribute is here}} + [[clang::code_align(A)]] // OK + [[clang::code_align(C)]] // cpp-local-error{{conflicting loop attribute 'code_align'}} + [[clang::code_align(E)]] // cpp-local-error{{conflicting loop attribute 'code_align'}} + for(int I=0; I<128; ++I) { bar(I); } + // cpp-local-error@+1{{'code_align' attribute requires an integer argument which is a constant power of two between 1 and 4096 inclusive; provided argument was 23}} [[clang::code_align(B)]] for(int I=0; I<128; ++I) { bar(I); } -- cgit v1.1 From 2b86fb21f8402f19da7e5887a9572b3d55052991 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Tue, 2 Apr 2024 21:03:49 -0700 Subject: [flang][runtime] Avoid recursive calls in F18 runtime CUDA build. (#87428) Recurrencies in the call graph (even if they are not executed) prevent computing the minimal stack size required for a kernel execution. This change disables some functionality of F18 IO to avoid recursive calls. A couple of functions are rewritten to work without using recursion. 
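The general shape of the rewrites below is to normalize the input up front and fall through, rather than re-entering the same function. A condensed standalone sketch of the pattern (hypothetical stand-ins, not the real flang runtime types; the actual change to RealOutputEditing::Edit in edit-output.cpp follows the same shape):

#include <cstdio>

struct DataEdit { char descriptor; };

// Pretend a 'G' edit descriptor resolves to 'F'; the real runtime inspects
// the value being formatted to decide.
static DataEdit EditForGOutput(const DataEdit &) { return DataEdit{'F'}; }

static bool Edit(const DataEdit &edit) {
  const DataEdit *editPtr = &edit;
  DataEdit newEdit;
  if (editPtr->descriptor == 'G') {
    // Previously: return Edit(EditForGOutput(edit));  // self-recursive call
    newEdit = EditForGOutput(*editPtr);
    editPtr = &newEdit; // handled by the switch below, no recursion needed
  }
  switch (editPtr->descriptor) {
  case 'F':
    std::puts("fixed-point output");
    return true;
  default:
    return false;
  }
}

int main() { return Edit(DataEdit{'G'}) ? 0 : 1; }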
--- flang/include/flang/Common/api-attrs.h | 12 ++++++++++ flang/runtime/descriptor-io.h | 4 ++++ flang/runtime/edit-output.cpp | 39 +++++++++++++++++++-------------- flang/runtime/emit-encoded.h | 28 ++++++++++++++---------- flang/runtime/io-stmt.cpp | 40 ++++++++++++++++++++++++++++++++++ flang/runtime/io-stmt.h | 4 ++++ flang/runtime/unit.cpp | 20 +++++++++++++++-- flang/runtime/unit.h | 1 + 8 files changed, 118 insertions(+), 30 deletions(-) diff --git a/flang/include/flang/Common/api-attrs.h b/flang/include/flang/Common/api-attrs.h index 4d069c6..04ee307 100644 --- a/flang/include/flang/Common/api-attrs.h +++ b/flang/include/flang/Common/api-attrs.h @@ -133,6 +133,18 @@ #undef RT_DEVICE_COMPILATION #endif +/* + * Recurrence in the call graph prevents computing minimal stack size + * required for a kernel execution. This macro can be used to disable + * some F18 runtime functionality that is implemented using recurrent + * function calls or to use alternative implementation. + */ +#if (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__) +#define RT_DEVICE_AVOID_RECURSION 1 +#else +#undef RT_DEVICE_AVOID_RECURSION +#endif + #if defined(__CUDACC__) #define RT_DIAG_PUSH _Pragma("nv_diagnostic push") #define RT_DIAG_POP _Pragma("nv_diagnostic pop") diff --git a/flang/runtime/descriptor-io.h b/flang/runtime/descriptor-io.h index 7063858..0b188a1 100644 --- a/flang/runtime/descriptor-io.h +++ b/flang/runtime/descriptor-io.h @@ -250,6 +250,7 @@ static RT_API_ATTRS bool DefaultComponentIO(IoStatementState &io, const typeInfo::Component &component, const Descriptor &origDescriptor, const SubscriptValue origSubscripts[], Terminator &terminator, const NonTbpDefinedIoTable *table) { +#if !defined(RT_DEVICE_AVOID_RECURSION) if (component.genre() == typeInfo::Component::Genre::Data) { // Create a descriptor for the component StaticDescriptor statDesc; @@ -266,6 +267,9 @@ static RT_API_ATTRS bool DefaultComponentIO(IoStatementState &io, const Descriptor &compDesc{*reinterpret_cast(pointer)}; return DescriptorIO(io, compDesc, table); } +#else + terminator.Crash("not yet implemented: component IO"); +#endif } template diff --git a/flang/runtime/edit-output.cpp b/flang/runtime/edit-output.cpp index b710c29..a06ed25 100644 --- a/flang/runtime/edit-output.cpp +++ b/flang/runtime/edit-output.cpp @@ -751,43 +751,50 @@ RT_API_ATTRS bool RealOutputEditing::EditEXOutput(const DataEdit &edit) { template RT_API_ATTRS bool RealOutputEditing::Edit(const DataEdit &edit) { - switch (edit.descriptor) { + const DataEdit *editPtr{&edit}; + DataEdit newEdit; + if (editPtr->descriptor == 'G') { + // Avoid recursive call as in Edit(EditForGOutput(edit)). 
+ newEdit = EditForGOutput(*editPtr); + editPtr = &newEdit; + RUNTIME_CHECK(io_.GetIoErrorHandler(), editPtr->descriptor != 'G'); + } + switch (editPtr->descriptor) { case 'D': - return EditEorDOutput(edit); + return EditEorDOutput(*editPtr); case 'E': - if (edit.variation == 'X') { - return EditEXOutput(edit); + if (editPtr->variation == 'X') { + return EditEXOutput(*editPtr); } else { - return EditEorDOutput(edit); + return EditEorDOutput(*editPtr); } case 'F': - return EditFOutput(edit); + return EditFOutput(*editPtr); case 'B': - return EditBOZOutput<1>(io_, edit, + return EditBOZOutput<1>(io_, *editPtr, reinterpret_cast(&x_), common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); case 'O': - return EditBOZOutput<3>(io_, edit, + return EditBOZOutput<3>(io_, *editPtr, reinterpret_cast(&x_), common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); case 'Z': - return EditBOZOutput<4>(io_, edit, + return EditBOZOutput<4>(io_, *editPtr, reinterpret_cast(&x_), common::BitsForBinaryPrecision(common::PrecisionOfRealKind(KIND)) >> 3); - case 'G': - return Edit(EditForGOutput(edit)); case 'L': - return EditLogicalOutput(io_, edit, *reinterpret_cast(&x_)); + return EditLogicalOutput( + io_, *editPtr, *reinterpret_cast(&x_)); case 'A': // legacy extension return EditCharacterOutput( - io_, edit, reinterpret_cast(&x_), sizeof x_); + io_, *editPtr, reinterpret_cast(&x_), sizeof x_); default: - if (edit.IsListDirected()) { - return EditListDirectedOutput(edit); + if (editPtr->IsListDirected()) { + return EditListDirectedOutput(*editPtr); } io_.GetIoErrorHandler().SignalError(IostatErrorInFormat, "Data edit descriptor '%c' may not be used with a REAL data item", - edit.descriptor); + editPtr->descriptor); return false; } return false; diff --git a/flang/runtime/emit-encoded.h b/flang/runtime/emit-encoded.h index ac8c7d7..4b5e390 100644 --- a/flang/runtime/emit-encoded.h +++ b/flang/runtime/emit-encoded.h @@ -18,22 +18,26 @@ namespace Fortran::runtime::io { -template +template RT_API_ATTRS bool EmitEncoded( CONTEXT &to, const CHAR *data, std::size_t chars) { ConnectionState &connection{to.GetConnectionState()}; - if (connection.access == Access::Stream && - connection.internalIoCharKind == 0) { - // Stream output: treat newlines as record advancements so that the left tab - // limit is correctly managed - while (const CHAR * nl{FindCharacter(data, CHAR{'\n'}, chars)}) { - auto pos{static_cast(nl - data)}; - if (!EmitEncoded(to, data, pos)) { - return false; + if constexpr (NL_ADVANCES_RECORD) { + if (connection.access == Access::Stream && + connection.internalIoCharKind == 0) { + // Stream output: treat newlines as record advancements so that the left + // tab limit is correctly managed + while (const CHAR * nl{FindCharacter(data, CHAR{'\n'}, chars)}) { + auto pos{static_cast(nl - data)}; + // The [data, data + pos) does not contain the newline, + // so we can avoid the recursion by calling proper specialization. 
+ if (!EmitEncoded(to, data, pos)) { + return false; + } + data += pos + 1; + chars -= pos + 1; + to.AdvanceRecord(); } - data += pos + 1; - chars -= pos + 1; - to.AdvanceRecord(); } } if (connection.useUTF8()) { diff --git a/flang/runtime/io-stmt.cpp b/flang/runtime/io-stmt.cpp index 022e4c8..1a5d32e 100644 --- a/flang/runtime/io-stmt.cpp +++ b/flang/runtime/io-stmt.cpp @@ -220,7 +220,11 @@ ExternalIoStatementBase::ExternalIoStatementBase( MutableModes &ExternalIoStatementBase::mutableModes() { if (const ChildIo * child{unit_.GetChildIo()}) { +#if !defined(RT_DEVICE_AVOID_RECURSION) return child->parent().mutableModes(); +#else + ReportUnsupportedChildIo(); +#endif } return unit_.modes; } @@ -891,17 +895,29 @@ ChildIoStatementState::ChildIoStatementState( template MutableModes &ChildIoStatementState::mutableModes() { +#if !defined(RT_DEVICE_AVOID_RECURSION) return child_.parent().mutableModes(); +#else + ReportUnsupportedChildIo(); +#endif } template ConnectionState &ChildIoStatementState::GetConnectionState() { +#if !defined(RT_DEVICE_AVOID_RECURSION) return child_.parent().GetConnectionState(); +#else + ReportUnsupportedChildIo(); +#endif } template ExternalFileUnit *ChildIoStatementState::GetExternalFileUnit() const { +#if !defined(RT_DEVICE_AVOID_RECURSION) return child_.parent().GetExternalFileUnit(); +#else + ReportUnsupportedChildIo(); +#endif } template int ChildIoStatementState::EndIoStatement() { @@ -914,22 +930,38 @@ template int ChildIoStatementState::EndIoStatement() { template bool ChildIoStatementState::Emit( const char *data, std::size_t bytes, std::size_t elementBytes) { +#if !defined(RT_DEVICE_AVOID_RECURSION) return child_.parent().Emit(data, bytes, elementBytes); +#else + ReportUnsupportedChildIo(); +#endif } template std::size_t ChildIoStatementState::GetNextInputBytes(const char *&p) { +#if !defined(RT_DEVICE_AVOID_RECURSION) return child_.parent().GetNextInputBytes(p); +#else + ReportUnsupportedChildIo(); +#endif } template void ChildIoStatementState::HandleAbsolutePosition(std::int64_t n) { +#if !defined(RT_DEVICE_AVOID_RECURSION) return child_.parent().HandleAbsolutePosition(n); +#else + ReportUnsupportedChildIo(); +#endif } template void ChildIoStatementState::HandleRelativePosition(std::int64_t n) { +#if !defined(RT_DEVICE_AVOID_RECURSION) return child_.parent().HandleRelativePosition(n); +#else + ReportUnsupportedChildIo(); +#endif } template @@ -957,13 +989,21 @@ int ChildFormattedIoStatementState::EndIoStatement() { template bool ChildFormattedIoStatementState::AdvanceRecord(int n) { +#if !defined(RT_DEVICE_AVOID_RECURSION) return this->child().parent().AdvanceRecord(n); +#else + this->ReportUnsupportedChildIo(); +#endif } template bool ChildUnformattedIoStatementState::Receive( char *data, std::size_t bytes, std::size_t elementBytes) { +#if !defined(RT_DEVICE_AVOID_RECURSION) return this->child().parent().Receive(data, bytes, elementBytes); +#else + this->ReportUnsupportedChildIo(); +#endif } template int ChildListIoStatementState::EndIoStatement() { diff --git a/flang/runtime/io-stmt.h b/flang/runtime/io-stmt.h index 8b57523..6053aeb 100644 --- a/flang/runtime/io-stmt.h +++ b/flang/runtime/io-stmt.h @@ -296,6 +296,10 @@ public: RT_API_ATTRS void BadInquiryKeywordHashCrash(InquiryKeywordHash); + RT_API_ATTRS void ReportUnsupportedChildIo() const { + Crash("not yet implemented: child IO"); + } + protected: bool completedOperation_{false}; }; diff --git a/flang/runtime/unit.cpp b/flang/runtime/unit.cpp index 6c648d3..0e38cff 100644 --- a/flang/runtime/unit.cpp 
+++ b/flang/runtime/unit.cpp @@ -206,7 +206,7 @@ bool ExternalFileUnit::BeginReadingRecord(IoErrorHandler &handler) { if (anyWriteSinceLastPositioning_ && access == Access::Sequential) { // Most Fortran implementations allow a READ after a WRITE; // the read then just hits an EOF. - DoEndfile(handler); + DoEndfile(handler); } recordLength.reset(); RUNTIME_CHECK(handler, isUnformatted.has_value()); @@ -671,13 +671,23 @@ void ExternalFileUnit::DoImpliedEndfile(IoErrorHandler &handler) { impliedEndfile_ = false; } +template void ExternalFileUnit::DoEndfile(IoErrorHandler &handler) { if (IsRecordFile() && access != Access::Direct) { furthestPositionInRecord = std::max(positionInRecord, furthestPositionInRecord); if (leftTabLimit) { // last I/O was non-advancing if (access == Access::Sequential && direction_ == Direction::Output) { - AdvanceRecord(handler); + if constexpr (ANY_DIR || DIR == Direction::Output) { + // When DoEndfile() is called from BeginReadingRecord(), + // this call to AdvanceRecord() may appear as a recursion + // though it may never happen. Expose the call only + // under the constexpr direction check. + AdvanceRecord(handler); + } else { + // This check always fails if we are here. + RUNTIME_CHECK(handler, direction_ != Direction::Output); + } } else { // Access::Stream or input leftTabLimit.reset(); ++currentRecordNumber; @@ -695,6 +705,12 @@ void ExternalFileUnit::DoEndfile(IoErrorHandler &handler) { anyWriteSinceLastPositioning_ = false; } +template void ExternalFileUnit::DoEndfile(IoErrorHandler &handler); +template void ExternalFileUnit::DoEndfile( + IoErrorHandler &handler); +template void ExternalFileUnit::DoEndfile( + IoErrorHandler &handler); + void ExternalFileUnit::CommitWrites() { frameOffsetInFile_ += recordOffsetInFrame_ + recordLength.value_or(furthestPositionInRecord); diff --git a/flang/runtime/unit.h b/flang/runtime/unit.h index a6ee597..e59fbbc 100644 --- a/flang/runtime/unit.h +++ b/flang/runtime/unit.h @@ -204,6 +204,7 @@ private: RT_API_ATTRS void BackspaceVariableFormattedRecord(IoErrorHandler &); RT_API_ATTRS bool SetVariableFormattedRecordLength(); RT_API_ATTRS void DoImpliedEndfile(IoErrorHandler &); + template RT_API_ATTRS void DoEndfile(IoErrorHandler &); RT_API_ATTRS void CommitWrites(); RT_API_ATTRS bool CheckDirectAccess(IoErrorHandler &); -- cgit v1.1 From de3e05ecb22473fe9904272ec3511ad1fd62d8d0 Mon Sep 17 00:00:00 2001 From: Mingming Liu Date: Tue, 2 Apr 2024 21:45:58 -0700 Subject: [nfc]Remove the check for compressed strings in llvm/test/.../vtable_profile.ll (#87449) The check for compressed string is too restrictive (e.g. broke downstream users) and doesn't add much value to the test. Removed it. --- llvm/test/Transforms/PGOProfile/vtable_profile.ll | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/llvm/test/Transforms/PGOProfile/vtable_profile.ll b/llvm/test/Transforms/PGOProfile/vtable_profile.ll index a844003..aae1e2d 100644 --- a/llvm/test/Transforms/PGOProfile/vtable_profile.ll +++ b/llvm/test/Transforms/PGOProfile/vtable_profile.ll @@ -1,9 +1,6 @@ ; RUN: opt < %s -passes=pgo-instr-gen -enable-vtable-value-profiling -S 2>&1 | FileCheck %s --check-prefix=GEN --implicit-check-not="VTable value profiling is presently not supported" ; RUN: opt < %s -passes=pgo-instr-gen,instrprof -enable-vtable-value-profiling -S 2>&1 | FileCheck %s --check-prefix=LOWER --implicit-check-not="VTable value profiling is presently not supported" -; __llvm_prf_vnm stores zlib-compressed vtable names. 
-; REQUIRES: zlib - source_filename = "vtable_local.ll" target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" @@ -59,7 +56,7 @@ target triple = "x86_64-unknown-linux-gnu" ; LOWER: $"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E" = comdat nodeduplicate ; LOWER: @__profvt__ZTV7Derived = global { i64, ptr, i32 } { i64 -4576307468236080025, ptr @_ZTV7Derived, i32 48 }, section "__llvm_prf_vtab", comdat, align 8 ; LOWER: @"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E" = internal global { i64, ptr, i32 } { i64 1419990121885302679, ptr @_ZTVN12_GLOBAL__N_15Base2E, i32 24 }, section "__llvm_prf_vtab", comdat, align 8 -; LOWER: @__llvm_prf_vnm = private constant [64 x i8] c"7>x\DA\8B\8F\0A\093wI-\CA,KMa,+IL\CAI\8D\CF\C9ON\CC\D1\CB\C9\B1\8E\07J\FA\19\1A\C5\BB\FB\F8;9\FA\C4\C7\FB\C5\1B\9A:%\16\A7\1A\B9\02\00\19:\12o", section "__llvm_prf_vns", align 1 +; LOWER: @__llvm_prf_vnm = private constant {{.*}}, section "__llvm_prf_vns", align 1 ; LOWER: @llvm.used = appending global [5 x ptr] [ptr @__profvt__ZTV7Derived, ptr @"__profvt_vtable_local.ll;_ZTVN12_GLOBAL__N_15Base2E", ptr @__llvm_prf_vnodes, ptr @__llvm_prf_nm, ptr @__llvm_prf_vnm], section "llvm.metadata" define i32 @_Z4funci(i32 %a) { -- cgit v1.1 From 4ef22fce8208b9fc08da60c5e4f014ca09811b96 Mon Sep 17 00:00:00 2001 From: hanbeom Date: Wed, 3 Apr 2024 15:29:10 +0900 Subject: [InstCombine] Simplify select if it combinated and/or/xor (#73362) `and/or/xor` operations can each be changed to sum of logical operations including operators other than themselves. `x&y -> (x|y) ^ (x^y)` `x|y -> (x&y) | (x^y)` `x^y -> (x|y) ^ (x&y)` if left of condition of `SelectInst` is `and/or/xor` logical operation and right is equal to `0, -1`, or a `constant`, and if `TrueVal` consist of `and/or/xor` logical operation then we can optimize this case. This patch implements this combination. Proof: https://alive2.llvm.org/ce/z/WW8iRR Fixes https://github.com/llvm/llvm-project/issues/71792. --- .../Transforms/InstCombine/InstCombineSelect.cpp | 106 +++ llvm/test/Transforms/InstCombine/select.ll | 794 +++++++++++++++++++++ 2 files changed, 900 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 9ab2bd8..4d3de76 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1687,6 +1687,109 @@ static Value *foldSelectInstWithICmpConst(SelectInst &SI, ICmpInst *ICI, return nullptr; } +static Instruction *foldSelectICmpEq(SelectInst &SI, ICmpInst *ICI, + InstCombinerImpl &IC) { + ICmpInst::Predicate Pred = ICI->getPredicate(); + if (!ICmpInst::isEquality(Pred)) + return nullptr; + + Value *TrueVal = SI.getTrueValue(); + Value *FalseVal = SI.getFalseValue(); + Value *CmpLHS = ICI->getOperand(0); + Value *CmpRHS = ICI->getOperand(1); + + if (Pred == ICmpInst::ICMP_NE) + std::swap(TrueVal, FalseVal); + + // Transform (X == C) ? X : Y -> (X == C) ? C : Y + // specific handling for Bitwise operation. 
+ // x&y -> (x|y) ^ (x^y) or (x|y) & ~(x^y) + // x|y -> (x&y) | (x^y) or (x&y) ^ (x^y) + // x^y -> (x|y) ^ (x&y) or (x|y) & ~(x&y) + Value *X, *Y; + if (!match(CmpLHS, m_BitwiseLogic(m_Value(X), m_Value(Y))) || + !match(TrueVal, m_c_BitwiseLogic(m_Specific(X), m_Specific(Y)))) + return nullptr; + + const unsigned AndOps = Instruction::And, OrOps = Instruction::Or, + XorOps = Instruction::Xor, NoOps = 0; + enum NotMask { None = 0, NotInner, NotRHS }; + + auto matchFalseVal = [&](unsigned OuterOpc, unsigned InnerOpc, + unsigned NotMask) { + auto matchInner = m_c_BinOp(InnerOpc, m_Specific(X), m_Specific(Y)); + if (OuterOpc == NoOps) + return match(CmpRHS, m_Zero()) && match(FalseVal, matchInner); + + if (NotMask == NotInner) { + return match(FalseVal, + m_c_BinOp(OuterOpc, m_Not(matchInner), m_Specific(CmpRHS))); + } else if (NotMask == NotRHS) { + return match(FalseVal, + m_c_BinOp(OuterOpc, matchInner, m_Not(m_Specific(CmpRHS)))); + } else { + return match(FalseVal, + m_c_BinOp(OuterOpc, matchInner, m_Specific(CmpRHS))); + } + }; + + // (X&Y)==C ? X|Y : X^Y -> (X^Y)|C : X^Y or (X^Y)^ C : X^Y + // (X&Y)==C ? X^Y : X|Y -> (X|Y)^C : X|Y or (X|Y)&~C : X|Y + if (match(CmpLHS, m_And(m_Value(X), m_Value(Y)))) { + if (match(TrueVal, m_c_Or(m_Specific(X), m_Specific(Y)))) { + // (X&Y)==C ? X|Y : (X^Y)|C -> (X^Y)|C : (X^Y)|C -> (X^Y)|C + // (X&Y)==C ? X|Y : (X^Y)^C -> (X^Y)^C : (X^Y)^C -> (X^Y)^C + if (matchFalseVal(OrOps, XorOps, None) || + matchFalseVal(XorOps, XorOps, None)) + return IC.replaceInstUsesWith(SI, FalseVal); + } else if (match(TrueVal, m_c_Xor(m_Specific(X), m_Specific(Y)))) { + // (X&Y)==C ? X^Y : (X|Y)^ C -> (X|Y)^ C : (X|Y)^ C -> (X|Y)^ C + // (X&Y)==C ? X^Y : (X|Y)&~C -> (X|Y)&~C : (X|Y)&~C -> (X|Y)&~C + if (matchFalseVal(XorOps, OrOps, None) || + matchFalseVal(AndOps, OrOps, NotRHS)) + return IC.replaceInstUsesWith(SI, FalseVal); + } + } + + // (X|Y)==C ? X&Y : X^Y -> (X^Y)^C : X^Y or ~(X^Y)&C : X^Y + // (X|Y)==C ? X^Y : X&Y -> (X&Y)^C : X&Y or ~(X&Y)&C : X&Y + if (match(CmpLHS, m_Or(m_Value(X), m_Value(Y)))) { + if (match(TrueVal, m_c_And(m_Specific(X), m_Specific(Y)))) { + // (X|Y)==C ? X&Y: (X^Y)^C -> (X^Y)^C: (X^Y)^C -> (X^Y)^C + // (X|Y)==C ? X&Y:~(X^Y)&C ->~(X^Y)&C:~(X^Y)&C -> ~(X^Y)&C + if (matchFalseVal(XorOps, XorOps, None) || + matchFalseVal(AndOps, XorOps, NotInner)) + return IC.replaceInstUsesWith(SI, FalseVal); + } else if (match(TrueVal, m_c_Xor(m_Specific(X), m_Specific(Y)))) { + // (X|Y)==C ? X^Y : (X&Y)^C -> (X&Y)^C : (X&Y)^C -> (X&Y)^C + // (X|Y)==C ? X^Y :~(X&Y)&C -> ~(X&Y)&C :~(X&Y)&C -> ~(X&Y)&C + if (matchFalseVal(XorOps, AndOps, None) || + matchFalseVal(AndOps, AndOps, NotInner)) + return IC.replaceInstUsesWith(SI, FalseVal); + } + } + + // (X^Y)==C ? X&Y : X|Y -> (X|Y)^C : X|Y or (X|Y)&~C : X|Y + // (X^Y)==C ? X|Y : X&Y -> (X&Y)|C : X&Y or (X&Y)^ C : X&Y + if (match(CmpLHS, m_Xor(m_Value(X), m_Value(Y)))) { + if ((match(TrueVal, m_c_And(m_Specific(X), m_Specific(Y))))) { + // (X^Y)==C ? X&Y : (X|Y)^C -> (X|Y)^C + // (X^Y)==C ? X&Y : (X|Y)&~C -> (X|Y)&~C + if (matchFalseVal(XorOps, OrOps, None) || + matchFalseVal(AndOps, OrOps, NotRHS)) + return IC.replaceInstUsesWith(SI, FalseVal); + } else if (match(TrueVal, m_c_Or(m_Specific(X), m_Specific(Y)))) { + // (X^Y)==C ? (X|Y) : (X&Y)|C -> (X&Y)|C + // (X^Y)==C ? 
(X|Y) : (X&Y)^C -> (X&Y)^C + if (matchFalseVal(OrOps, AndOps, None) || + matchFalseVal(XorOps, AndOps, None)) + return IC.replaceInstUsesWith(SI, FalseVal); + } + } + + return nullptr; +} + /// Visit a SelectInst that has an ICmpInst as its first operand. Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI) { @@ -1729,6 +1832,9 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, } } + if (Instruction *NewSel = foldSelectICmpEq(SI, ICI, *this)) + return NewSel; + // Canonicalize a signbit condition to use zero constant by swapping: // (CmpLHS > -1) ? TV : FV --> (CmpLHS < 0) ? FV : TV // To avoid conflicts (infinite loops) with other canonicalizations, this is diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 278cabd..05fcf66 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -3693,6 +3693,800 @@ exit: ret i32 %rem } +; Select icmp and/or/xor +; https://alive2.llvm.org/ce/z/QXQDwF +; X&Y==C?X|Y:X^Y, X&Y==C?X^Y:X|Y +; TODO: X&Y==0 could imply no_common_bit to TrueValue +define i32 @src_and_eq_0_or_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @src_and_eq_0_or_xor( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y]], [[X]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; +entry: + %and = and i32 %y, %x + %cmp = icmp eq i32 %and, 0 + %or = or i32 %y, %x + %xor = xor i32 %y, %x + %cond = select i1 %cmp, i32 %or, i32 %xor + ret i32 %cond +} + +; TODO: X&Y==0 could imply no_common_bit to TrueValue +define i32 @src_and_eq_0_xor_or(i32 %x, i32 %y) { +; CHECK-LABEL: @src_and_eq_0_xor_or( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y]], [[X]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[OR]] +; CHECK-NEXT: ret i32 [[COND]] +; +entry: + %and = and i32 %y, %x + %cmp = icmp eq i32 %and, 0 + %xor = xor i32 %y, %x + %or = or i32 %y, %x + %cond = select i1 %cmp, i32 %xor, i32 %or + ret i32 %cond +} + +; TODO: X&Y==-1 could imply all_common_bit to TrueValue +define i32 @src_and_eq_neg1_or_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @src_and_eq_neg1_or_xor( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y]], [[X]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; +entry: + %and = and i32 %y, %x + %cmp = icmp eq i32 %and, -1 + %or = or i32 %y, %x + %xor = xor i32 %y, %x + %cond = select i1 %cmp, i32 %or, i32 %xor + ret i32 %cond +} + +; TODO: X&Y==-1 could imply all_common_bit to TrueValue +define i32 @src_and_eq_neg1_xor_or(i32 %x, i32 %y) { +; CHECK-LABEL: @src_and_eq_neg1_xor_or( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[AND]], -1 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y]], [[X]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[OR]] +; CHECK-NEXT: ret i32 [[COND]] 
+; +entry: + %and = and i32 %y, %x + %cmp = icmp eq i32 %and, -1 + %xor = xor i32 %y, %x + %or = or i32 %y, %x + %cond = select i1 %cmp, i32 %xor, i32 %or + ret i32 %cond +} + +define i32 @src_and_eq_C_or_xororC(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @src_and_eq_C_or_xororC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[XOR]], [[C:%.*]] +; CHECK-NEXT: ret i32 [[OR1]] +; +entry: + %and = and i32 %y, %x + %cmp = icmp eq i32 %and, %c + %or = or i32 %y, %x + %xor = xor i32 %y, %x + %or1 = or i32 %xor, %c + %cond = select i1 %cmp, i32 %or, i32 %or1 + ret i32 %cond +} + +define i32 @src_and_eq_C_or_xorxorC(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @src_and_eq_C_or_xorxorC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[XOR]], [[C:%.*]] +; CHECK-NEXT: ret i32 [[XOR1]] +; +entry: + %and = and i32 %y, %x + %cmp = icmp eq i32 %and, %c + %or = or i32 %y, %x + %xor = xor i32 %y, %x + %xor1 = xor i32 %xor, %c + %cond = select i1 %cmp, i32 %or, i32 %xor1 + ret i32 %cond +} + +define i32 @src_and_eq_C_xor_OrAndNotC(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @src_and_eq_C_xor_OrAndNotC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[C:%.*]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[OR]], [[NOT]] +; CHECK-NEXT: ret i32 [[AND1]] +; +entry: + %and = and i32 %y, %x + %cmp = icmp eq i32 %and, %c + %xor = xor i32 %y, %x + %or = or i32 %y, %x + %not = xor i32 %c, -1 + %and1 = and i32 %or, %not + %cond = select i1 %cmp, i32 %xor, i32 %and1 + ret i32 %cond +} + +define i32 @src_and_eq_C_xor_orxorC(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @src_and_eq_C_xor_orxorC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[OR]], [[C:%.*]] +; CHECK-NEXT: ret i32 [[XOR1]] +; +entry: + %and = and i32 %y, %x + %cmp = icmp eq i32 %and, %c + %xor = xor i32 %y, %x + %or = or i32 %y, %x + %xor1 = xor i32 %or, %c + %cond = select i1 %cmp, i32 %xor, i32 %xor1 + ret i32 %cond +} + +; https://alive2.llvm.org/ce/z/9RPwfN +; X|Y==C?X&Y:X^Y, X|Y==C?X^Y:X&Y +; TODO: X|Y==0 could imply no_common_bit to TrueValue +define i32 @src_or_eq_0_and_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @src_or_eq_0_and_xor( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], [[X]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[AND]], i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; +entry: + %or = or i32 %y, %x + %cmp = icmp eq i32 %or, 0 + %and = and i32 %y, %x + %xor = xor i32 %y, %x + %cond = select i1 %cmp, i32 %and, i32 %xor + ret i32 %cond +} + +; TODO: X|Y==0 could imply no_common_bit to TrueValue +define i32 @src_or_eq_0_xor_and(i32 %x, i32 %y) { +; CHECK-LABEL: @src_or_eq_0_xor_and( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[OR]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y]], [[X]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y]], [[X]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[XOR]], i32 [[AND]] +; CHECK-NEXT: ret i32 [[COND]] +; +entry: + %or = or i32 %y, %x + %cmp = icmp eq i32 %or, 0 + %xor = xor i32 %y, %x + %and = and i32 %y, %x + %cond = select i1 %cmp, i32 %xor, i32 %and + ret i32 %cond +} + +define i32 
@src_or_eq_neg1_and_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @src_or_eq_neg1_and_xor( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[TMP0]], -1 +; CHECK-NEXT: ret i32 [[NOT]] +; +entry: + %or = or i32 %y, %x + %cmp = icmp eq i32 %or, -1 + %and = and i32 %y, %x + %0 = xor i32 %x, %y + %not = xor i32 %0, -1 + %cond = select i1 %cmp, i32 %and, i32 %not + ret i32 %cond +} + +define i32 @src_or_eq_neg1_xor_and(i32 %x, i32 %y) { +; CHECK-LABEL: @src_or_eq_neg1_xor_and( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[AND]], -1 +; CHECK-NEXT: ret i32 [[NOT]] +; +entry: + %or = or i32 %y, %x + %cmp = icmp eq i32 %or, -1 + %xor = xor i32 %y, %x + %and = and i32 %y, %x + %not = xor i32 %and, -1 + %cond = select i1 %cmp, i32 %xor, i32 %not + ret i32 %cond +} + +define i32 @src_or_eq_C_and_xorC(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @src_or_eq_C_and_xorC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[XOR]], [[C:%.*]] +; CHECK-NEXT: ret i32 [[XOR1]] +; +entry: + %or = or i32 %y, %x + %cmp = icmp eq i32 %or, %c + %and = and i32 %y, %x + %xor = xor i32 %y, %x + %xor1 = xor i32 %xor, %c + %cond = select i1 %cmp, i32 %and, i32 %xor1 + ret i32 %cond +} + +define i32 @src_or_eq_C_and_andnotxorC(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @src_or_eq_C_and_andnotxorC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[TMP0]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT]], [[C:%.*]] +; CHECK-NEXT: ret i32 [[AND1]] +; +entry: + %or = or i32 %y, %x + %cmp = icmp eq i32 %or, %c + %and = and i32 %y, %x + %0 = xor i32 %x, %y + %not = xor i32 %0, -1 + %and1 = and i32 %not, %c + %cond = select i1 %cmp, i32 %and, i32 %and1 + ret i32 %cond +} + +define i32 @src_or_eq_C_xor_xorandC(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @src_or_eq_C_xor_xorandC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[AND]], [[C:%.*]] +; CHECK-NEXT: ret i32 [[XOR1]] +; +entry: + %or = or i32 %y, %x + %cmp = icmp eq i32 %or, %c + %xor = xor i32 %y, %x + %and = and i32 %y, %x + %xor1 = xor i32 %and, %c + %cond = select i1 %cmp, i32 %xor, i32 %xor1 + ret i32 %cond +} + +define i32 @src_or_eq_C_xor_andnotandC(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @src_or_eq_C_xor_andnotandC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[AND]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[NOT]], [[C:%.*]] +; CHECK-NEXT: ret i32 [[AND1]] +; +entry: + %or = or i32 %y, %x + %cmp = icmp eq i32 %or, %c + %xor = xor i32 %y, %x + %and = and i32 %y, %x + %not = xor i32 %and, -1 + %and1 = and i32 %not, %c + %cond = select i1 %cmp, i32 %xor, i32 %and1 + ret i32 %cond +} + +; https://alive2.llvm.org/ce/z/c6oXi4 +; X^Y==C?X&Y:X|Y, X^Y==C?X|Y:X&Y +define i32 @src_xor_eq_neg1_and(i32 %x, i32 %y) { +; CHECK-LABEL: @src_xor_eq_neg1_and( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[OR]], -1 +; CHECK-NEXT: ret i32 [[NOT]] +; +entry: + %xor = xor i32 %y, %x + %cmp = icmp eq i32 %xor, -1 + %and = and i32 %y, %x + %or = or i32 %y, %x + %not = xor i32 %or, -1 + %cond = select i1 %cmp, i32 %and, i32 %not + ret i32 %cond +} + +; TODO: X^Y==-1 could imply no_common_bit to TrueValue +define i32 
@src_xor_eq_neg1_or(i32 %x, i32 %y) { +; CHECK-LABEL: @src_xor_eq_neg1_or( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[XOR]], -1 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y]], [[X]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[CMP]], i32 [[OR]], i32 -1 +; CHECK-NEXT: ret i32 [[COND]] +; +entry: + %xor = xor i32 %y, %x + %cmp = icmp eq i32 %xor, -1 + %or = or i32 %y, %x + %cond = select i1 %cmp, i32 %or, i32 -1 + ret i32 %cond +} + +define i32 @src_xor_eq_C_and_xororC(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @src_xor_eq_C_and_xororC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[OR]], [[C:%.*]] +; CHECK-NEXT: ret i32 [[XOR1]] +; +entry: + %xor = xor i32 %y, %x + %cmp = icmp eq i32 %xor, %c + %and = and i32 %y, %x + %or = or i32 %y, %x + %xor1 = xor i32 %or, %c + %cond = select i1 %cmp, i32 %and, i32 %xor1 + ret i32 %cond +} + +define i32 @src_xor_eq_C_and_andornotC(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @src_xor_eq_C_and_andornotC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[OR:%.*]] = or i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[NOT:%.*]] = xor i32 [[C:%.*]], -1 +; CHECK-NEXT: [[AND1:%.*]] = and i32 [[OR]], [[NOT]] +; CHECK-NEXT: ret i32 [[AND1]] +; +entry: + %xor = xor i32 %y, %x + %cmp = icmp eq i32 %xor, %c + %and = and i32 %y, %x + %or = or i32 %y, %x + %not = xor i32 %c, -1 + %and1 = and i32 %or, %not + %cond = select i1 %cmp, i32 %and, i32 %and1 + ret i32 %cond +} + +define i32 @src_xor_eq_C_or_xorandC(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @src_xor_eq_C_or_xorandC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[XOR1:%.*]] = xor i32 [[AND]], [[C:%.*]] +; CHECK-NEXT: ret i32 [[XOR1]] +; +entry: + %xor = xor i32 %y, %x + %cmp = icmp eq i32 %xor, %c + %or = or i32 %y, %x + %and = and i32 %y, %x + %xor1 = xor i32 %and, %c + %cond = select i1 %cmp, i32 %or, i32 %xor1 + ret i32 %cond +} + +define i32 @src_xor_eq_C_or_orandC(i32 %x, i32 %y, i32 %c) { +; CHECK-LABEL: @src_xor_eq_C_or_orandC( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[AND:%.*]] = and i32 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[OR1:%.*]] = or i32 [[AND]], [[C:%.*]] +; CHECK-NEXT: ret i32 [[OR1]] +; +entry: + %xor = xor i32 %y, %x + %cmp = icmp eq i32 %xor, %c + %or = or i32 %y, %x + %and = and i32 %y, %x + %or1 = or i32 %and, %c + %cond = select i1 %cmp, i32 %or, i32 %or1 + ret i32 %cond +} + +; Select icmp and/or/xor +; NO TRANSFORMED - select condition is compare with not 0 +define i32 @src_select_and_min_positive_int(i32 %x, i32 %y) { +; CHECK-LABEL: @src_select_and_min_positive_int( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND0:%.*]] = icmp eq i32 [[AND]], 1 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[AND0]], i32 [[OR]], i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %and = and i32 %x, %y + %and0 = icmp eq i32 %and, 1 + %xor = xor i32 %x, %y + %or = or i32 %x, %y + %cond = select i1 %and0, i32 %or, i32 %xor + ret i32 %cond +} + +define i32 @src_select_and_max_positive_int(i32 %x, i32 %y) { +; CHECK-LABEL: @src_select_and_max_positive_int( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND0:%.*]] = icmp eq i32 [[AND]], 2147483647 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[AND0]], i32 
[[OR]], i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %and = and i32 %x, %y + %and0 = icmp eq i32 %and, 2147483647 + %xor = xor i32 %x, %y + %or = or i32 %x, %y + %cond = select i1 %and0, i32 %or, i32 %xor + ret i32 %cond +} + +define i32 @src_select_and_min_negative_int(i32 %x, i32 %y) { +; CHECK-LABEL: @src_select_and_min_negative_int( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND0:%.*]] = icmp eq i32 [[AND]], -2147483648 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[AND0]], i32 [[OR]], i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %and = and i32 %x, %y + %and0 = icmp eq i32 %and, -2147483648 + %xor = xor i32 %x, %y + %or = or i32 %x, %y + %cond = select i1 %and0, i32 %or, i32 %xor + ret i32 %cond +} + +define i32 @src_select_or_min_positive_int(i32 %x, i32 %y) { +; CHECK-LABEL: @src_select_or_min_positive_int( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR0:%.*]] = icmp eq i32 [[OR]], 1 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0]], i32 [[AND]], i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %or = or i32 %x, %y + %or0 = icmp eq i32 %or, 1 + %and = and i32 %x, %y + %xor = xor i32 %x, %y + %cond = select i1 %or0, i32 %and, i32 %xor + ret i32 %cond +} + +define i32 @src_select_or_max_positive_int(i32 %x, i32 %y) { +; CHECK-LABEL: @src_select_or_max_positive_int( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR0:%.*]] = icmp eq i32 [[OR]], 2147483647 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0]], i32 [[AND]], i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %or = or i32 %x, %y + %or0 = icmp eq i32 %or, 2147483647 + %and = and i32 %x, %y + %xor = xor i32 %x, %y + %cond = select i1 %or0, i32 %and, i32 %xor + ret i32 %cond +} + +define i32 @src_select_or_min_negative_int(i32 %x, i32 %y) { +; CHECK-LABEL: @src_select_or_min_negative_int( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR0:%.*]] = icmp eq i32 [[OR]], -2147483648 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0]], i32 [[AND]], i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %or = or i32 %x, %y + %or0 = icmp eq i32 %or, -2147483648 + %and = and i32 %x, %y + %xor = xor i32 %x, %y + %cond = select i1 %or0, i32 %and, i32 %xor + ret i32 %cond +} + +define i32 @src_select_or_max_negative_int(i32 %x, i32 %y) { +; CHECK-LABEL: @src_select_or_max_negative_int( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR0:%.*]] = icmp eq i32 [[OR]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0]], i32 [[AND]], i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %or = or i32 %x, %y + %or0 = icmp eq i32 %or, -1 + %and = and i32 %x, %y + %xor = xor i32 %x, %y + %cond = select i1 %or0, i32 %and, i32 %xor + ret i32 %cond +} + +define i32 @src_select_xor_min_positive_int(i32 %x, i32 %y) { +; CHECK-LABEL: @src_select_xor_min_positive_int( +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[XOR]], 1 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] +; CHECK-NEXT: [[OR:%.*]] = 
or i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[XOR0]], i32 [[AND]], i32 [[OR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %xor = xor i32 %x, %y + %xor0 = icmp eq i32 %xor, 1 + %and = and i32 %x, %y + %or = or i32 %x, %y + %cond = select i1 %xor0, i32 %and, i32 %or + ret i32 %cond +} + +define i32 @src_select_xor_max_positive_int(i32 %x, i32 %y) { +; CHECK-LABEL: @src_select_xor_max_positive_int( +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[XOR]], 2147483647 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[XOR0]], i32 [[AND]], i32 [[OR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %xor = xor i32 %x, %y + %xor0 = icmp eq i32 %xor, 2147483647 + %and = and i32 %x, %y + %or = or i32 %x, %y + %cond = select i1 %xor0, i32 %and, i32 %or + ret i32 %cond +} + +define i32 @src_select_xor_min_negative_int(i32 %x, i32 %y) { +; CHECK-LABEL: @src_select_xor_min_negative_int( +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[XOR]], -2147483648 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[XOR0]], i32 [[AND]], i32 [[OR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %xor = xor i32 %x, %y + %xor0 = icmp eq i32 %xor, -2147483648 + %and = and i32 %x, %y + %or = or i32 %x, %y + %cond = select i1 %xor0, i32 %and, i32 %or + ret i32 %cond +} + +define i32 @src_select_xor_max_negative_int(i32 %x, i32 %y) { +; CHECK-LABEL: @src_select_xor_max_negative_int( +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[XOR]], -1 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[XOR0]], i32 [[AND]], i32 [[OR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %xor = xor i32 %x, %y + %xor0 = icmp eq i32 %xor, -1 + %and = and i32 %x, %y + %or = or i32 %x, %y + %cond = select i1 %xor0, i32 %and, i32 %or + ret i32 %cond +} + +; Select icmp and/or/xor +; https://alive2.llvm.org/ce/z/BVgrJ- +; NO TRANSFORMED - not supported +define i32 @src_no_trans_select_and_eq0_and_or(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_and_eq0_and_or( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND0:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[AND0]], i32 0, i32 [[OR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %and = and i32 %x, %y + %and0 = icmp eq i32 %and, 0 + %or = or i32 %x, %y + %cond = select i1 %and0, i32 %and, i32 %or + ret i32 %cond +} + +define i32 @src_no_trans_select_and_eq0_and_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_and_eq0_and_xor( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND0:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[AND0]], i32 0, i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %and = and i32 %x, %y + %and0 = icmp eq i32 %and, 0 + %xor = xor i32 %x, %y + %cond = select i1 %and0, i32 %and, i32 %xor + ret i32 %cond +} + +define i32 @src_no_trans_select_and_eq0_or_and(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_and_eq0_or_and( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND0:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X]], 
[[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[AND0]], i32 [[OR]], i32 [[AND]] +; CHECK-NEXT: ret i32 [[COND]] +; + %and = and i32 %x, %y + %and0 = icmp eq i32 %and, 0 + %or = or i32 %x, %y + %cond = select i1 %and0, i32 %or, i32 %and + ret i32 %cond +} + +define i32 @src_no_trans_select_and_eq0_xor_and(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_and_eq0_xor_and( +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND0:%.*]] = icmp eq i32 [[AND]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[AND0]], i32 [[XOR]], i32 [[AND]] +; CHECK-NEXT: ret i32 [[COND]] +; + %and = and i32 %x, %y + %and0 = icmp eq i32 %and, 0 + %xor = xor i32 %x, %y + %cond = select i1 %and0, i32 %xor, i32 %and + ret i32 %cond +} + +define i32 @src_no_trans_select_or_eq0_or_and(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_or_eq0_or_and( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR0:%.*]] = icmp eq i32 [[OR]], 0 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0]], i32 0, i32 [[AND]] +; CHECK-NEXT: ret i32 [[COND]] +; + %or = or i32 %x, %y + %or0 = icmp eq i32 %or, 0 + %and = and i32 %x, %y + %cond = select i1 %or0, i32 %or, i32 %and + ret i32 %cond +} + +define i32 @src_no_trans_select_or_eq0_or_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_or_eq0_or_xor( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR0:%.*]] = icmp eq i32 [[OR]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0]], i32 0, i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %or = or i32 %x, %y + %or0 = icmp eq i32 %or, 0 + %xor = xor i32 %x, %y + %cond = select i1 %or0, i32 %or, i32 %xor + ret i32 %cond +} + +define i32 @src_no_trans_select_or_eq0_and_or(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_or_eq0_and_or( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR0:%.*]] = icmp eq i32 [[OR]], 0 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0]], i32 [[AND]], i32 [[OR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %or = or i32 %x, %y + %or0 = icmp eq i32 %or, 0 + %and = and i32 %x, %y + %cond = select i1 %or0, i32 %and, i32 %or + ret i32 %cond +} + +define i32 @src_no_trans_select_or_eq0_xor_or(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_or_eq0_xor_or( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR0:%.*]] = icmp eq i32 [[OR]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0]], i32 [[XOR]], i32 [[OR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %or = or i32 %x, %y + %or0 = icmp eq i32 %or, 0 + %xor = xor i32 %x, %y + %cond = select i1 %or0, i32 %xor, i32 %or + ret i32 %cond +} + +define i32 @src_no_trans_select_and_ne0_xor_or(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_and_ne0_xor_or( +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR0_NOT:%.*]] = icmp eq i32 [[OR]], 0 +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[OR0_NOT]], i32 0, i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %or = or i32 %x, %y + %or0 = icmp ne i32 %or, 0 + %xor = xor i32 %x, %y + %cond = select i1 %or0, i32 %xor, i32 %or + ret i32 %cond +} + +define i32 @src_no_trans_select_xor_eq0_xor_and(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_xor_eq0_xor_and( +; CHECK-NEXT: [[XOR0:%.*]] = icmp eq 
i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[XOR0]], i32 0, i32 [[AND]] +; CHECK-NEXT: ret i32 [[COND]] +; + %xor = xor i32 %x, %y + %xor0 = icmp eq i32 %xor, 0 + %and = and i32 %x, %y + %cond = select i1 %xor0, i32 %xor, i32 %and + ret i32 %cond +} + +define i32 @src_no_trans_select_xor_eq0_xor_or(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_xor_eq0_xor_or( +; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[XOR0]], i32 0, i32 [[OR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %xor = xor i32 %x, %y + %xor0 = icmp eq i32 %xor, 0 + %or = or i32 %x, %y + %cond = select i1 %xor0, i32 %xor, i32 %or + ret i32 %cond +} + +define i32 @src_no_trans_select_xor_eq0_and_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_xor_eq0_and_xor( +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[XOR]], 0 +; CHECK-NEXT: [[AND:%.*]] = and i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[XOR0]], i32 [[AND]], i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %xor = xor i32 %x, %y + %xor0 = icmp eq i32 %xor, 0 + %and = and i32 %x, %y + %cond = select i1 %xor0, i32 %and, i32 %xor + ret i32 %cond +} + +; https://alive2.llvm.org/ce/z/SBe8ei +define i32 @src_no_trans_select_xor_eq0_or_xor(i32 %x, i32 %y) { +; CHECK-LABEL: @src_no_trans_select_xor_eq0_or_xor( +; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[XOR0:%.*]] = icmp eq i32 [[XOR]], 0 +; CHECK-NEXT: [[OR:%.*]] = or i32 [[X]], [[Y]] +; CHECK-NEXT: [[COND:%.*]] = select i1 [[XOR0]], i32 [[OR]], i32 [[XOR]] +; CHECK-NEXT: ret i32 [[COND]] +; + %xor = xor i32 %x, %y + %xor0 = icmp eq i32 %xor, 0 + %or = or i32 %x, %y + %cond = select i1 %xor0, i32 %or, i32 %xor + ret i32 %cond +} + ; (X == C) ? X : Y -> (X == C) ? C : Y ; Fixed #77553 define i32 @src_select_xxory_eq0_xorxy_y(i32 %x, i32 %y) { -- cgit v1.1 From 7edddee2aa6a6183e40784c9141afec3e2eabb95 Mon Sep 17 00:00:00 2001 From: Bevin Hansson <59652494+bevin-hansson@users.noreply.github.com> Date: Wed, 3 Apr 2024 08:45:59 +0200 Subject: [ExpandLargeFpConvert] Scalarize vector types. (#86954) expand-large-fp-convert cannot handle vector types. If overly large vector element types survive into isel, they will likely be scalarized there, but since isel cannot handle scalar integer types of that size, it will assert. Handle vector types in expand-large-fp-convert by scalarizing them and then expanding the scalar type operation. For large vectors, this results in a *massive* code expansion, but it's better than asserting. 
--- llvm/lib/CodeGen/ExpandLargeFpConvert.cpp | 49 ++++++- .../X86/expand-large-fp-convert-fptosi129.ll | 77 ++++++++++ .../X86/expand-large-fp-convert-fptoui129.ll | 77 ++++++++++ .../X86/expand-large-fp-convert-si129tofp.ll | 163 +++++++++++++++++++++ .../X86/expand-large-fp-convert-ui129tofp.ll | 163 +++++++++++++++++++++ 5 files changed, 521 insertions(+), 8 deletions(-) diff --git a/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp index 4ec966e..6213530 100644 --- a/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp +++ b/llvm/lib/CodeGen/ExpandLargeFpConvert.cpp @@ -568,8 +568,29 @@ static void expandIToFP(Instruction *IToFP) { IToFP->eraseFromParent(); } +static void scalarize(Instruction *I, SmallVectorImpl &Replace) { + VectorType *VTy = cast(I->getType()); + + IRBuilder<> Builder(I); + + unsigned NumElements = VTy->getElementCount().getFixedValue(); + Value *Result = PoisonValue::get(VTy); + for (unsigned Idx = 0; Idx < NumElements; ++Idx) { + Value *Ext = Builder.CreateExtractElement(I->getOperand(0), Idx); + Value *Cast = Builder.CreateCast(cast(I)->getOpcode(), Ext, + I->getType()->getScalarType()); + Result = Builder.CreateInsertElement(Result, Cast, Idx); + if (isa(Cast)) + Replace.push_back(cast(Cast)); + } + I->replaceAllUsesWith(Result); + I->dropAllReferences(); + I->eraseFromParent(); +} + static bool runImpl(Function &F, const TargetLowering &TLI) { SmallVector Replace; + SmallVector ReplaceVector; bool Modified = false; unsigned MaxLegalFpConvertBitWidth = @@ -584,29 +605,36 @@ static bool runImpl(Function &F, const TargetLowering &TLI) { switch (I.getOpcode()) { case Instruction::FPToUI: case Instruction::FPToSI: { - // TODO: This pass doesn't handle vectors. - if (I.getOperand(0)->getType()->isVectorTy()) + // TODO: This pass doesn't handle scalable vectors. + if (I.getOperand(0)->getType()->isScalableTy()) continue; - auto *IntTy = dyn_cast(I.getType()); + auto *IntTy = dyn_cast(I.getType()->getScalarType()); if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) continue; - Replace.push_back(&I); + if (I.getOperand(0)->getType()->isVectorTy()) + ReplaceVector.push_back(&I); + else + Replace.push_back(&I); Modified = true; break; } case Instruction::UIToFP: case Instruction::SIToFP: { - // TODO: This pass doesn't handle vectors. - if (I.getOperand(0)->getType()->isVectorTy()) + // TODO: This pass doesn't handle scalable vectors. 
+ if (I.getOperand(0)->getType()->isScalableTy()) continue; - auto *IntTy = dyn_cast(I.getOperand(0)->getType()); + auto *IntTy = + dyn_cast(I.getOperand(0)->getType()->getScalarType()); if (IntTy->getIntegerBitWidth() <= MaxLegalFpConvertBitWidth) continue; - Replace.push_back(&I); + if (I.getOperand(0)->getType()->isVectorTy()) + ReplaceVector.push_back(&I); + else + Replace.push_back(&I); Modified = true; break; } @@ -615,6 +643,11 @@ static bool runImpl(Function &F, const TargetLowering &TLI) { } } + while (!ReplaceVector.empty()) { + Instruction *I = ReplaceVector.pop_back_val(); + scalarize(I, Replace); + } + if (Replace.empty()) return false; diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll index 75130c2..e058c5b 100644 --- a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll +++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptosi129.ll @@ -176,3 +176,80 @@ define i129 @fp128tosi129(fp128 %a) { %conv = fptosi fp128 %a to i129 ret i129 %conv } + +define <2 x i129> @floattosi129v2(<2 x float> %a) { +; CHECK-LABEL: @floattosi129v2( +; CHECK-NEXT: fp-to-i-entryfp-to-i-entry: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[TMP0]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i129 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i129 1, i129 -1 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i129 [[TMP2]], 23 +; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP5]], 255 +; CHECK-NEXT: [[TMP7:%.*]] = and i129 [[TMP2]], 8388607 +; CHECK-NEXT: [[TMP8:%.*]] = or i129 [[TMP7]], 8388608 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i129 [[TMP6]], 127 +; CHECK-NEXT: br i1 [[TMP9]], label [[FP_TO_I_CLEANUP1:%.*]], label [[FP_TO_I_IF_END2:%.*]] +; CHECK: fp-to-i-if-end2: +; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[TMP6]], -256 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP10]], -129 +; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN53:%.*]], label [[FP_TO_I_IF_END94:%.*]] +; CHECK: fp-to-i-if-then53: +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456 +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]] +; CHECK: fp-to-i-if-end94: +; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i129 [[TMP6]], 150 +; CHECK-NEXT: br i1 [[TMP13]], label [[FP_TO_I_IF_THEN125:%.*]], label [[FP_TO_I_IF_ELSE6:%.*]] +; CHECK: fp-to-i-if-then125: +; CHECK-NEXT: [[TMP14:%.*]] = sub i129 150, [[TMP6]] +; CHECK-NEXT: [[TMP15:%.*]] = lshr i129 [[TMP8]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = mul i129 [[TMP15]], [[TMP4]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]] +; CHECK: fp-to-i-if-else6: +; CHECK-NEXT: [[TMP17:%.*]] = add i129 [[TMP6]], -150 +; CHECK-NEXT: [[TMP18:%.*]] = shl i129 [[TMP8]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = mul i129 [[TMP18]], [[TMP4]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]] +; CHECK: fp-to-i-cleanup1: +; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP12]], [[FP_TO_I_IF_THEN53]] ], [ [[TMP16]], [[FP_TO_I_IF_THEN125]] ], [ [[TMP19]], [[FP_TO_I_IF_ELSE6]] ], [ 0, [[FP_TO_I_ENTRYFP_TO_I_ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i129> poison, i129 [[TMP20]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[A]], i64 1 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast float [[TMP22]] to i32 +; 
CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i129 +; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP23]], -1 +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i129 1, i129 -1 +; CHECK-NEXT: [[TMP27:%.*]] = lshr i129 [[TMP24]], 23 +; CHECK-NEXT: [[TMP28:%.*]] = and i129 [[TMP27]], 255 +; CHECK-NEXT: [[TMP29:%.*]] = and i129 [[TMP24]], 8388607 +; CHECK-NEXT: [[TMP30:%.*]] = or i129 [[TMP29]], 8388608 +; CHECK-NEXT: [[TMP31:%.*]] = icmp ult i129 [[TMP28]], 127 +; CHECK-NEXT: br i1 [[TMP31]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]] +; CHECK: fp-to-i-if-end: +; CHECK-NEXT: [[TMP32:%.*]] = add i129 [[TMP28]], -256 +; CHECK-NEXT: [[TMP33:%.*]] = icmp ult i129 [[TMP32]], -129 +; CHECK-NEXT: br i1 [[TMP33]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]] +; CHECK: fp-to-i-if-then5: +; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP25]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456 +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-end9: +; CHECK-NEXT: [[TMP35:%.*]] = icmp ult i129 [[TMP28]], 150 +; CHECK-NEXT: br i1 [[TMP35]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]] +; CHECK: fp-to-i-if-then12: +; CHECK-NEXT: [[TMP36:%.*]] = sub i129 150, [[TMP28]] +; CHECK-NEXT: [[TMP37:%.*]] = lshr i129 [[TMP30]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = mul i129 [[TMP37]], [[TMP26]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-else: +; CHECK-NEXT: [[TMP39:%.*]] = add i129 [[TMP28]], -150 +; CHECK-NEXT: [[TMP40:%.*]] = shl i129 [[TMP30]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = mul i129 [[TMP40]], [[TMP26]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-cleanup: +; CHECK-NEXT: [[TMP42:%.*]] = phi i129 [ [[TMP34]], [[FP_TO_I_IF_THEN5]] ], [ [[TMP38]], [[FP_TO_I_IF_THEN12]] ], [ [[TMP41]], [[FP_TO_I_IF_ELSE]] ], [ 0, [[FP_TO_I_CLEANUP1]] ] +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x i129> [[TMP21]], i129 [[TMP42]], i64 1 +; CHECK-NEXT: ret <2 x i129> [[TMP43]] +; + %conv = fptosi <2 x float> %a to <2 x i129> + ret <2 x i129> %conv +} diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll index ed630d7..c699f80 100644 --- a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll +++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-fptoui129.ll @@ -176,3 +176,80 @@ define i129 @fp128toui129(fp128 %a) { %conv = fptoui fp128 %a to i129 ret i129 %conv } + +define <2 x i129> @floattoui129v2(<2 x float> %a) { +; CHECK-LABEL: @floattoui129v2( +; CHECK-NEXT: fp-to-i-entryfp-to-i-entry: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x float> [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = bitcast float [[TMP0]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i129 +; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i32 [[TMP1]], -1 +; CHECK-NEXT: [[TMP4:%.*]] = select i1 [[TMP3]], i129 1, i129 -1 +; CHECK-NEXT: [[TMP5:%.*]] = lshr i129 [[TMP2]], 23 +; CHECK-NEXT: [[TMP6:%.*]] = and i129 [[TMP5]], 255 +; CHECK-NEXT: [[TMP7:%.*]] = and i129 [[TMP2]], 8388607 +; CHECK-NEXT: [[TMP8:%.*]] = or i129 [[TMP7]], 8388608 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ult i129 [[TMP6]], 127 +; CHECK-NEXT: br i1 [[TMP9]], label [[FP_TO_I_CLEANUP1:%.*]], label [[FP_TO_I_IF_END2:%.*]] +; CHECK: fp-to-i-if-end2: +; CHECK-NEXT: [[TMP10:%.*]] = add i129 [[TMP6]], -256 +; CHECK-NEXT: [[TMP11:%.*]] = icmp ult i129 [[TMP10]], 
-129 +; CHECK-NEXT: br i1 [[TMP11]], label [[FP_TO_I_IF_THEN53:%.*]], label [[FP_TO_I_IF_END94:%.*]] +; CHECK: fp-to-i-if-then53: +; CHECK-NEXT: [[TMP12:%.*]] = select i1 [[TMP3]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456 +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]] +; CHECK: fp-to-i-if-end94: +; CHECK-NEXT: [[TMP13:%.*]] = icmp ult i129 [[TMP6]], 150 +; CHECK-NEXT: br i1 [[TMP13]], label [[FP_TO_I_IF_THEN125:%.*]], label [[FP_TO_I_IF_ELSE6:%.*]] +; CHECK: fp-to-i-if-then125: +; CHECK-NEXT: [[TMP14:%.*]] = sub i129 150, [[TMP6]] +; CHECK-NEXT: [[TMP15:%.*]] = lshr i129 [[TMP8]], [[TMP14]] +; CHECK-NEXT: [[TMP16:%.*]] = mul i129 [[TMP15]], [[TMP4]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]] +; CHECK: fp-to-i-if-else6: +; CHECK-NEXT: [[TMP17:%.*]] = add i129 [[TMP6]], -150 +; CHECK-NEXT: [[TMP18:%.*]] = shl i129 [[TMP8]], [[TMP17]] +; CHECK-NEXT: [[TMP19:%.*]] = mul i129 [[TMP18]], [[TMP4]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP1]] +; CHECK: fp-to-i-cleanup1: +; CHECK-NEXT: [[TMP20:%.*]] = phi i129 [ [[TMP12]], [[FP_TO_I_IF_THEN53]] ], [ [[TMP16]], [[FP_TO_I_IF_THEN125]] ], [ [[TMP19]], [[FP_TO_I_IF_ELSE6]] ], [ 0, [[FP_TO_I_ENTRYFP_TO_I_ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP21:%.*]] = insertelement <2 x i129> poison, i129 [[TMP20]], i64 0 +; CHECK-NEXT: [[TMP22:%.*]] = extractelement <2 x float> [[A]], i64 1 +; CHECK-NEXT: [[TMP23:%.*]] = bitcast float [[TMP22]] to i32 +; CHECK-NEXT: [[TMP24:%.*]] = zext i32 [[TMP23]] to i129 +; CHECK-NEXT: [[TMP25:%.*]] = icmp sgt i32 [[TMP23]], -1 +; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], i129 1, i129 -1 +; CHECK-NEXT: [[TMP27:%.*]] = lshr i129 [[TMP24]], 23 +; CHECK-NEXT: [[TMP28:%.*]] = and i129 [[TMP27]], 255 +; CHECK-NEXT: [[TMP29:%.*]] = and i129 [[TMP24]], 8388607 +; CHECK-NEXT: [[TMP30:%.*]] = or i129 [[TMP29]], 8388608 +; CHECK-NEXT: [[TMP31:%.*]] = icmp ult i129 [[TMP28]], 127 +; CHECK-NEXT: br i1 [[TMP31]], label [[FP_TO_I_CLEANUP:%.*]], label [[FP_TO_I_IF_END:%.*]] +; CHECK: fp-to-i-if-end: +; CHECK-NEXT: [[TMP32:%.*]] = add i129 [[TMP28]], -256 +; CHECK-NEXT: [[TMP33:%.*]] = icmp ult i129 [[TMP32]], -129 +; CHECK-NEXT: br i1 [[TMP33]], label [[FP_TO_I_IF_THEN5:%.*]], label [[FP_TO_I_IF_END9:%.*]] +; CHECK: fp-to-i-if-then5: +; CHECK-NEXT: [[TMP34:%.*]] = select i1 [[TMP25]], i129 340282366920938463463374607431768211455, i129 -340282366920938463463374607431768211456 +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-end9: +; CHECK-NEXT: [[TMP35:%.*]] = icmp ult i129 [[TMP28]], 150 +; CHECK-NEXT: br i1 [[TMP35]], label [[FP_TO_I_IF_THEN12:%.*]], label [[FP_TO_I_IF_ELSE:%.*]] +; CHECK: fp-to-i-if-then12: +; CHECK-NEXT: [[TMP36:%.*]] = sub i129 150, [[TMP28]] +; CHECK-NEXT: [[TMP37:%.*]] = lshr i129 [[TMP30]], [[TMP36]] +; CHECK-NEXT: [[TMP38:%.*]] = mul i129 [[TMP37]], [[TMP26]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-if-else: +; CHECK-NEXT: [[TMP39:%.*]] = add i129 [[TMP28]], -150 +; CHECK-NEXT: [[TMP40:%.*]] = shl i129 [[TMP30]], [[TMP39]] +; CHECK-NEXT: [[TMP41:%.*]] = mul i129 [[TMP40]], [[TMP26]] +; CHECK-NEXT: br label [[FP_TO_I_CLEANUP]] +; CHECK: fp-to-i-cleanup: +; CHECK-NEXT: [[TMP42:%.*]] = phi i129 [ [[TMP34]], [[FP_TO_I_IF_THEN5]] ], [ [[TMP38]], [[FP_TO_I_IF_THEN12]] ], [ [[TMP41]], [[FP_TO_I_IF_ELSE]] ], [ 0, [[FP_TO_I_CLEANUP1]] ] +; CHECK-NEXT: [[TMP43:%.*]] = insertelement <2 x i129> [[TMP21]], i129 [[TMP42]], i64 1 +; CHECK-NEXT: ret <2 x i129> [[TMP43]] +; + %conv = fptoui <2 x float> %a to <2 x i129> + ret <2 x i129> %conv +} 
diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll index 76f5248..f70ce2f 100644 --- a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll +++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-si129tofp.ll @@ -426,3 +426,166 @@ define fp128 @si129tofp128(i129 %a) { %conv = sitofp i129 %a to fp128 ret fp128 %conv } + +define <2 x float> @si129tofloatv2(<2 x i129> %a) { +; CHECK-LABEL: @si129tofloatv2( +; CHECK-NEXT: itofp-entryitofp-entry: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i129> [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i129 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[ITOFP_RETURN1:%.*]], label [[ITOFP_IF_END2:%.*]] +; CHECK: itofp-if-end2: +; CHECK-NEXT: [[TMP2:%.*]] = ashr i129 [[TMP0]], 128 +; CHECK-NEXT: [[TMP3:%.*]] = xor i129 [[TMP2]], [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = sub i129 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP4]], i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i129 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 129, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = sub i32 128, [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 24 +; CHECK-NEXT: br i1 [[TMP9]], label [[ITOFP_IF_THEN43:%.*]], label [[ITOFP_IF_ELSE8:%.*]] +; CHECK: itofp-if-then43: +; CHECK-NEXT: switch i32 [[TMP7]], label [[ITOFP_SW_DEFAULT5:%.*]] [ +; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB4:%.*]] +; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG6:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb4: +; CHECK-NEXT: [[TMP10:%.*]] = shl i129 [[TMP4]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG6]] +; CHECK: itofp-sw-default5: +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 103, [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i129 +; CHECK-NEXT: [[TMP13:%.*]] = lshr i129 [[TMP4]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP6]], 26 +; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i129 +; CHECK-NEXT: [[TMP16:%.*]] = lshr i129 -1, [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = and i129 [[TMP16]], [[TMP4]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i129 [[TMP17]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i129 +; CHECK-NEXT: [[TMP20:%.*]] = or i129 [[TMP13]], [[TMP19]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG6]] +; CHECK: itofp-sw-epilog6: +; CHECK-NEXT: [[TMP21:%.*]] = phi i129 [ [[TMP20]], [[ITOFP_SW_DEFAULT5]] ], [ [[TMP4]], [[ITOFP_IF_THEN43]] ], [ [[TMP10]], [[ITOFP_SW_BB4]] ] +; CHECK-NEXT: [[TMP22:%.*]] = trunc i129 [[TMP21]] to i32 +; CHECK-NEXT: [[TMP23:%.*]] = lshr i32 [[TMP22]], 2 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i129 +; CHECK-NEXT: [[TMP26:%.*]] = or i129 [[TMP21]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = add i129 [[TMP26]], 1 +; CHECK-NEXT: [[TMP28:%.*]] = ashr i129 [[TMP27]], 2 +; CHECK-NEXT: [[A310:%.*]] = and i129 [[TMP27]], 67108864 +; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i129 [[A310]], 0 +; CHECK-NEXT: [[TMP30:%.*]] = trunc i129 [[TMP28]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = lshr i129 [[TMP28]], 32 +; CHECK-NEXT: [[TMP32:%.*]] = trunc i129 [[TMP31]] to i32 +; CHECK-NEXT: br i1 [[TMP29]], label [[ITOFP_IF_END269:%.*]], label [[ITOFP_IF_THEN207:%.*]] +; CHECK: itofp-if-then207: +; CHECK-NEXT: [[TMP33:%.*]] = ashr i129 [[TMP27]], 3 +; CHECK-NEXT: [[TMP34:%.*]] = trunc i129 [[TMP33]] to i32 +; CHECK-NEXT: [[TMP35:%.*]] = lshr i129 [[TMP33]], 32 +; 
CHECK-NEXT: [[TMP36:%.*]] = trunc i129 [[TMP35]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END269]] +; CHECK: itofp-if-else8: +; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP6]], -105 +; CHECK-NEXT: [[TMP38:%.*]] = zext i32 [[TMP37]] to i129 +; CHECK-NEXT: [[TMP39:%.*]] = shl i129 [[TMP4]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = trunc i129 [[TMP39]] to i32 +; CHECK-NEXT: [[TMP41:%.*]] = lshr i129 [[TMP39]], 32 +; CHECK-NEXT: [[TMP42:%.*]] = trunc i129 [[TMP41]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END269]] +; CHECK: itofp-if-end269: +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[TMP34]], [[ITOFP_IF_THEN207]] ], [ [[TMP30]], [[ITOFP_SW_EPILOG6]] ], [ [[TMP40]], [[ITOFP_IF_ELSE8]] ] +; CHECK-NEXT: [[TMP44:%.*]] = phi i32 [ [[TMP7]], [[ITOFP_IF_THEN207]] ], [ [[TMP8]], [[ITOFP_SW_EPILOG6]] ], [ [[TMP8]], [[ITOFP_IF_ELSE8]] ] +; CHECK-NEXT: [[TMP45:%.*]] = trunc i129 [[TMP2]] to i32 +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], -2147483648 +; CHECK-NEXT: [[TMP47:%.*]] = shl i32 [[TMP44]], 23 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 1065353216 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP43]], 8388607 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP49]], [[TMP46]] +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP50]], [[TMP48]] +; CHECK-NEXT: [[TMP52:%.*]] = bitcast i32 [[TMP51]] to float +; CHECK-NEXT: br label [[ITOFP_RETURN1]] +; CHECK: itofp-return1: +; CHECK-NEXT: [[TMP53:%.*]] = phi float [ [[TMP52]], [[ITOFP_IF_END269]] ], [ 0.000000e+00, [[ITOFP_ENTRYITOFP_ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0 +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <2 x i129> [[A]], i64 1 +; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i129 [[TMP55]], 0 +; CHECK-NEXT: br i1 [[TMP56]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]] +; CHECK: itofp-if-end: +; CHECK-NEXT: [[TMP57:%.*]] = ashr i129 [[TMP55]], 128 +; CHECK-NEXT: [[TMP58:%.*]] = xor i129 [[TMP57]], [[TMP55]] +; CHECK-NEXT: [[TMP59:%.*]] = sub i129 [[TMP58]], [[TMP57]] +; CHECK-NEXT: [[TMP60:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP59]], i1 true) +; CHECK-NEXT: [[TMP61:%.*]] = trunc i129 [[TMP60]] to i32 +; CHECK-NEXT: [[TMP62:%.*]] = sub i32 129, [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = sub i32 128, [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = icmp sgt i32 [[TMP62]], 24 +; CHECK-NEXT: br i1 [[TMP64]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]] +; CHECK: itofp-if-then4: +; CHECK-NEXT: switch i32 [[TMP62]], label [[ITOFP_SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB:%.*]] +; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb: +; CHECK-NEXT: [[TMP65:%.*]] = shl i129 [[TMP59]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-default: +; CHECK-NEXT: [[TMP66:%.*]] = sub i32 103, [[TMP61]] +; CHECK-NEXT: [[TMP67:%.*]] = zext i32 [[TMP66]] to i129 +; CHECK-NEXT: [[TMP68:%.*]] = lshr i129 [[TMP59]], [[TMP67]] +; CHECK-NEXT: [[TMP69:%.*]] = add i32 [[TMP61]], 26 +; CHECK-NEXT: [[TMP70:%.*]] = zext i32 [[TMP69]] to i129 +; CHECK-NEXT: [[TMP71:%.*]] = lshr i129 -1, [[TMP70]] +; CHECK-NEXT: [[TMP72:%.*]] = and i129 [[TMP71]], [[TMP59]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp ne i129 [[TMP72]], 0 +; CHECK-NEXT: [[TMP74:%.*]] = zext i1 [[TMP73]] to i129 +; CHECK-NEXT: [[TMP75:%.*]] = or i129 [[TMP68]], [[TMP74]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-epilog: +; CHECK-NEXT: [[TMP76:%.*]] = phi i129 [ [[TMP75]], [[ITOFP_SW_DEFAULT]] ], [ [[TMP59]], [[ITOFP_IF_THEN4]] ], [ [[TMP65]], [[ITOFP_SW_BB]] ] +; 
CHECK-NEXT: [[TMP77:%.*]] = trunc i129 [[TMP76]] to i32 +; CHECK-NEXT: [[TMP78:%.*]] = lshr i32 [[TMP77]], 2 +; CHECK-NEXT: [[TMP79:%.*]] = and i32 [[TMP78]], 1 +; CHECK-NEXT: [[TMP80:%.*]] = zext i32 [[TMP79]] to i129 +; CHECK-NEXT: [[TMP81:%.*]] = or i129 [[TMP76]], [[TMP80]] +; CHECK-NEXT: [[TMP82:%.*]] = add i129 [[TMP81]], 1 +; CHECK-NEXT: [[TMP83:%.*]] = ashr i129 [[TMP82]], 2 +; CHECK-NEXT: [[A3:%.*]] = and i129 [[TMP82]], 67108864 +; CHECK-NEXT: [[TMP84:%.*]] = icmp eq i129 [[A3]], 0 +; CHECK-NEXT: [[TMP85:%.*]] = trunc i129 [[TMP83]] to i32 +; CHECK-NEXT: [[TMP86:%.*]] = lshr i129 [[TMP83]], 32 +; CHECK-NEXT: [[TMP87:%.*]] = trunc i129 [[TMP86]] to i32 +; CHECK-NEXT: br i1 [[TMP84]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]] +; CHECK: itofp-if-then20: +; CHECK-NEXT: [[TMP88:%.*]] = ashr i129 [[TMP82]], 3 +; CHECK-NEXT: [[TMP89:%.*]] = trunc i129 [[TMP88]] to i32 +; CHECK-NEXT: [[TMP90:%.*]] = lshr i129 [[TMP88]], 32 +; CHECK-NEXT: [[TMP91:%.*]] = trunc i129 [[TMP90]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-else: +; CHECK-NEXT: [[TMP92:%.*]] = add i32 [[TMP61]], -105 +; CHECK-NEXT: [[TMP93:%.*]] = zext i32 [[TMP92]] to i129 +; CHECK-NEXT: [[TMP94:%.*]] = shl i129 [[TMP59]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = trunc i129 [[TMP94]] to i32 +; CHECK-NEXT: [[TMP96:%.*]] = lshr i129 [[TMP94]], 32 +; CHECK-NEXT: [[TMP97:%.*]] = trunc i129 [[TMP96]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-end26: +; CHECK-NEXT: [[TMP98:%.*]] = phi i32 [ [[TMP89]], [[ITOFP_IF_THEN20]] ], [ [[TMP85]], [[ITOFP_SW_EPILOG]] ], [ [[TMP95]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP99:%.*]] = phi i32 [ [[TMP62]], [[ITOFP_IF_THEN20]] ], [ [[TMP63]], [[ITOFP_SW_EPILOG]] ], [ [[TMP63]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP100:%.*]] = trunc i129 [[TMP57]] to i32 +; CHECK-NEXT: [[TMP101:%.*]] = and i32 [[TMP100]], -2147483648 +; CHECK-NEXT: [[TMP102:%.*]] = shl i32 [[TMP99]], 23 +; CHECK-NEXT: [[TMP103:%.*]] = add i32 [[TMP102]], 1065353216 +; CHECK-NEXT: [[TMP104:%.*]] = and i32 [[TMP98]], 8388607 +; CHECK-NEXT: [[TMP105:%.*]] = or i32 [[TMP104]], [[TMP101]] +; CHECK-NEXT: [[TMP106:%.*]] = or i32 [[TMP105]], [[TMP103]] +; CHECK-NEXT: [[TMP107:%.*]] = bitcast i32 [[TMP106]] to float +; CHECK-NEXT: br label [[ITOFP_RETURN]] +; CHECK: itofp-return: +; CHECK-NEXT: [[TMP108:%.*]] = phi float [ [[TMP107]], [[ITOFP_IF_END26]] ], [ 0.000000e+00, [[ITOFP_RETURN1]] ] +; CHECK-NEXT: [[TMP109:%.*]] = insertelement <2 x float> [[TMP54]], float [[TMP108]], i64 1 +; CHECK-NEXT: ret <2 x float> [[TMP109]] +; + %conv = sitofp <2 x i129> %a to <2 x float> + ret <2 x float> %conv +} diff --git a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll index 96d87a5..ee54d53 100644 --- a/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll +++ b/llvm/test/Transforms/ExpandLargeFpConvert/X86/expand-large-fp-convert-ui129tofp.ll @@ -426,3 +426,166 @@ define fp128 @ui129tofp128(i129 %a) { %conv = uitofp i129 %a to fp128 ret fp128 %conv } + +define <2 x float> @ui129tofloatv2(<2 x i129> %a) { +; CHECK-LABEL: @ui129tofloatv2( +; CHECK-NEXT: itofp-entryitofp-entry: +; CHECK-NEXT: [[TMP0:%.*]] = extractelement <2 x i129> [[A:%.*]], i64 0 +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i129 [[TMP0]], 0 +; CHECK-NEXT: br i1 [[TMP1]], label [[ITOFP_RETURN1:%.*]], label [[ITOFP_IF_END2:%.*]] +; CHECK: itofp-if-end2: +; CHECK-NEXT: 
[[TMP2:%.*]] = ashr i129 [[TMP0]], 128 +; CHECK-NEXT: [[TMP3:%.*]] = xor i129 [[TMP2]], [[TMP0]] +; CHECK-NEXT: [[TMP4:%.*]] = sub i129 [[TMP3]], [[TMP2]] +; CHECK-NEXT: [[TMP5:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP0]], i1 true) +; CHECK-NEXT: [[TMP6:%.*]] = trunc i129 [[TMP5]] to i32 +; CHECK-NEXT: [[TMP7:%.*]] = sub i32 129, [[TMP6]] +; CHECK-NEXT: [[TMP8:%.*]] = sub i32 128, [[TMP6]] +; CHECK-NEXT: [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 24 +; CHECK-NEXT: br i1 [[TMP9]], label [[ITOFP_IF_THEN43:%.*]], label [[ITOFP_IF_ELSE8:%.*]] +; CHECK: itofp-if-then43: +; CHECK-NEXT: switch i32 [[TMP7]], label [[ITOFP_SW_DEFAULT5:%.*]] [ +; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB4:%.*]] +; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG6:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb4: +; CHECK-NEXT: [[TMP10:%.*]] = shl i129 [[TMP0]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG6]] +; CHECK: itofp-sw-default5: +; CHECK-NEXT: [[TMP11:%.*]] = sub i32 103, [[TMP6]] +; CHECK-NEXT: [[TMP12:%.*]] = zext i32 [[TMP11]] to i129 +; CHECK-NEXT: [[TMP13:%.*]] = lshr i129 [[TMP0]], [[TMP12]] +; CHECK-NEXT: [[TMP14:%.*]] = add i32 [[TMP6]], 26 +; CHECK-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i129 +; CHECK-NEXT: [[TMP16:%.*]] = lshr i129 -1, [[TMP15]] +; CHECK-NEXT: [[TMP17:%.*]] = and i129 [[TMP16]], [[TMP0]] +; CHECK-NEXT: [[TMP18:%.*]] = icmp ne i129 [[TMP17]], 0 +; CHECK-NEXT: [[TMP19:%.*]] = zext i1 [[TMP18]] to i129 +; CHECK-NEXT: [[TMP20:%.*]] = or i129 [[TMP13]], [[TMP19]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG6]] +; CHECK: itofp-sw-epilog6: +; CHECK-NEXT: [[TMP21:%.*]] = phi i129 [ [[TMP20]], [[ITOFP_SW_DEFAULT5]] ], [ [[TMP0]], [[ITOFP_IF_THEN43]] ], [ [[TMP10]], [[ITOFP_SW_BB4]] ] +; CHECK-NEXT: [[TMP22:%.*]] = trunc i129 [[TMP21]] to i32 +; CHECK-NEXT: [[TMP23:%.*]] = lshr i32 [[TMP22]], 2 +; CHECK-NEXT: [[TMP24:%.*]] = and i32 [[TMP23]], 1 +; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP24]] to i129 +; CHECK-NEXT: [[TMP26:%.*]] = or i129 [[TMP21]], [[TMP25]] +; CHECK-NEXT: [[TMP27:%.*]] = add i129 [[TMP26]], 1 +; CHECK-NEXT: [[TMP28:%.*]] = lshr i129 [[TMP27]], 2 +; CHECK-NEXT: [[A310:%.*]] = and i129 [[TMP27]], 67108864 +; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i129 [[A310]], 0 +; CHECK-NEXT: [[TMP30:%.*]] = trunc i129 [[TMP28]] to i32 +; CHECK-NEXT: [[TMP31:%.*]] = lshr i129 [[TMP28]], 32 +; CHECK-NEXT: [[TMP32:%.*]] = trunc i129 [[TMP31]] to i32 +; CHECK-NEXT: br i1 [[TMP29]], label [[ITOFP_IF_END269:%.*]], label [[ITOFP_IF_THEN207:%.*]] +; CHECK: itofp-if-then207: +; CHECK-NEXT: [[TMP33:%.*]] = lshr i129 [[TMP27]], 3 +; CHECK-NEXT: [[TMP34:%.*]] = trunc i129 [[TMP33]] to i32 +; CHECK-NEXT: [[TMP35:%.*]] = lshr i129 [[TMP33]], 32 +; CHECK-NEXT: [[TMP36:%.*]] = trunc i129 [[TMP35]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END269]] +; CHECK: itofp-if-else8: +; CHECK-NEXT: [[TMP37:%.*]] = add i32 [[TMP6]], -105 +; CHECK-NEXT: [[TMP38:%.*]] = zext i32 [[TMP37]] to i129 +; CHECK-NEXT: [[TMP39:%.*]] = shl i129 [[TMP0]], [[TMP38]] +; CHECK-NEXT: [[TMP40:%.*]] = trunc i129 [[TMP39]] to i32 +; CHECK-NEXT: [[TMP41:%.*]] = lshr i129 [[TMP39]], 32 +; CHECK-NEXT: [[TMP42:%.*]] = trunc i129 [[TMP41]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END269]] +; CHECK: itofp-if-end269: +; CHECK-NEXT: [[TMP43:%.*]] = phi i32 [ [[TMP34]], [[ITOFP_IF_THEN207]] ], [ [[TMP30]], [[ITOFP_SW_EPILOG6]] ], [ [[TMP40]], [[ITOFP_IF_ELSE8]] ] +; CHECK-NEXT: [[TMP44:%.*]] = phi i32 [ [[TMP7]], [[ITOFP_IF_THEN207]] ], [ [[TMP8]], [[ITOFP_SW_EPILOG6]] ], [ [[TMP8]], [[ITOFP_IF_ELSE8]] ] +; CHECK-NEXT: [[TMP45:%.*]] = trunc i129 [[TMP2]] 
to i32 +; CHECK-NEXT: [[TMP46:%.*]] = and i32 [[TMP45]], -2147483648 +; CHECK-NEXT: [[TMP47:%.*]] = shl i32 [[TMP44]], 23 +; CHECK-NEXT: [[TMP48:%.*]] = add i32 [[TMP47]], 1065353216 +; CHECK-NEXT: [[TMP49:%.*]] = and i32 [[TMP43]], 8388607 +; CHECK-NEXT: [[TMP50:%.*]] = or i32 [[TMP49]], [[TMP46]] +; CHECK-NEXT: [[TMP51:%.*]] = or i32 [[TMP49]], [[TMP48]] +; CHECK-NEXT: [[TMP52:%.*]] = bitcast i32 [[TMP51]] to float +; CHECK-NEXT: br label [[ITOFP_RETURN1]] +; CHECK: itofp-return1: +; CHECK-NEXT: [[TMP53:%.*]] = phi float [ [[TMP52]], [[ITOFP_IF_END269]] ], [ 0.000000e+00, [[ITOFP_ENTRYITOFP_ENTRY:%.*]] ] +; CHECK-NEXT: [[TMP54:%.*]] = insertelement <2 x float> poison, float [[TMP53]], i64 0 +; CHECK-NEXT: [[TMP55:%.*]] = extractelement <2 x i129> [[A]], i64 1 +; CHECK-NEXT: [[TMP56:%.*]] = icmp eq i129 [[TMP55]], 0 +; CHECK-NEXT: br i1 [[TMP56]], label [[ITOFP_RETURN:%.*]], label [[ITOFP_IF_END:%.*]] +; CHECK: itofp-if-end: +; CHECK-NEXT: [[TMP57:%.*]] = ashr i129 [[TMP55]], 128 +; CHECK-NEXT: [[TMP58:%.*]] = xor i129 [[TMP57]], [[TMP55]] +; CHECK-NEXT: [[TMP59:%.*]] = sub i129 [[TMP58]], [[TMP57]] +; CHECK-NEXT: [[TMP60:%.*]] = call i129 @llvm.ctlz.i129(i129 [[TMP55]], i1 true) +; CHECK-NEXT: [[TMP61:%.*]] = trunc i129 [[TMP60]] to i32 +; CHECK-NEXT: [[TMP62:%.*]] = sub i32 129, [[TMP61]] +; CHECK-NEXT: [[TMP63:%.*]] = sub i32 128, [[TMP61]] +; CHECK-NEXT: [[TMP64:%.*]] = icmp sgt i32 [[TMP62]], 24 +; CHECK-NEXT: br i1 [[TMP64]], label [[ITOFP_IF_THEN4:%.*]], label [[ITOFP_IF_ELSE:%.*]] +; CHECK: itofp-if-then4: +; CHECK-NEXT: switch i32 [[TMP62]], label [[ITOFP_SW_DEFAULT:%.*]] [ +; CHECK-NEXT: i32 25, label [[ITOFP_SW_BB:%.*]] +; CHECK-NEXT: i32 26, label [[ITOFP_SW_EPILOG:%.*]] +; CHECK-NEXT: ] +; CHECK: itofp-sw-bb: +; CHECK-NEXT: [[TMP65:%.*]] = shl i129 [[TMP55]], 1 +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-default: +; CHECK-NEXT: [[TMP66:%.*]] = sub i32 103, [[TMP61]] +; CHECK-NEXT: [[TMP67:%.*]] = zext i32 [[TMP66]] to i129 +; CHECK-NEXT: [[TMP68:%.*]] = lshr i129 [[TMP55]], [[TMP67]] +; CHECK-NEXT: [[TMP69:%.*]] = add i32 [[TMP61]], 26 +; CHECK-NEXT: [[TMP70:%.*]] = zext i32 [[TMP69]] to i129 +; CHECK-NEXT: [[TMP71:%.*]] = lshr i129 -1, [[TMP70]] +; CHECK-NEXT: [[TMP72:%.*]] = and i129 [[TMP71]], [[TMP55]] +; CHECK-NEXT: [[TMP73:%.*]] = icmp ne i129 [[TMP72]], 0 +; CHECK-NEXT: [[TMP74:%.*]] = zext i1 [[TMP73]] to i129 +; CHECK-NEXT: [[TMP75:%.*]] = or i129 [[TMP68]], [[TMP74]] +; CHECK-NEXT: br label [[ITOFP_SW_EPILOG]] +; CHECK: itofp-sw-epilog: +; CHECK-NEXT: [[TMP76:%.*]] = phi i129 [ [[TMP75]], [[ITOFP_SW_DEFAULT]] ], [ [[TMP55]], [[ITOFP_IF_THEN4]] ], [ [[TMP65]], [[ITOFP_SW_BB]] ] +; CHECK-NEXT: [[TMP77:%.*]] = trunc i129 [[TMP76]] to i32 +; CHECK-NEXT: [[TMP78:%.*]] = lshr i32 [[TMP77]], 2 +; CHECK-NEXT: [[TMP79:%.*]] = and i32 [[TMP78]], 1 +; CHECK-NEXT: [[TMP80:%.*]] = zext i32 [[TMP79]] to i129 +; CHECK-NEXT: [[TMP81:%.*]] = or i129 [[TMP76]], [[TMP80]] +; CHECK-NEXT: [[TMP82:%.*]] = add i129 [[TMP81]], 1 +; CHECK-NEXT: [[TMP83:%.*]] = lshr i129 [[TMP82]], 2 +; CHECK-NEXT: [[A3:%.*]] = and i129 [[TMP82]], 67108864 +; CHECK-NEXT: [[TMP84:%.*]] = icmp eq i129 [[A3]], 0 +; CHECK-NEXT: [[TMP85:%.*]] = trunc i129 [[TMP83]] to i32 +; CHECK-NEXT: [[TMP86:%.*]] = lshr i129 [[TMP83]], 32 +; CHECK-NEXT: [[TMP87:%.*]] = trunc i129 [[TMP86]] to i32 +; CHECK-NEXT: br i1 [[TMP84]], label [[ITOFP_IF_END26:%.*]], label [[ITOFP_IF_THEN20:%.*]] +; CHECK: itofp-if-then20: +; CHECK-NEXT: [[TMP88:%.*]] = lshr i129 [[TMP82]], 3 +; CHECK-NEXT: [[TMP89:%.*]] = trunc i129 
[[TMP88]] to i32 +; CHECK-NEXT: [[TMP90:%.*]] = lshr i129 [[TMP88]], 32 +; CHECK-NEXT: [[TMP91:%.*]] = trunc i129 [[TMP90]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-else: +; CHECK-NEXT: [[TMP92:%.*]] = add i32 [[TMP61]], -105 +; CHECK-NEXT: [[TMP93:%.*]] = zext i32 [[TMP92]] to i129 +; CHECK-NEXT: [[TMP94:%.*]] = shl i129 [[TMP55]], [[TMP93]] +; CHECK-NEXT: [[TMP95:%.*]] = trunc i129 [[TMP94]] to i32 +; CHECK-NEXT: [[TMP96:%.*]] = lshr i129 [[TMP94]], 32 +; CHECK-NEXT: [[TMP97:%.*]] = trunc i129 [[TMP96]] to i32 +; CHECK-NEXT: br label [[ITOFP_IF_END26]] +; CHECK: itofp-if-end26: +; CHECK-NEXT: [[TMP98:%.*]] = phi i32 [ [[TMP89]], [[ITOFP_IF_THEN20]] ], [ [[TMP85]], [[ITOFP_SW_EPILOG]] ], [ [[TMP95]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP99:%.*]] = phi i32 [ [[TMP62]], [[ITOFP_IF_THEN20]] ], [ [[TMP63]], [[ITOFP_SW_EPILOG]] ], [ [[TMP63]], [[ITOFP_IF_ELSE]] ] +; CHECK-NEXT: [[TMP100:%.*]] = trunc i129 [[TMP57]] to i32 +; CHECK-NEXT: [[TMP101:%.*]] = and i32 [[TMP100]], -2147483648 +; CHECK-NEXT: [[TMP102:%.*]] = shl i32 [[TMP99]], 23 +; CHECK-NEXT: [[TMP103:%.*]] = add i32 [[TMP102]], 1065353216 +; CHECK-NEXT: [[TMP104:%.*]] = and i32 [[TMP98]], 8388607 +; CHECK-NEXT: [[TMP105:%.*]] = or i32 [[TMP104]], [[TMP101]] +; CHECK-NEXT: [[TMP106:%.*]] = or i32 [[TMP104]], [[TMP103]] +; CHECK-NEXT: [[TMP107:%.*]] = bitcast i32 [[TMP106]] to float +; CHECK-NEXT: br label [[ITOFP_RETURN]] +; CHECK: itofp-return: +; CHECK-NEXT: [[TMP108:%.*]] = phi float [ [[TMP107]], [[ITOFP_IF_END26]] ], [ 0.000000e+00, [[ITOFP_RETURN1]] ] +; CHECK-NEXT: [[TMP109:%.*]] = insertelement <2 x float> [[TMP54]], float [[TMP108]], i64 1 +; CHECK-NEXT: ret <2 x float> [[TMP109]] +; + %conv = uitofp <2 x i129> %a to <2 x float> + ret <2 x float> %conv +} -- cgit v1.1 From a75b3e949da588bafd521eff6d265f3ea2f854c2 Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 3 Apr 2024 13:14:07 +0800 Subject: [NFC] [Serialization] Extract logics to write decls and types into a standalone function This patch extract logics in ASTWriter::WriteASTCore about writing decls and types into a standalone function. The WriteASTCore function is pretty long and hard to read. It should be helpful for readability to extract the common logics into a standalone function. This is also helpful for further changes e.g., removing unreachable declarations. 
--- clang/include/clang/Serialization/ASTWriter.h | 1 + clang/lib/Serialization/ASTWriter.cpp | 134 +++++++++++++------------- 2 files changed, 68 insertions(+), 67 deletions(-) diff --git a/clang/include/clang/Serialization/ASTWriter.h b/clang/include/clang/Serialization/ASTWriter.h index bd310b6..214eb36 100644 --- a/clang/include/clang/Serialization/ASTWriter.h +++ b/clang/include/clang/Serialization/ASTWriter.h @@ -542,6 +542,7 @@ private: void WriteReferencedSelectorsPool(Sema &SemaRef); void WriteIdentifierTable(Preprocessor &PP, IdentifierResolver &IdResolver, bool IsModule); + void WriteDeclAndTypes(ASTContext &Context); void WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord); void WriteDeclContextVisibleUpdate(const DeclContext *DC); void WriteFPPragmaOptions(const FPOptionsOverride &Opts); diff --git a/clang/lib/Serialization/ASTWriter.cpp b/clang/lib/Serialization/ASTWriter.cpp index 0148eb4..ba6a8a5 100644 --- a/clang/lib/Serialization/ASTWriter.cpp +++ b/clang/lib/Serialization/ASTWriter.cpp @@ -5107,69 +5107,7 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, for (auto *D : SemaRef.DeclsToCheckForDeferredDiags) DeclsToCheckForDeferredDiags.push_back(GetDeclRef(D)); - { - auto Abv = std::make_shared(); - Abv->Add(llvm::BitCodeAbbrevOp(UPDATE_VISIBLE)); - Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); - Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); - UpdateVisibleAbbrev = Stream.EmitAbbrev(std::move(Abv)); - } - - RecordData DeclUpdatesOffsetsRecord; - - // Keep writing types, declarations, and declaration update records - // until we've emitted all of them. - Stream.EnterSubblock(DECLTYPES_BLOCK_ID, /*bits for abbreviations*/5); - DeclTypesBlockStartOffset = Stream.GetCurrentBitNo(); - WriteTypeAbbrevs(); - WriteDeclAbbrevs(); - do { - WriteDeclUpdatesBlocks(DeclUpdatesOffsetsRecord); - while (!DeclTypesToEmit.empty()) { - DeclOrType DOT = DeclTypesToEmit.front(); - DeclTypesToEmit.pop(); - if (DOT.isType()) - WriteType(DOT.getType()); - else - WriteDecl(Context, DOT.getDecl()); - } - } while (!DeclUpdates.empty()); - Stream.ExitBlock(); - - DoneWritingDeclsAndTypes = true; - - // These things can only be done once we've written out decls and types. - WriteTypeDeclOffsets(); - if (!DeclUpdatesOffsetsRecord.empty()) - Stream.EmitRecord(DECL_UPDATE_OFFSETS, DeclUpdatesOffsetsRecord); - - // Create a lexical update block containing all of the declarations in the - // translation unit that do not come from other AST files. - { - SmallVector NewGlobalKindDeclPairs; - for (const auto *D : TU->noload_decls()) { - if (!D->isFromASTFile()) { - NewGlobalKindDeclPairs.push_back(D->getKind()); - NewGlobalKindDeclPairs.push_back(GetDeclRef(D)); - } - } - - auto Abv = std::make_shared(); - Abv->Add(llvm::BitCodeAbbrevOp(TU_UPDATE_LEXICAL)); - Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); - unsigned TuUpdateLexicalAbbrev = Stream.EmitAbbrev(std::move(Abv)); - - RecordData::value_type Record[] = {TU_UPDATE_LEXICAL}; - Stream.EmitRecordWithBlob(TuUpdateLexicalAbbrev, Record, - bytes(NewGlobalKindDeclPairs)); - } - - // And a visible updates block for the translation unit. - WriteDeclContextVisibleUpdate(TU); - - // If we have any extern "C" names, write out a visible update for them. 
- if (Context.ExternCContext) - WriteDeclContextVisibleUpdate(Context.ExternCContext); + WriteDeclAndTypes(Context); WriteFileDeclIDsMap(); WriteSourceManagerBlock(Context.getSourceManager(), PP); @@ -5255,10 +5193,6 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, if (!DeleteExprsToAnalyze.empty()) Stream.EmitRecord(DELETE_EXPRS_TO_ANALYZE, DeleteExprsToAnalyze); - // Write the visible updates to DeclContexts. - for (auto *DC : UpdatedDeclContexts) - WriteDeclContextVisibleUpdate(DC); - if (!WritingModule) { // Write the submodules that were imported, if any. struct ModuleInfo { @@ -5323,6 +5257,72 @@ ASTFileSignature ASTWriter::WriteASTCore(Sema &SemaRef, StringRef isysroot, return backpatchSignature(); } +void ASTWriter::WriteDeclAndTypes(ASTContext &Context) { + // Keep writing types, declarations, and declaration update records + // until we've emitted all of them. + RecordData DeclUpdatesOffsetsRecord; + Stream.EnterSubblock(DECLTYPES_BLOCK_ID, /*bits for abbreviations*/5); + DeclTypesBlockStartOffset = Stream.GetCurrentBitNo(); + WriteTypeAbbrevs(); + WriteDeclAbbrevs(); + do { + WriteDeclUpdatesBlocks(DeclUpdatesOffsetsRecord); + while (!DeclTypesToEmit.empty()) { + DeclOrType DOT = DeclTypesToEmit.front(); + DeclTypesToEmit.pop(); + if (DOT.isType()) + WriteType(DOT.getType()); + else + WriteDecl(Context, DOT.getDecl()); + } + } while (!DeclUpdates.empty()); + Stream.ExitBlock(); + + DoneWritingDeclsAndTypes = true; + + // These things can only be done once we've written out decls and types. + WriteTypeDeclOffsets(); + if (!DeclUpdatesOffsetsRecord.empty()) + Stream.EmitRecord(DECL_UPDATE_OFFSETS, DeclUpdatesOffsetsRecord); + + const TranslationUnitDecl *TU = Context.getTranslationUnitDecl(); + // Create a lexical update block containing all of the declarations in the + // translation unit that do not come from other AST files. + SmallVector NewGlobalKindDeclPairs; + for (const auto *D : TU->noload_decls()) { + if (!D->isFromASTFile()) { + NewGlobalKindDeclPairs.push_back(D->getKind()); + NewGlobalKindDeclPairs.push_back(GetDeclRef(D)); + } + } + + auto Abv = std::make_shared(); + Abv->Add(llvm::BitCodeAbbrevOp(TU_UPDATE_LEXICAL)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); + unsigned TuUpdateLexicalAbbrev = Stream.EmitAbbrev(std::move(Abv)); + + RecordData::value_type Record[] = {TU_UPDATE_LEXICAL}; + Stream.EmitRecordWithBlob(TuUpdateLexicalAbbrev, Record, + bytes(NewGlobalKindDeclPairs)); + + Abv = std::make_shared(); + Abv->Add(llvm::BitCodeAbbrevOp(UPDATE_VISIBLE)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::VBR, 6)); + Abv->Add(llvm::BitCodeAbbrevOp(llvm::BitCodeAbbrevOp::Blob)); + UpdateVisibleAbbrev = Stream.EmitAbbrev(std::move(Abv)); + + // And a visible updates block for the translation unit. + WriteDeclContextVisibleUpdate(TU); + + // If we have any extern "C" names, write out a visible update for them. + if (Context.ExternCContext) + WriteDeclContextVisibleUpdate(Context.ExternCContext); + + // Write the visible updates to DeclContexts. 
+ for (auto *DC : UpdatedDeclContexts) + WriteDeclContextVisibleUpdate(DC); +} + void ASTWriter::WriteDeclUpdatesBlocks(RecordDataImpl &OffsetsRecord) { if (DeclUpdates.empty()) return; -- cgit v1.1 From 468dc32ff55d19f55132cbcc4d6ceb1f6d1c12cf Mon Sep 17 00:00:00 2001 From: Chuanqi Xu Date: Wed, 3 Apr 2024 13:40:25 +0800 Subject: [NFC] Make `DeclContext::noload_lookup()` accept transparent context Now the `DeclContext::noload_lookup()` asserts that 'this' is not a transparent context. However, this is not consistent with `DeclContext::lookup()`, which will lookup into its parent context if 'this' is a transparent context. This patch makes the behavior of `DeclContext::noload_lookup()` to be consistent with `DeclContext::lookup()`, to lookup into the parent context if 'this' is a transparent context. --- clang/lib/AST/DeclBase.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/DeclBase.cpp b/clang/lib/AST/DeclBase.cpp index 2cbb86b31..66a727d 100644 --- a/clang/lib/AST/DeclBase.cpp +++ b/clang/lib/AST/DeclBase.cpp @@ -1852,9 +1852,9 @@ DeclContext::lookup(DeclarationName Name) const { DeclContext::lookup_result DeclContext::noload_lookup(DeclarationName Name) { - assert(getDeclKind() != Decl::LinkageSpec && - getDeclKind() != Decl::Export && - "should not perform lookups into transparent contexts"); + // For transparent DeclContext, we should lookup in their enclosing context. + if (getDeclKind() == Decl::LinkageSpec || getDeclKind() == Decl::Export) + return getParent()->noload_lookup(Name); DeclContext *PrimaryContext = getPrimaryContext(); if (PrimaryContext != this) -- cgit v1.1 From 4b25053ae47f50095371a663391baadfd2694eb0 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Wed, 3 Apr 2024 15:21:34 +0800 Subject: [Win32][ELF] Make CodeView a DebugInfoFormat only for COFF format (#87149) We have many problems to use CodeView for a win32-elf target, e.g., #87140 and `error: .seh_* directives are not supported on this target`. Fixes: #87140 --- clang/lib/Driver/ToolChains/MSVC.h | 5 ++--- clang/test/Misc/win32-elf.c | 5 +++++ 2 files changed, 7 insertions(+), 3 deletions(-) create mode 100644 clang/test/Misc/win32-elf.c diff --git a/clang/lib/Driver/ToolChains/MSVC.h b/clang/lib/Driver/ToolChains/MSVC.h index 48369e0..3950a8e 100644 --- a/clang/lib/Driver/ToolChains/MSVC.h +++ b/clang/lib/Driver/ToolChains/MSVC.h @@ -61,9 +61,8 @@ public: /// formats, and to DWARF otherwise. Users can use -gcodeview and -gdwarf to /// override the default. llvm::codegenoptions::DebugInfoFormat getDefaultDebugFormat() const override { - return getTriple().isOSBinFormatMachO() - ? llvm::codegenoptions::DIF_DWARF - : llvm::codegenoptions::DIF_CodeView; + return getTriple().isOSBinFormatCOFF() ? llvm::codegenoptions::DIF_CodeView + : llvm::codegenoptions::DIF_DWARF; } /// Set the debugger tuning to "default", since we're definitely not tuning diff --git a/clang/test/Misc/win32-elf.c b/clang/test/Misc/win32-elf.c new file mode 100644 index 0000000..f75281d --- /dev/null +++ b/clang/test/Misc/win32-elf.c @@ -0,0 +1,5 @@ +// Check that basic use of win32-elf targets works. 
+// RUN: %clang -fsyntax-only -target x86_64-pc-win32-elf %s + +// RUN: %clang -fsyntax-only -target x86_64-pc-win32-elf -g %s -### 2>&1 | FileCheck %s -check-prefix=DEBUG-INFO +// DEBUG-INFO: -dwarf-version={{.*}} -- cgit v1.1 From 6288f36c1640ee1f50fe35e07a97c50355066f27 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 3 Apr 2024 08:44:51 +0100 Subject: [AArch64][GlobalISel] Basic add_sat and sub_sat vector handling. (#80650) This tries to fill in the basic vector handling for sadd_sat/uadd_sat and ssub_sat/usub_sat. It just handles the basics, marking legal types and clamping illegally sized vectors to legal ones. --- .../Target/AArch64/GISel/AArch64LegalizerInfo.cpp | 13 +- .../GlobalISel/legalizer-info-validation.mir | 1 + llvm/test/CodeGen/AArch64/sadd_sat.ll | 2 - llvm/test/CodeGen/AArch64/sadd_sat_vec.ll | 299 +++++++++++++-------- llvm/test/CodeGen/AArch64/ssub_sat.ll | 2 - llvm/test/CodeGen/AArch64/ssub_sat_vec.ll | 299 +++++++++++++-------- llvm/test/CodeGen/AArch64/uadd_sat_vec.ll | 295 +++++++++++++------- llvm/test/CodeGen/AArch64/usub_sat_vec.ll | 291 +++++++++++++------- 8 files changed, 775 insertions(+), 427 deletions(-) diff --git a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp index 33dba6a5..043f142 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64LegalizerInfo.cpp @@ -1141,9 +1141,6 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .scalarize(1) .lower(); - getActionDefinitionsBuilder({G_UADDSAT, G_USUBSAT}) - .lowerIf([=](const LegalityQuery &Q) { return Q.Types[0].isScalar(); }); - getActionDefinitionsBuilder({G_FSHL, G_FSHR}) .customFor({{s32, s32}, {s32, s64}, {s64, s64}}) .lower(); @@ -1191,8 +1188,14 @@ AArch64LegalizerInfo::AArch64LegalizerInfo(const AArch64Subtarget &ST) .minScalarEltSameAsIf(always, 1, 0) .maxScalarEltSameAsIf(always, 1, 0); - // TODO: Vector types. - getActionDefinitionsBuilder({G_SADDSAT, G_SSUBSAT}).lowerIf(isScalar(0)); + getActionDefinitionsBuilder({G_UADDSAT, G_SADDSAT, G_USUBSAT, G_SSUBSAT}) + .legalFor({v2s64, v2s32, v4s32, v4s16, v8s16, v8s8, v16s8}) + .clampNumElements(0, v8s8, v16s8) + .clampNumElements(0, v4s16, v8s16) + .clampNumElements(0, v2s32, v4s32) + .clampMaxNumElements(0, s64, 2) + .moreElementsToNextPow2(0) + .lower(); // TODO: Libcall support for s128. // TODO: s16 should be legal with full FP16 support. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir index ac3c47c..200e9d1 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/legalizer-info-validation.mir @@ -395,6 +395,7 @@ # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_SADDSAT (opcode {{[0-9]+}}): 1 type index, 0 imm indices +# DEBUG-NEXT: .. opcode {{[0-9]+}} is aliased to {{[0-9]+}} # DEBUG-NEXT: .. type index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: .. 
imm index coverage check SKIPPED: user-defined predicate detected # DEBUG-NEXT: G_USUBSAT (opcode {{[0-9]+}}): 1 type index, 0 imm indices diff --git a/llvm/test/CodeGen/AArch64/sadd_sat.ll b/llvm/test/CodeGen/AArch64/sadd_sat.ll index 9e09b7f..789fd7b 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat.ll @@ -2,8 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for vec - declare i4 @llvm.sadd.sat.i4(i4, i4) declare i8 @llvm.sadd.sat.i8(i8, i8) declare i16 @llvm.sadd.sat.i16(i16, i16) diff --git a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll index 6f1ae02..8a0e766 100644 --- a/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/sadd_sat_vec.ll @@ -2,28 +2,10 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v16i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.sadd.sat.v1i8(<1 x i8>, <1 x i8>) @@ -67,23 +49,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { } define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { -; CHECK-LABEL: v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v1.16b, v1.16b, v3.16b -; CHECK-NEXT: sqadd v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: sqadd v0.16b, v0.16b, 
v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: sqadd v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ret %z = call <32 x i8> @llvm.sadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z } define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { -; CHECK-LABEL: v64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.16b, v2.16b, v6.16b -; CHECK-NEXT: sqadd v0.16b, v0.16b, v4.16b -; CHECK-NEXT: sqadd v1.16b, v1.16b, v5.16b -; CHECK-NEXT: sqadd v3.16b, v3.16b, v7.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v64i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v2.16b, v2.16b, v6.16b +; CHECK-SD-NEXT: sqadd v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: sqadd v1.16b, v1.16b, v5.16b +; CHECK-SD-NEXT: sqadd v3.16b, v3.16b, v7.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v64i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: sqadd v1.16b, v1.16b, v5.16b +; CHECK-GI-NEXT: sqadd v2.16b, v2.16b, v6.16b +; CHECK-GI-NEXT: sqadd v3.16b, v3.16b, v7.16b +; CHECK-GI-NEXT: ret %z = call <64 x i8> @llvm.sadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y) ret <64 x i8> %z } @@ -98,23 +94,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { } define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { -; CHECK-LABEL: v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v1.8h, v1.8h, v3.8h -; CHECK-NEXT: sqadd v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v1.8h, v1.8h, v3.8h +; CHECK-SD-NEXT: sqadd v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: sqadd v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: ret %z = call <16 x i16> @llvm.sadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z } define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { -; CHECK-LABEL: v32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.8h, v2.8h, v6.8h -; CHECK-NEXT: sqadd v0.8h, v0.8h, v4.8h -; CHECK-NEXT: sqadd v1.8h, v1.8h, v5.8h -; CHECK-NEXT: sqadd v3.8h, v3.8h, v7.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v2.8h, v2.8h, v6.8h +; CHECK-SD-NEXT: sqadd v0.8h, v0.8h, v4.8h +; CHECK-SD-NEXT: sqadd v1.8h, v1.8h, v5.8h +; CHECK-SD-NEXT: sqadd v3.8h, v3.8h, v7.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: sqadd v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: sqadd v2.8h, v2.8h, v6.8h +; CHECK-GI-NEXT: sqadd v3.8h, v3.8h, v7.8h +; CHECK-GI-NEXT: ret %z = call <32 x i16> @llvm.sadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y) ret <32 x i16> %z } @@ -135,19 +145,42 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: shl v1.4h, v1.4h, #8 -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: sqadd v0.4h, v0.4h, v1.4h -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: shl 
v1.4h, v1.4h, #8 +; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: sqadd v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: mov b6, v1.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: sqadd v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: str w8, [x2] +; CHECK-GI-NEXT: ret %x = load <4 x i8>, ptr %px %y = load <4 x i8>, ptr %py %z = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -196,23 +229,37 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-NEXT: add x8, x0, #2 -; CHECK-NEXT: add x9, x1, #2 -; CHECK-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-NEXT: ld1 { v1.h }[2], [x9] -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sqadd v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-SD-NEXT: add x8, x0, #2 +; CHECK-SD-NEXT: add x9, x1, #2 +; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] +; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #16 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: sqadd v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [x2] +; CHECK-SD-NEXT: strh w8, [x2, #2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: ldr h2, [x1] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: sqadd v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: str h0, [x2] +; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py %z = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y) @@ -230,15 +277,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind { } define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v12i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: sqadd v0.8h, v1.8h, v0.8h -; CHECK-NEXT: sqadd v1.8h, v2.8h, v3.8h -; CHECK-NEXT: str q0, [x2] -; CHECK-NEXT: str d1, [x2, #16] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v12i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp q0, q3, [x1] +; CHECK-SD-NEXT: ldp q1, q2, [x0] +; CHECK-SD-NEXT: sqadd v0.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: sqadd v1.8h, v2.8h, v3.8h +; 
CHECK-SD-NEXT: str q0, [x2] +; CHECK-SD-NEXT: str d1, [x2, #16] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v12i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: ldr d2, [x0, #16] +; CHECK-GI-NEXT: ldr d3, [x1, #16] +; CHECK-GI-NEXT: sqadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: sqadd v1.4h, v2.4h, v3.4h +; CHECK-GI-NEXT: str q0, [x2] +; CHECK-GI-NEXT: str d1, [x2, #16] +; CHECK-GI-NEXT: ret %x = load <12 x i16>, ptr %px %y = load <12 x i16>, ptr %py %z = call <12 x i16> @llvm.sadd.sat.v12i16(<12 x i16> %x, <12 x i16> %y) @@ -346,23 +405,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { } define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v1.4s, v1.4s, v3.4s -; CHECK-NEXT: sqadd v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v1.4s, v1.4s, v3.4s +; CHECK-SD-NEXT: sqadd v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: sqadd v1.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: ret %z = call <8 x i32> @llvm.sadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z } define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; CHECK-LABEL: v16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.4s, v2.4s, v6.4s -; CHECK-NEXT: sqadd v0.4s, v0.4s, v4.4s -; CHECK-NEXT: sqadd v1.4s, v1.4s, v5.4s -; CHECK-NEXT: sqadd v3.4s, v3.4s, v7.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v2.4s, v2.4s, v6.4s +; CHECK-SD-NEXT: sqadd v0.4s, v0.4s, v4.4s +; CHECK-SD-NEXT: sqadd v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: sqadd v3.4s, v3.4s, v7.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: sqadd v1.4s, v1.4s, v5.4s +; CHECK-GI-NEXT: sqadd v2.4s, v2.4s, v6.4s +; CHECK-GI-NEXT: sqadd v3.4s, v3.4s, v7.4s +; CHECK-GI-NEXT: ret %z = call <16 x i32> @llvm.sadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y) ret <16 x i32> %z } @@ -377,23 +450,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { } define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; CHECK-LABEL: v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v1.2d, v1.2d, v3.2d -; CHECK-NEXT: sqadd v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v1.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: sqadd v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: sqadd v1.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: ret %z = call <4 x i64> @llvm.sadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z } define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sqadd v2.2d, v2.2d, v6.2d -; CHECK-NEXT: sqadd v0.2d, v0.2d, v4.2d -; CHECK-NEXT: sqadd v1.2d, v1.2d, v5.2d -; CHECK-NEXT: sqadd v3.2d, v3.2d, v7.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqadd v2.2d, v2.2d, v6.2d +; CHECK-SD-NEXT: sqadd v0.2d, v0.2d, v4.2d +; CHECK-SD-NEXT: sqadd v1.2d, v1.2d, v5.2d +; CHECK-SD-NEXT: sqadd v3.2d, v3.2d, v7.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqadd v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: sqadd v1.2d, v1.2d, v5.2d +; 
CHECK-GI-NEXT: sqadd v2.2d, v2.2d, v6.2d +; CHECK-GI-NEXT: sqadd v3.2d, v3.2d, v7.2d +; CHECK-GI-NEXT: ret %z = call <8 x i64> @llvm.sadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %z } diff --git a/llvm/test/CodeGen/AArch64/ssub_sat.ll b/llvm/test/CodeGen/AArch64/ssub_sat.ll index abeb4b3..4d755f4 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat.ll @@ -2,8 +2,6 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for vec - declare i4 @llvm.ssub.sat.i4(i4, i4) declare i8 @llvm.ssub.sat.i8(i8, i8) declare i16 @llvm.ssub.sat.i16(i16, i16) diff --git a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll index d1f843a..a8c1276 100644 --- a/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/ssub_sat_vec.ll @@ -2,28 +2,10 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v16i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.ssub.sat.v1i8(<1 x i8>, <1 x i8>) @@ -68,23 +50,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { } define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { -; CHECK-LABEL: v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v1.16b, v1.16b, v3.16b -; CHECK-NEXT: sqsub v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v1.16b, 
v1.16b, v3.16b +; CHECK-SD-NEXT: sqsub v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: sqsub v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ret %z = call <32 x i8> @llvm.ssub.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z } define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { -; CHECK-LABEL: v64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.16b, v2.16b, v6.16b -; CHECK-NEXT: sqsub v0.16b, v0.16b, v4.16b -; CHECK-NEXT: sqsub v1.16b, v1.16b, v5.16b -; CHECK-NEXT: sqsub v3.16b, v3.16b, v7.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v64i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v2.16b, v2.16b, v6.16b +; CHECK-SD-NEXT: sqsub v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: sqsub v1.16b, v1.16b, v5.16b +; CHECK-SD-NEXT: sqsub v3.16b, v3.16b, v7.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v64i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: sqsub v1.16b, v1.16b, v5.16b +; CHECK-GI-NEXT: sqsub v2.16b, v2.16b, v6.16b +; CHECK-GI-NEXT: sqsub v3.16b, v3.16b, v7.16b +; CHECK-GI-NEXT: ret %z = call <64 x i8> @llvm.ssub.sat.v64i8(<64 x i8> %x, <64 x i8> %y) ret <64 x i8> %z } @@ -99,23 +95,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { } define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { -; CHECK-LABEL: v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v1.8h, v1.8h, v3.8h -; CHECK-NEXT: sqsub v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v1.8h, v1.8h, v3.8h +; CHECK-SD-NEXT: sqsub v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: sqsub v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: ret %z = call <16 x i16> @llvm.ssub.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z } define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { -; CHECK-LABEL: v32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.8h, v2.8h, v6.8h -; CHECK-NEXT: sqsub v0.8h, v0.8h, v4.8h -; CHECK-NEXT: sqsub v1.8h, v1.8h, v5.8h -; CHECK-NEXT: sqsub v3.8h, v3.8h, v7.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v2.8h, v2.8h, v6.8h +; CHECK-SD-NEXT: sqsub v0.8h, v0.8h, v4.8h +; CHECK-SD-NEXT: sqsub v1.8h, v1.8h, v5.8h +; CHECK-SD-NEXT: sqsub v3.8h, v3.8h, v7.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: sqsub v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: sqsub v2.8h, v2.8h, v6.8h +; CHECK-GI-NEXT: sqsub v3.8h, v3.8h, v7.8h +; CHECK-GI-NEXT: ret %z = call <32 x i16> @llvm.ssub.sat.v32i16(<32 x i16> %x, <32 x i16> %y) ret <32 x i16> %z } @@ -136,19 +146,42 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: sshll v0.8h, v0.8b, #0 -; CHECK-NEXT: sshll v1.8h, v1.8b, #0 -; CHECK-NEXT: shl v1.4h, v1.4h, #8 -; CHECK-NEXT: shl v0.4h, v0.4h, #8 -; CHECK-NEXT: sqsub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: sshr v0.4h, v0.4h, #8 -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: sshll v0.8h, v0.8b, #0 +; 
CHECK-SD-NEXT: sshll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: shl v1.4h, v1.4h, #8 +; CHECK-SD-NEXT: shl v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: sqsub v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: sshr v0.4h, v0.4h, #8 +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: mov b6, v1.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: sqsub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: str w8, [x2] +; CHECK-GI-NEXT: ret %x = load <4 x i8>, ptr %px %y = load <4 x i8>, ptr %py %z = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -197,23 +230,37 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ld1 { v0.h }[0], [x0] -; CHECK-NEXT: ld1 { v1.h }[0], [x1] -; CHECK-NEXT: add x8, x0, #2 -; CHECK-NEXT: add x9, x1, #2 -; CHECK-NEXT: ld1 { v0.h }[2], [x8] -; CHECK-NEXT: ld1 { v1.h }[2], [x9] -; CHECK-NEXT: shl v1.2s, v1.2s, #16 -; CHECK-NEXT: shl v0.2s, v0.2s, #16 -; CHECK-NEXT: sqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: ushr v0.2s, v0.2s, #16 -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ld1 { v0.h }[0], [x0] +; CHECK-SD-NEXT: ld1 { v1.h }[0], [x1] +; CHECK-SD-NEXT: add x8, x0, #2 +; CHECK-SD-NEXT: add x9, x1, #2 +; CHECK-SD-NEXT: ld1 { v0.h }[2], [x8] +; CHECK-SD-NEXT: ld1 { v1.h }[2], [x9] +; CHECK-SD-NEXT: shl v1.2s, v1.2s, #16 +; CHECK-SD-NEXT: shl v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: sqsub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: ushr v0.2s, v0.2s, #16 +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [x2] +; CHECK-SD-NEXT: strh w8, [x2, #2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: ldr h2, [x1] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: sqsub v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: str h0, [x2] +; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py %z = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %x, <2 x i16> %y) @@ -231,15 +278,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind { } define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v12i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: sqsub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: sqsub v1.8h, v2.8h, v3.8h -; CHECK-NEXT: str q0, [x2] -; CHECK-NEXT: str d1, [x2, #16] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v12i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp q0, q3, [x1] +; CHECK-SD-NEXT: ldp q1, q2, [x0] +; CHECK-SD-NEXT: sqsub v0.8h, 
v1.8h, v0.8h +; CHECK-SD-NEXT: sqsub v1.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: str q0, [x2] +; CHECK-SD-NEXT: str d1, [x2, #16] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v12i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: ldr d2, [x0, #16] +; CHECK-GI-NEXT: ldr d3, [x1, #16] +; CHECK-GI-NEXT: sqsub v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: sqsub v1.4h, v2.4h, v3.4h +; CHECK-GI-NEXT: str q0, [x2] +; CHECK-GI-NEXT: str d1, [x2, #16] +; CHECK-GI-NEXT: ret %x = load <12 x i16>, ptr %px %y = load <12 x i16>, ptr %py %z = call <12 x i16> @llvm.ssub.sat.v12i16(<12 x i16> %x, <12 x i16> %y) @@ -349,23 +408,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { } define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v1.4s, v1.4s, v3.4s -; CHECK-NEXT: sqsub v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v1.4s, v1.4s, v3.4s +; CHECK-SD-NEXT: sqsub v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: sqsub v1.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: ret %z = call <8 x i32> @llvm.ssub.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z } define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; CHECK-LABEL: v16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.4s, v2.4s, v6.4s -; CHECK-NEXT: sqsub v0.4s, v0.4s, v4.4s -; CHECK-NEXT: sqsub v1.4s, v1.4s, v5.4s -; CHECK-NEXT: sqsub v3.4s, v3.4s, v7.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v2.4s, v2.4s, v6.4s +; CHECK-SD-NEXT: sqsub v0.4s, v0.4s, v4.4s +; CHECK-SD-NEXT: sqsub v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: sqsub v3.4s, v3.4s, v7.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: sqsub v1.4s, v1.4s, v5.4s +; CHECK-GI-NEXT: sqsub v2.4s, v2.4s, v6.4s +; CHECK-GI-NEXT: sqsub v3.4s, v3.4s, v7.4s +; CHECK-GI-NEXT: ret %z = call <16 x i32> @llvm.ssub.sat.v16i32(<16 x i32> %x, <16 x i32> %y) ret <16 x i32> %z } @@ -380,23 +453,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { } define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; CHECK-LABEL: v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v1.2d, v1.2d, v3.2d -; CHECK-NEXT: sqsub v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v1.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: sqsub v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: sqsub v1.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: ret %z = call <4 x i64> @llvm.ssub.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z } define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: sqsub v2.2d, v2.2d, v6.2d -; CHECK-NEXT: sqsub v0.2d, v0.2d, v4.2d -; CHECK-NEXT: sqsub v1.2d, v1.2d, v5.2d -; CHECK-NEXT: sqsub v3.2d, v3.2d, v7.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: sqsub v2.2d, v2.2d, v6.2d +; CHECK-SD-NEXT: sqsub v0.2d, v0.2d, v4.2d +; CHECK-SD-NEXT: sqsub v1.2d, v1.2d, v5.2d +; CHECK-SD-NEXT: sqsub v3.2d, v3.2d, v7.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: sqsub v0.2d, 
v0.2d, v4.2d +; CHECK-GI-NEXT: sqsub v1.2d, v1.2d, v5.2d +; CHECK-GI-NEXT: sqsub v2.2d, v2.2d, v6.2d +; CHECK-GI-NEXT: sqsub v3.2d, v3.2d, v7.2d +; CHECK-GI-NEXT: ret %z = call <8 x i64> @llvm.ssub.sat.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %z } diff --git a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll index f0bbed5..30ff700 100644 --- a/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/uadd_sat_vec.ll @@ -2,28 +2,10 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path for v16i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.uadd.sat.v1i8(<1 x i8>, <1 x i8>) @@ -67,23 +49,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { } define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { -; CHECK-LABEL: v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v1.16b, v1.16b, v3.16b -; CHECK-NEXT: uqadd v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: uqadd v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: uqadd v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ret %z = call <32 x i8> @llvm.uadd.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z } define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { -; CHECK-LABEL: v64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.16b, v2.16b, v6.16b -; CHECK-NEXT: uqadd v0.16b, v0.16b, v4.16b -; CHECK-NEXT: uqadd 
v1.16b, v1.16b, v5.16b -; CHECK-NEXT: uqadd v3.16b, v3.16b, v7.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v64i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v2.16b, v2.16b, v6.16b +; CHECK-SD-NEXT: uqadd v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: uqadd v1.16b, v1.16b, v5.16b +; CHECK-SD-NEXT: uqadd v3.16b, v3.16b, v7.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v64i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: uqadd v1.16b, v1.16b, v5.16b +; CHECK-GI-NEXT: uqadd v2.16b, v2.16b, v6.16b +; CHECK-GI-NEXT: uqadd v3.16b, v3.16b, v7.16b +; CHECK-GI-NEXT: ret %z = call <64 x i8> @llvm.uadd.sat.v64i8(<64 x i8> %x, <64 x i8> %y) ret <64 x i8> %z } @@ -98,23 +94,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) nounwind { } define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { -; CHECK-LABEL: v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v1.8h, v1.8h, v3.8h -; CHECK-NEXT: uqadd v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v1.8h, v1.8h, v3.8h +; CHECK-SD-NEXT: uqadd v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: uqadd v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: ret %z = call <16 x i16> @llvm.uadd.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z } define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { -; CHECK-LABEL: v32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.8h, v2.8h, v6.8h -; CHECK-NEXT: uqadd v0.8h, v0.8h, v4.8h -; CHECK-NEXT: uqadd v1.8h, v1.8h, v5.8h -; CHECK-NEXT: uqadd v3.8h, v3.8h, v7.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v2.8h, v2.8h, v6.8h +; CHECK-SD-NEXT: uqadd v0.8h, v0.8h, v4.8h +; CHECK-SD-NEXT: uqadd v1.8h, v1.8h, v5.8h +; CHECK-SD-NEXT: uqadd v3.8h, v3.8h, v7.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: uqadd v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: uqadd v2.8h, v2.8h, v6.8h +; CHECK-GI-NEXT: uqadd v3.8h, v3.8h, v7.8h +; CHECK-GI-NEXT: ret %z = call <32 x i16> @llvm.uadd.sat.v32i16(<32 x i16> %x, <32 x i16> %y) ret <32 x i16> %z } @@ -135,16 +145,39 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s1, [x0] -; CHECK-NEXT: ldr s2, [x1] -; CHECK-NEXT: movi d0, #0xff00ff00ff00ff -; CHECK-NEXT: uaddl v1.8h, v1.8b, v2.8b -; CHECK-NEXT: umin v0.4h, v1.4h, v0.4h -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s1, [x0] +; CHECK-SD-NEXT: ldr s2, [x1] +; CHECK-SD-NEXT: movi d0, #0xff00ff00ff00ff +; CHECK-SD-NEXT: uaddl v1.8h, v1.8b, v2.8b +; CHECK-SD-NEXT: umin v0.4h, v1.4h, v0.4h +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: mov b6, v1.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov 
v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: uqadd v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: str w8, [x2] +; CHECK-GI-NEXT: ret %x = load <4 x i8>, ptr %px %y = load <4 x i8>, ptr %py %z = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -194,24 +227,38 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: movi d2, #0x00ffff0000ffff -; CHECK-NEXT: ldrh w10, [x0, #2] -; CHECK-NEXT: ldrh w11, [x1, #2] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: add v0.2s, v0.2s, v1.2s -; CHECK-NEXT: umin v0.2s, v0.2s, v2.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldrh w8, [x0] +; CHECK-SD-NEXT: ldrh w9, [x1] +; CHECK-SD-NEXT: movi d2, #0x00ffff0000ffff +; CHECK-SD-NEXT: ldrh w10, [x0, #2] +; CHECK-SD-NEXT: ldrh w11, [x1, #2] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: mov v0.s[1], w10 +; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: add v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: umin v0.2s, v0.2s, v2.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [x2] +; CHECK-SD-NEXT: strh w8, [x2, #2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: ldr h2, [x1] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: uqadd v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: str h0, [x2] +; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py %z = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %x, <2 x i16> %y) @@ -229,15 +276,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind { } define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v12i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: uqadd v0.8h, v1.8h, v0.8h -; CHECK-NEXT: uqadd v1.8h, v2.8h, v3.8h -; CHECK-NEXT: str q0, [x2] -; CHECK-NEXT: str d1, [x2, #16] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v12i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp q0, q3, [x1] +; CHECK-SD-NEXT: ldp q1, q2, [x0] +; CHECK-SD-NEXT: uqadd v0.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: uqadd v1.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: str q0, [x2] +; CHECK-SD-NEXT: str d1, [x2, #16] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v12i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: ldr d2, [x0, #16] +; CHECK-GI-NEXT: ldr d3, [x1, #16] +; CHECK-GI-NEXT: uqadd v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: uqadd v1.4h, v2.4h, v3.4h +; CHECK-GI-NEXT: str q0, [x2] +; CHECK-GI-NEXT: str d1, [x2, #16] +; CHECK-GI-NEXT: ret %x = load <12 x i16>, ptr %px %y = load <12 x i16>, ptr %py %z = call <12 x i16> @llvm.uadd.sat.v12i16(<12 x i16> %x, <12 x i16> %y) @@ -336,23 +395,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { } define <8 x i32> @v8i32(<8 x i32> 
%x, <8 x i32> %y) nounwind { -; CHECK-LABEL: v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v1.4s, v1.4s, v3.4s -; CHECK-NEXT: uqadd v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v1.4s, v1.4s, v3.4s +; CHECK-SD-NEXT: uqadd v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: uqadd v1.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: ret %z = call <8 x i32> @llvm.uadd.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z } define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; CHECK-LABEL: v16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.4s, v2.4s, v6.4s -; CHECK-NEXT: uqadd v0.4s, v0.4s, v4.4s -; CHECK-NEXT: uqadd v1.4s, v1.4s, v5.4s -; CHECK-NEXT: uqadd v3.4s, v3.4s, v7.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v2.4s, v2.4s, v6.4s +; CHECK-SD-NEXT: uqadd v0.4s, v0.4s, v4.4s +; CHECK-SD-NEXT: uqadd v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: uqadd v3.4s, v3.4s, v7.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: uqadd v1.4s, v1.4s, v5.4s +; CHECK-GI-NEXT: uqadd v2.4s, v2.4s, v6.4s +; CHECK-GI-NEXT: uqadd v3.4s, v3.4s, v7.4s +; CHECK-GI-NEXT: ret %z = call <16 x i32> @llvm.uadd.sat.v16i32(<16 x i32> %x, <16 x i32> %y) ret <16 x i32> %z } @@ -367,23 +440,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { } define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; CHECK-LABEL: v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v1.2d, v1.2d, v3.2d -; CHECK-NEXT: uqadd v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v1.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: uqadd v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: uqadd v1.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: ret %z = call <4 x i64> @llvm.uadd.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z } define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: uqadd v2.2d, v2.2d, v6.2d -; CHECK-NEXT: uqadd v0.2d, v0.2d, v4.2d -; CHECK-NEXT: uqadd v1.2d, v1.2d, v5.2d -; CHECK-NEXT: uqadd v3.2d, v3.2d, v7.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqadd v2.2d, v2.2d, v6.2d +; CHECK-SD-NEXT: uqadd v0.2d, v0.2d, v4.2d +; CHECK-SD-NEXT: uqadd v1.2d, v1.2d, v5.2d +; CHECK-SD-NEXT: uqadd v3.2d, v3.2d, v7.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqadd v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: uqadd v1.2d, v1.2d, v5.2d +; CHECK-GI-NEXT: uqadd v2.2d, v2.2d, v6.2d +; CHECK-GI-NEXT: uqadd v3.2d, v3.2d, v7.2d +; CHECK-GI-NEXT: ret %z = call <8 x i64> @llvm.uadd.sat.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %z } diff --git a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll index 82c0327..3bc2796 100644 --- a/llvm/test/CodeGen/AArch64/usub_sat_vec.ll +++ b/llvm/test/CodeGen/AArch64/usub_sat_vec.ll @@ -2,28 +2,10 @@ ; RUN: llc < %s -mtriple=aarch64-- | FileCheck %s --check-prefixes=CHECK,CHECK-SD ; RUN: llc < %s -mtriple=aarch64-- -global-isel -global-isel-abort=2 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-GI -; CHECK-GI: warning: Instruction selection used fallback path 
for v16i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v64i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v32i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i16 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i16 +; CHECK-GI: warning: Instruction selection used fallback path for v2i8 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i8 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v12i16 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i4 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i1 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v16i32 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v4i64 -; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v8i64 ; CHECK-GI-NEXT: warning: Instruction selection used fallback path for v2i128 declare <1 x i8> @llvm.usub.sat.v1i8(<1 x i8>, <1 x i8>) @@ -68,23 +50,37 @@ define <16 x i8> @v16i8(<16 x i8> %x, <16 x i8> %y) nounwind { } define <32 x i8> @v32i8(<32 x i8> %x, <32 x i8> %y) nounwind { -; CHECK-LABEL: v32i8: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v1.16b, v1.16b, v3.16b -; CHECK-NEXT: uqsub v0.16b, v0.16b, v2.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v1.16b, v1.16b, v3.16b +; CHECK-SD-NEXT: uqsub v0.16b, v0.16b, v2.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.16b, v0.16b, v2.16b +; CHECK-GI-NEXT: uqsub v1.16b, v1.16b, v3.16b +; CHECK-GI-NEXT: ret %z = call <32 x i8> @llvm.usub.sat.v32i8(<32 x i8> %x, <32 x i8> %y) ret <32 x i8> %z } define <64 x i8> @v64i8(<64 x i8> %x, <64 x i8> %y) nounwind { -; CHECK-LABEL: v64i8: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.16b, v2.16b, v6.16b -; CHECK-NEXT: uqsub v0.16b, v0.16b, v4.16b -; CHECK-NEXT: uqsub v1.16b, v1.16b, v5.16b -; CHECK-NEXT: uqsub v3.16b, v3.16b, v7.16b -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v64i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v2.16b, v2.16b, v6.16b +; CHECK-SD-NEXT: uqsub v0.16b, v0.16b, v4.16b +; CHECK-SD-NEXT: uqsub v1.16b, v1.16b, v5.16b +; CHECK-SD-NEXT: uqsub v3.16b, v3.16b, v7.16b +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v64i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.16b, v0.16b, v4.16b +; CHECK-GI-NEXT: uqsub v1.16b, v1.16b, v5.16b +; CHECK-GI-NEXT: uqsub v2.16b, v2.16b, v6.16b +; CHECK-GI-NEXT: uqsub v3.16b, v3.16b, v7.16b +; CHECK-GI-NEXT: ret %z = call <64 x i8> @llvm.usub.sat.v64i8(<64 x i8> %x, <64 x i8> %y) ret <64 x i8> %z } @@ -99,23 +95,37 @@ define <8 x i16> @v8i16(<8 x i16> %x, <8 x i16> %y) 
nounwind { } define <16 x i16> @v16i16(<16 x i16> %x, <16 x i16> %y) nounwind { -; CHECK-LABEL: v16i16: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v1.8h, v1.8h, v3.8h -; CHECK-NEXT: uqsub v0.8h, v0.8h, v2.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v1.8h, v1.8h, v3.8h +; CHECK-SD-NEXT: uqsub v0.8h, v0.8h, v2.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.8h, v0.8h, v2.8h +; CHECK-GI-NEXT: uqsub v1.8h, v1.8h, v3.8h +; CHECK-GI-NEXT: ret %z = call <16 x i16> @llvm.usub.sat.v16i16(<16 x i16> %x, <16 x i16> %y) ret <16 x i16> %z } define <32 x i16> @v32i16(<32 x i16> %x, <32 x i16> %y) nounwind { -; CHECK-LABEL: v32i16: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.8h, v2.8h, v6.8h -; CHECK-NEXT: uqsub v0.8h, v0.8h, v4.8h -; CHECK-NEXT: uqsub v1.8h, v1.8h, v5.8h -; CHECK-NEXT: uqsub v3.8h, v3.8h, v7.8h -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v32i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v2.8h, v2.8h, v6.8h +; CHECK-SD-NEXT: uqsub v0.8h, v0.8h, v4.8h +; CHECK-SD-NEXT: uqsub v1.8h, v1.8h, v5.8h +; CHECK-SD-NEXT: uqsub v3.8h, v3.8h, v7.8h +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v32i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.8h, v0.8h, v4.8h +; CHECK-GI-NEXT: uqsub v1.8h, v1.8h, v5.8h +; CHECK-GI-NEXT: uqsub v2.8h, v2.8h, v6.8h +; CHECK-GI-NEXT: uqsub v3.8h, v3.8h, v7.8h +; CHECK-GI-NEXT: ret %z = call <32 x i16> @llvm.usub.sat.v32i16(<32 x i16> %x, <32 x i16> %y) ret <32 x i16> %z } @@ -136,16 +146,39 @@ define void @v8i8(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v4i8(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v4i8: -; CHECK: // %bb.0: -; CHECK-NEXT: ldr s0, [x0] -; CHECK-NEXT: ldr s1, [x1] -; CHECK-NEXT: ushll v0.8h, v0.8b, #0 -; CHECK-NEXT: ushll v1.8h, v1.8b, #0 -; CHECK-NEXT: uqsub v0.4h, v0.4h, v1.4h -; CHECK-NEXT: uzp1 v0.8b, v0.8b, v0.8b -; CHECK-NEXT: str s0, [x2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i8: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldr s0, [x0] +; CHECK-SD-NEXT: ldr s1, [x1] +; CHECK-SD-NEXT: ushll v0.8h, v0.8b, #0 +; CHECK-SD-NEXT: ushll v1.8h, v1.8b, #0 +; CHECK-SD-NEXT: uqsub v0.4h, v0.4h, v1.4h +; CHECK-SD-NEXT: uzp1 v0.8b, v0.8b, v0.8b +; CHECK-SD-NEXT: str s0, [x2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i8: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr w8, [x0] +; CHECK-GI-NEXT: ldr w9, [x1] +; CHECK-GI-NEXT: fmov s0, w8 +; CHECK-GI-NEXT: fmov s1, w9 +; CHECK-GI-NEXT: mov b2, v0.b[1] +; CHECK-GI-NEXT: mov b3, v1.b[1] +; CHECK-GI-NEXT: mov b4, v0.b[2] +; CHECK-GI-NEXT: mov b5, v0.b[3] +; CHECK-GI-NEXT: mov b6, v1.b[3] +; CHECK-GI-NEXT: mov v0.b[1], v2.b[0] +; CHECK-GI-NEXT: mov b2, v1.b[2] +; CHECK-GI-NEXT: mov v1.b[1], v3.b[0] +; CHECK-GI-NEXT: mov v0.b[2], v4.b[0] +; CHECK-GI-NEXT: mov v1.b[2], v2.b[0] +; CHECK-GI-NEXT: mov v0.b[3], v5.b[0] +; CHECK-GI-NEXT: mov v1.b[3], v6.b[0] +; CHECK-GI-NEXT: uqsub v0.8b, v0.8b, v1.8b +; CHECK-GI-NEXT: fmov w8, s0 +; CHECK-GI-NEXT: str w8, [x2] +; CHECK-GI-NEXT: ret %x = load <4 x i8>, ptr %px %y = load <4 x i8>, ptr %py %z = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %x, <4 x i8> %y) @@ -193,22 +226,36 @@ define void @v4i16(ptr %px, ptr %py, ptr %pz) nounwind { } define void @v2i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v2i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldrh w8, [x0] -; CHECK-NEXT: ldrh w9, [x1] -; CHECK-NEXT: ldrh w10, [x0, #2] -; CHECK-NEXT: ldrh w11, [x1, #2] -; CHECK-NEXT: fmov s0, w8 -; CHECK-NEXT: fmov s1, w9 -; CHECK-NEXT: mov v0.s[1], w10 -; 
CHECK-NEXT: mov v1.s[1], w11 -; CHECK-NEXT: uqsub v0.2s, v0.2s, v1.2s -; CHECK-NEXT: mov w8, v0.s[1] -; CHECK-NEXT: fmov w9, s0 -; CHECK-NEXT: strh w9, [x2] -; CHECK-NEXT: strh w8, [x2, #2] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v2i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldrh w8, [x0] +; CHECK-SD-NEXT: ldrh w9, [x1] +; CHECK-SD-NEXT: ldrh w10, [x0, #2] +; CHECK-SD-NEXT: ldrh w11, [x1, #2] +; CHECK-SD-NEXT: fmov s0, w8 +; CHECK-SD-NEXT: fmov s1, w9 +; CHECK-SD-NEXT: mov v0.s[1], w10 +; CHECK-SD-NEXT: mov v1.s[1], w11 +; CHECK-SD-NEXT: uqsub v0.2s, v0.2s, v1.2s +; CHECK-SD-NEXT: mov w8, v0.s[1] +; CHECK-SD-NEXT: fmov w9, s0 +; CHECK-SD-NEXT: strh w9, [x2] +; CHECK-SD-NEXT: strh w8, [x2, #2] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v2i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr h0, [x0] +; CHECK-GI-NEXT: ldr h1, [x0, #2] +; CHECK-GI-NEXT: ldr h2, [x1] +; CHECK-GI-NEXT: ldr h3, [x1, #2] +; CHECK-GI-NEXT: mov v0.h[1], v1.h[0] +; CHECK-GI-NEXT: mov v2.h[1], v3.h[0] +; CHECK-GI-NEXT: uqsub v0.4h, v0.4h, v2.4h +; CHECK-GI-NEXT: mov h1, v0.h[1] +; CHECK-GI-NEXT: str h0, [x2] +; CHECK-GI-NEXT: str h1, [x2, #2] +; CHECK-GI-NEXT: ret %x = load <2 x i16>, ptr %px %y = load <2 x i16>, ptr %py %z = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %x, <2 x i16> %y) @@ -226,15 +273,27 @@ define <12 x i8> @v12i8(<12 x i8> %x, <12 x i8> %y) nounwind { } define void @v12i16(ptr %px, ptr %py, ptr %pz) nounwind { -; CHECK-LABEL: v12i16: -; CHECK: // %bb.0: -; CHECK-NEXT: ldp q0, q3, [x1] -; CHECK-NEXT: ldp q1, q2, [x0] -; CHECK-NEXT: uqsub v0.8h, v1.8h, v0.8h -; CHECK-NEXT: uqsub v1.8h, v2.8h, v3.8h -; CHECK-NEXT: str q0, [x2] -; CHECK-NEXT: str d1, [x2, #16] -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v12i16: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: ldp q0, q3, [x1] +; CHECK-SD-NEXT: ldp q1, q2, [x0] +; CHECK-SD-NEXT: uqsub v0.8h, v1.8h, v0.8h +; CHECK-SD-NEXT: uqsub v1.8h, v2.8h, v3.8h +; CHECK-SD-NEXT: str q0, [x2] +; CHECK-SD-NEXT: str d1, [x2, #16] +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v12i16: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: ldr q0, [x0] +; CHECK-GI-NEXT: ldr q1, [x1] +; CHECK-GI-NEXT: ldr d2, [x0, #16] +; CHECK-GI-NEXT: ldr d3, [x1, #16] +; CHECK-GI-NEXT: uqsub v0.8h, v0.8h, v1.8h +; CHECK-GI-NEXT: uqsub v1.4h, v2.4h, v3.4h +; CHECK-GI-NEXT: str q0, [x2] +; CHECK-GI-NEXT: str d1, [x2, #16] +; CHECK-GI-NEXT: ret %x = load <12 x i16>, ptr %px %y = load <12 x i16>, ptr %py %z = call <12 x i16> @llvm.usub.sat.v12i16(<12 x i16> %x, <12 x i16> %y) @@ -334,23 +393,37 @@ define <4 x i32> @v4i32(<4 x i32> %x, <4 x i32> %y) nounwind { } define <8 x i32> @v8i32(<8 x i32> %x, <8 x i32> %y) nounwind { -; CHECK-LABEL: v8i32: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v1.4s, v1.4s, v3.4s -; CHECK-NEXT: uqsub v0.4s, v0.4s, v2.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v1.4s, v1.4s, v3.4s +; CHECK-SD-NEXT: uqsub v0.4s, v0.4s, v2.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.4s, v0.4s, v2.4s +; CHECK-GI-NEXT: uqsub v1.4s, v1.4s, v3.4s +; CHECK-GI-NEXT: ret %z = call <8 x i32> @llvm.usub.sat.v8i32(<8 x i32> %x, <8 x i32> %y) ret <8 x i32> %z } define <16 x i32> @v16i32(<16 x i32> %x, <16 x i32> %y) nounwind { -; CHECK-LABEL: v16i32: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.4s, v2.4s, v6.4s -; CHECK-NEXT: uqsub v0.4s, v0.4s, v4.4s -; CHECK-NEXT: uqsub v1.4s, v1.4s, v5.4s -; CHECK-NEXT: uqsub v3.4s, v3.4s, v7.4s -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v16i32: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: 
uqsub v2.4s, v2.4s, v6.4s +; CHECK-SD-NEXT: uqsub v0.4s, v0.4s, v4.4s +; CHECK-SD-NEXT: uqsub v1.4s, v1.4s, v5.4s +; CHECK-SD-NEXT: uqsub v3.4s, v3.4s, v7.4s +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v16i32: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.4s, v0.4s, v4.4s +; CHECK-GI-NEXT: uqsub v1.4s, v1.4s, v5.4s +; CHECK-GI-NEXT: uqsub v2.4s, v2.4s, v6.4s +; CHECK-GI-NEXT: uqsub v3.4s, v3.4s, v7.4s +; CHECK-GI-NEXT: ret %z = call <16 x i32> @llvm.usub.sat.v16i32(<16 x i32> %x, <16 x i32> %y) ret <16 x i32> %z } @@ -365,23 +438,37 @@ define <2 x i64> @v2i64(<2 x i64> %x, <2 x i64> %y) nounwind { } define <4 x i64> @v4i64(<4 x i64> %x, <4 x i64> %y) nounwind { -; CHECK-LABEL: v4i64: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v1.2d, v1.2d, v3.2d -; CHECK-NEXT: uqsub v0.2d, v0.2d, v2.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v4i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v1.2d, v1.2d, v3.2d +; CHECK-SD-NEXT: uqsub v0.2d, v0.2d, v2.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v4i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.2d, v0.2d, v2.2d +; CHECK-GI-NEXT: uqsub v1.2d, v1.2d, v3.2d +; CHECK-GI-NEXT: ret %z = call <4 x i64> @llvm.usub.sat.v4i64(<4 x i64> %x, <4 x i64> %y) ret <4 x i64> %z } define <8 x i64> @v8i64(<8 x i64> %x, <8 x i64> %y) nounwind { -; CHECK-LABEL: v8i64: -; CHECK: // %bb.0: -; CHECK-NEXT: uqsub v2.2d, v2.2d, v6.2d -; CHECK-NEXT: uqsub v0.2d, v0.2d, v4.2d -; CHECK-NEXT: uqsub v1.2d, v1.2d, v5.2d -; CHECK-NEXT: uqsub v3.2d, v3.2d, v7.2d -; CHECK-NEXT: ret +; CHECK-SD-LABEL: v8i64: +; CHECK-SD: // %bb.0: +; CHECK-SD-NEXT: uqsub v2.2d, v2.2d, v6.2d +; CHECK-SD-NEXT: uqsub v0.2d, v0.2d, v4.2d +; CHECK-SD-NEXT: uqsub v1.2d, v1.2d, v5.2d +; CHECK-SD-NEXT: uqsub v3.2d, v3.2d, v7.2d +; CHECK-SD-NEXT: ret +; +; CHECK-GI-LABEL: v8i64: +; CHECK-GI: // %bb.0: +; CHECK-GI-NEXT: uqsub v0.2d, v0.2d, v4.2d +; CHECK-GI-NEXT: uqsub v1.2d, v1.2d, v5.2d +; CHECK-GI-NEXT: uqsub v2.2d, v2.2d, v6.2d +; CHECK-GI-NEXT: uqsub v3.2d, v3.2d, v7.2d +; CHECK-GI-NEXT: ret %z = call <8 x i64> @llvm.usub.sat.v8i64(<8 x i64> %x, <8 x i64> %y) ret <8 x i64> %z } -- cgit v1.1 From cd7517859eef14d8b38cec2d52c0625a58c645a2 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Wed, 3 Apr 2024 16:10:19 +0800 Subject: Revert "[Win32][ELF] Make CodeView a DebugInfoFormat only for COFF format (#87149)" This reverts commit 4b25053ae47f50095371a663391baadfd2694eb0. There're failures in some target. --- clang/lib/Driver/ToolChains/MSVC.h | 5 +++-- clang/test/Misc/win32-elf.c | 5 ----- 2 files changed, 3 insertions(+), 7 deletions(-) delete mode 100644 clang/test/Misc/win32-elf.c diff --git a/clang/lib/Driver/ToolChains/MSVC.h b/clang/lib/Driver/ToolChains/MSVC.h index 3950a8e..48369e0 100644 --- a/clang/lib/Driver/ToolChains/MSVC.h +++ b/clang/lib/Driver/ToolChains/MSVC.h @@ -61,8 +61,9 @@ public: /// formats, and to DWARF otherwise. Users can use -gcodeview and -gdwarf to /// override the default. llvm::codegenoptions::DebugInfoFormat getDefaultDebugFormat() const override { - return getTriple().isOSBinFormatCOFF() ? llvm::codegenoptions::DIF_CodeView - : llvm::codegenoptions::DIF_DWARF; + return getTriple().isOSBinFormatMachO() + ? 
llvm::codegenoptions::DIF_DWARF
+               : llvm::codegenoptions::DIF_CodeView;
   }
 
   /// Set the debugger tuning to "default", since we're definitely not tuning
diff --git a/clang/test/Misc/win32-elf.c b/clang/test/Misc/win32-elf.c
deleted file mode 100644
index f75281d..0000000
--- a/clang/test/Misc/win32-elf.c
+++ /dev/null
@@ -1,5 +0,0 @@
-// Check that basic use of win32-elf targets works.
-// RUN: %clang -fsyntax-only -target x86_64-pc-win32-elf %s
-
-// RUN: %clang -fsyntax-only -target x86_64-pc-win32-elf -g %s -### 2>&1 | FileCheck %s -check-prefix=DEBUG-INFO
-// DEBUG-INFO: -dwarf-version={{.*}}
-- 
cgit v1.1


From 4dd103e9c65de7d3dbf12e76fbb72724127ec325 Mon Sep 17 00:00:00 2001
From: Elizaveta Noskova <159026035+enoskova-sc@users.noreply.github.com>
Date: Wed, 3 Apr 2024 11:22:43 +0300
Subject: [CodeGen][ShrinkWrap] Clarify StackAddressUsedBlockInfo meaning
 (#80679)

---
 llvm/lib/CodeGen/ShrinkWrap.cpp | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/CodeGen/ShrinkWrap.cpp b/llvm/lib/CodeGen/ShrinkWrap.cpp
index ab57d08..a4b2299 100644
--- a/llvm/lib/CodeGen/ShrinkWrap.cpp
+++ b/llvm/lib/CodeGen/ShrinkWrap.cpp
@@ -161,9 +161,11 @@ class ShrinkWrap : public MachineFunctionPass {
   /// Current MachineFunction.
   MachineFunction *MachineFunc = nullptr;
 
-  /// Is `true` for block numbers where we can guarantee no stack access
-  /// or computation of stack-relative addresses on any CFG path including
-  /// the block itself.
+  /// Is `true` for the block numbers where we assume possible stack accesses
+  /// or computation of stack-relative addresses on any CFG path including the
+  /// block itself. Is `false` for basic blocks where we can guarantee the
+  /// opposite. False positives won't lead to incorrect analysis results,
+  /// therefore this approach is fair.
   BitVector StackAddressUsedBlockInfo;
 
   /// Check if \p MI uses or defines a callee-saved register or
@@ -948,6 +950,9 @@ bool ShrinkWrap::runOnMachineFunction(MachineFunction &MF) {
 
   bool Changed = false;
 
+  // Initially, conservatively assume that stack addresses can be used in each
+  // basic block and change the state only for those basic blocks for which we
+  // were able to prove the opposite.
   StackAddressUsedBlockInfo.resize(MF.getNumBlockIDs(), true);
   bool HasCandidate = performShrinkWrapping(RPOT, RS.get());
   StackAddressUsedBlockInfo.clear();
-- 
cgit v1.1


From 72c29fa9e226a928b3d3a01d74f6b44a0b31b7d4 Mon Sep 17 00:00:00 2001
From: Chuanqi Xu
Date: Wed, 3 Apr 2024 16:15:30 +0800
Subject: [C++20] [Modules] [Driver] Emit unused argument warning if we use
 '-fmodule-output' with non-module input

We required that the file name of an 'importable module unit' end with
.cppm (or .ccm, .cxxm, .c++m), but the driver can accept
'-fmodule-output' for files with normal suffixes (e.g., .cpp). This is
somewhat inconsistent.

In this patch, we only claim the option `-fmodule-output` as used if the
type of the input file is module related. The compiler will now emit
'unused argument' warnings if the input file is not module related.
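As a rough illustration of the intended behavior (these invocations and file
names are hypothetical, not part of this patch), the driver is expected to
behave roughly as follows:

  $ clang -std=c++20 -fmodule-output -c Hello.cppm
  # module interface unit: '-fmodule-output' is claimed, Hello.pcm is emitted,
  # and no warning is issued

  $ clang -std=c++20 -fmodule-output -c a.cpp
  # ordinary C++ source: the flag is left unclaimed, so the driver now reports
  #   warning: argument unused during compilation: '-fmodule-output'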
---
 clang/docs/ReleaseNotes.rst | 3 +++
 clang/lib/Driver/ToolChains/Clang.cpp | 15 ++++++++++++---
 clang/test/Driver/module-output.cppm | 8 ++++++++
 3 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d5ce54e..3237842 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -328,6 +328,9 @@ Improvements to Clang's diagnostics
 - New ``-Wformat-signedness`` diagnostic that warn if the format string requires an unsigned argument and the argument is signed and vice versa.
+- Clang now emits ``unused argument`` warning when the -fmodule-output flag is used
+ with an input that is not of type c++-module.
+
 Improvements to Clang's time-trace
 ----------------------------------
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index b03ac60..7fd6ad6 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -4045,9 +4045,18 @@ static bool RenderModulesOptions(Compilation &C, const Driver &D,
 // module fragment.
 CmdArgs.push_back("-fskip-odr-check-in-gmf");
- // Claim `-fmodule-output` and `-fmodule-output=` to avoid unused warnings.
- Args.ClaimAllArgs(options::OPT_fmodule_output);
- Args.ClaimAllArgs(options::OPT_fmodule_output_EQ);
+ // We need to include the case the input file is a module file here.
+ // Since the default compilation model for C++ module interface unit will
+ // create temporary module file and compile the temporary module file
+ // to get the object file. Then the `-fmodule-output` flag will be
+ // brought to the second compilation process. So we have to claim it for
+ // the case too.
+ if (Input.getType() == driver::types::TY_CXXModule ||
+ Input.getType() == driver::types::TY_PP_CXXModule ||
+ Input.getType() == driver::types::TY_ModuleFile) {
+ Args.ClaimAllArgs(options::OPT_fmodule_output);
+ Args.ClaimAllArgs(options::OPT_fmodule_output_EQ);
+ }
 return HaveModules;
 }
diff --git a/clang/test/Driver/module-output.cppm b/clang/test/Driver/module-output.cppm
index d0cab0cb..dea9cf9 100644
--- a/clang/test/Driver/module-output.cppm
+++ b/clang/test/Driver/module-output.cppm
@@ -33,6 +33,9 @@
 // RUN: %clang -std=c++20 %t/Hello.cppm -fmodule-output=%t/Hello.pcm -fmodule-output -c -fsyntax-only \
 // RUN: -### 2>&1 | FileCheck %t/Hello.cppm --check-prefix=CHECK-NOT-USED
+// Test that we can emit a warning if the type of the input file is not a module interface unit.
+// RUN: %clang -std=c++20 %t/a.cpp -fmodule-output -c -o %t/a.o -### 2>&1 | FileCheck %t/a.cpp
+
 //--- Hello.cppm
 export module Hello;
@@ -55,3 +58,8 @@ export module AnotherModule;
 // CHECK: "-emit-obj" {{.*}}"-main-file-name" "Hello.cppm" {{.*}}"-o" "{{.*}}/Hello-{{.*}}.o" "-x" "pcm" "{{.*}}/Hello.pcm"
 // CHECK: "-emit-module-interface" {{.*}}"-main-file-name" "AnotherModule.cppm" {{.*}}"-o" "{{.*}}/AnotherModule.pcm" "-x" "c++" "{{.*}}/AnotherModule.cppm"
 // CHECK: "-emit-obj" {{.*}}"-main-file-name" "AnotherModule.cppm" {{.*}}"-o" "{{.*}}/AnotherModule-{{.*}}.o" "-x" "pcm" "{{.*}}/AnotherModule.pcm"
+
+//--- a.cpp
+export module a;
+
+// CHECK: warning: argument unused during compilation: '-fmodule-output'
-- cgit v1.1

From 37eb0d4948dad6d2399915fde6eb5800c3fe825b Mon Sep 17 00:00:00 2001
From: Chuanqi Xu
Date: Wed, 3 Apr 2024 16:38:57 +0800
Subject: [NFC] Check the nullness of the pointer before dereferencing it in the assertion

This was part of https://github.com/llvm/llvm-project/pull/85050.
It was suggested to split out unrelated changes as much as possible, so here is the patch.
---
 clang/lib/Serialization/GeneratePCH.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/lib/Serialization/GeneratePCH.cpp b/clang/lib/Serialization/GeneratePCH.cpp
index fa71226..2fece29 100644
--- a/clang/lib/Serialization/GeneratePCH.cpp
+++ b/clang/lib/Serialization/GeneratePCH.cpp
@@ -102,7 +102,7 @@ ReducedBMIGenerator::ReducedBMIGenerator(Preprocessor &PP,
 Module *ReducedBMIGenerator::getEmittingModule(ASTContext &Ctx) {
 Module *M = Ctx.getCurrentNamedModule();
- assert(M->isNamedModuleUnit() &&
+ assert(M && M->isNamedModuleUnit() &&
 "ReducedBMIGenerator should only be used with C++20 Named modules.");
 return M;
 }
-- cgit v1.1

From e5abd963c758bcfa1380d688bec31dddc834a2dd Mon Sep 17 00:00:00 2001
From: Florian Hahn
Date: Wed, 3 Apr 2024 09:43:11 +0100
Subject: [VPlan] Remove VPTransformState::addMetadata with ArrayRef arg (NFCI).

addMetadata is only ever called with a single element, so clean up the variant that takes multiple values.
---
 llvm/lib/Transforms/Vectorize/VPlan.cpp | 17 ++++------------
 llvm/lib/Transforms/Vectorize/VPlan.h | 6 +-----
 2 files changed, 5 insertions(+), 18 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp
index 9a8f53c..f0b7008 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp
@@ -358,23 +358,14 @@ void VPTransformState::addNewMetadata(Instruction *To,
 LVer->annotateInstWithNoAlias(To, Orig);
 }
-void VPTransformState::addMetadata(Instruction *To, Instruction *From) {
+void VPTransformState::addMetadata(Value *To, Instruction *From) {
 // No source instruction to transfer metadata from?
 if (!From)
 return;
- propagateMetadata(To, From);
- addNewMetadata(To, From);
-}
-
-void VPTransformState::addMetadata(ArrayRef To, Instruction *From) {
- // No source instruction to transfer metadata from?
- if (!From)
- return;
-
- for (Value *V : To) {
- if (Instruction *I = dyn_cast(V))
- addMetadata(I, From);
+ if (Instruction *ToI = dyn_cast(To)) {
+ propagateMetadata(ToI, From);
+ addNewMetadata(ToI, From);
 }
 }
diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h
index 707a826..813ebda 100644
--- a/llvm/lib/Transforms/Vectorize/VPlan.h
+++ b/llvm/lib/Transforms/Vectorize/VPlan.h
@@ -346,11 +346,7 @@ struct VPTransformState {
 /// This includes both the original MDs from \p From and additional ones (\see
 /// addNewMetadata). Use this for *newly created* instructions in the vector
 /// loop.
- void addMetadata(Instruction *To, Instruction *From);
-
- /// Similar to the previous function but it adds the metadata to a
- /// vector of instructions.
- void addMetadata(ArrayRef To, Instruction *From);
+ void addMetadata(Value *To, Instruction *From);
 /// Set the debug location in the builder using the debug location \p DL.
 void setDebugLocFrom(DebugLoc DL);
-- cgit v1.1

From 29c7d1a60c9d45e82f08cd7487178846ed5f9c6d Mon Sep 17 00:00:00 2001
From: Chen Zheng
Date: Wed, 3 Apr 2024 04:45:40 -0400
Subject: [PPC] [NFC] add testcase for more store forwarding
---
 llvm/test/CodeGen/PowerPC/legalize-vaarg.ll | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll b/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
index b7f8b8a..8980049 100644
--- a/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
+++ b/llvm/test/CodeGen/PowerPC/legalize-vaarg.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ;RUN: llc < %s --mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=BE
 ;RUN: llc < %s --mtriple=powerpc64le-unknown-linux-gnu -mattr=+altivec | FileCheck %s -check-prefix=LE
+;RUN: llc < %s --mtriple=powerpc64-unknown-linux-gnu -mattr=+altivec -ppc-gather-alias-max-depth=0 | FileCheck %s -check-prefix=FORWARD
 define <8 x i32> @test_large_vec_vaarg(i32 %n, ...) {
 ; BE-LABEL: test_large_vec_vaarg:
@@ -35,6 +36,22 @@ define <8 x i32> @test_large_vec_vaarg(i32 %n, ...) {
 ; LE-NEXT: lxvd2x 0, 0, 3
 ; LE-NEXT: xxswapd 35, 0
 ; LE-NEXT: blr
+;
+; FORWARD-LABEL: test_large_vec_vaarg:
+; FORWARD: # %bb.0:
+; FORWARD-NEXT: ld 3, -8(1)
+; FORWARD-NEXT: addi 3, 3, 15
+; FORWARD-NEXT: rldicr 3, 3, 0, 59
+; FORWARD-NEXT: addi 4, 3, 16
+; FORWARD-NEXT: std 4, -8(1)
+; FORWARD-NEXT: ld 4, -8(1)
+; FORWARD-NEXT: lvx 2, 0, 3
+; FORWARD-NEXT: addi 4, 4, 15
+; FORWARD-NEXT: rldicr 3, 4, 0, 59
+; FORWARD-NEXT: addi 4, 3, 16
+; FORWARD-NEXT: std 4, -8(1)
+; FORWARD-NEXT: lvx 3, 0, 3
+; FORWARD-NEXT: blr
 %args = alloca ptr, align 4
 %x = va_arg ptr %args, <8 x i32>
 ret <8 x i32> %x
-- cgit v1.1

From 7c7ce0b9b1cef51e24f2dc7e904a8adf6aaf1abf Mon Sep 17 00:00:00 2001
From: Jay Foad
Date: Wed, 3 Apr 2024 10:17:25 +0100
Subject: [AMDGPU] Remove useless aliases for FLAT instructions. NFC. (#87462)

We were generating "" (the empty string) as an alias for a bunch of FLAT instructions, which had no effect except to cause tablegen to generate some very long if-else chains in the generated AsmMatcher.
---
 llvm/lib/Target/AMDGPU/FLATInstructions.td | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/FLATInstructions.td b/llvm/lib/Target/AMDGPU/FLATInstructions.td
index d017ec4..27d5616 100644
--- a/llvm/lib/Target/AMDGPU/FLATInstructions.td
+++ b/llvm/lib/Target/AMDGPU/FLATInstructions.td
@@ -2558,7 +2558,7 @@ multiclass VFLAT_Real_Base_gfx12 op,
 multiclass VFLAT_Real_Atomics_gfx12 op,
 string name = get_FLAT_ps.Mnemonic,
- string alias = ""> :
+ string alias = name> :
 VFLAT_Real_Base_gfx12 {
 defm _RTN : VFLAT_Real_gfx12;
 }
@@ -2581,7 +2581,7 @@ multiclass VGLOBAL_Real_AllAddr_gfx12_w64 op,
 multiclass VGLOBAL_Real_Atomics_gfx12 op,
 string name = get_FLAT_ps.Mnemonic,
- string alias = ""> :
+ string alias = name> :
 VGLOBAL_Real_AllAddr_gfx12 {
 defm _RTN : VFLAT_Real_gfx12;
 defm _SADDR_RTN : VFLAT_Real_gfx12;
-- cgit v1.1

From e05c1b46d0d3739cc48ad912dbe6e9affce05927 Mon Sep 17 00:00:00 2001
From: Daniel Grumberg
Date: Wed, 3 Apr 2024 10:18:05 +0100
Subject: Reenable external categories (#87357)

Reenables b31414bf4f9898f7817a9fcf8a91f62ec26f3eaf. Also adds a new warning for missing `--symbol-graph-dir` arg when `--emit-extension-symbol-graphs` is provided. This also reverts the commit that removed it.
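
As a rough usage sketch based on the driver options added below (Input.m and the output directory are placeholders, not part of the patch):

  # Emit symbol graphs as a side effect of compilation.
  clang -c Input.m -emit-symbol-graph --symbol-graph-dir=./symbol-graphs --pretty-sgf
  # Additionally emit separate symbol graphs for extended modules; per the new
  # diagnostics, omitting --symbol-graph-dir= with this flag is now diagnosed.
  clang -c Input.m -emit-symbol-graph --emit-extension-symbol-graphs --symbol-graph-dir=./symbol-graphs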
--- clang/include/clang/Basic/DiagnosticDriverKinds.td | 6 + .../include/clang/Basic/DiagnosticFrontendKinds.td | 4 + clang/include/clang/Basic/DiagnosticGroups.td | 2 + clang/include/clang/Driver/Options.td | 21 +- clang/include/clang/ExtractAPI/API.h | 1547 +++++++++---------- clang/include/clang/ExtractAPI/APIRecords.inc | 103 ++ .../clang/ExtractAPI/DeclarationFragments.h | 14 + .../clang/ExtractAPI/ExtractAPIActionBase.h | 8 +- clang/include/clang/ExtractAPI/ExtractAPIVisitor.h | 674 +++++---- clang/include/clang/ExtractAPI/FrontendActions.h | 6 - .../clang/ExtractAPI/Serialization/APISetVisitor.h | 172 +++ .../ExtractAPI/Serialization/SerializerBase.h | 314 ---- .../Serialization/SymbolGraphSerializer.h | 254 ++-- clang/include/clang/Frontend/FrontendOptions.h | 21 +- clang/lib/Driver/Driver.cpp | 7 + clang/lib/Driver/ToolChains/Clang.cpp | 15 + clang/lib/ExtractAPI/API.cpp | 544 +------ clang/lib/ExtractAPI/DeclarationFragments.cpp | 71 +- clang/lib/ExtractAPI/ExtractAPIConsumer.cpp | 112 +- .../Serialization/SymbolGraphSerializer.cpp | 943 +++++------- .../ExtractAPI/TypedefUnderlyingTypeResolver.cpp | 6 +- .../lib/FrontendTool/ExecuteCompilerInvocation.cpp | 10 +- .../test/ExtractAPI/anonymous_record_no_typedef.c | 3 +- clang/test/ExtractAPI/availability.c | 2 +- clang/test/ExtractAPI/bool.c | 2 +- clang/test/ExtractAPI/bool.cpp | 2 +- clang/test/ExtractAPI/class.cpp | 2 +- clang/test/ExtractAPI/class_template.cpp | 2 +- .../class_template_param_inheritance.cpp | 2 +- .../ExtractAPI/class_template_partial_spec.cpp | 4 +- clang/test/ExtractAPI/class_template_spec.cpp | 2 +- clang/test/ExtractAPI/concept.cpp | 2 +- clang/test/ExtractAPI/constructor_destructor.cpp | 6 +- clang/test/ExtractAPI/conversions.cpp | 2 +- .../test/ExtractAPI/emit-symbol-graph/multi_file.c | 7 +- .../ExtractAPI/emit-symbol-graph/single_file.c | 5 +- clang/test/ExtractAPI/enum.c | 2 +- clang/test/ExtractAPI/field_template.cpp | 2 +- clang/test/ExtractAPI/function_noexcepts.cpp | 2 +- clang/test/ExtractAPI/global_func_template.cpp | 2 +- .../test/ExtractAPI/global_func_template_spec.cpp | 2 +- clang/test/ExtractAPI/global_record.c | 2 +- clang/test/ExtractAPI/global_record_multifile.c | 2 +- clang/test/ExtractAPI/global_var_template.cpp | 2 +- .../global_var_template_partial_spec.cpp | 2 +- clang/test/ExtractAPI/global_var_template_spec.cpp | 2 +- clang/test/ExtractAPI/known_files_only.c | 101 +- clang/test/ExtractAPI/language.c | 6 +- clang/test/ExtractAPI/macro_undefined.c | 2 +- clang/test/ExtractAPI/macros.c | 2 +- clang/test/ExtractAPI/metadata_and_module.c | 32 + clang/test/ExtractAPI/method_template.cpp | 2 +- clang/test/ExtractAPI/method_template_spec.cpp | 2 +- clang/test/ExtractAPI/methods.cpp | 660 +++------ clang/test/ExtractAPI/multiple_inheritance.cpp | 2 +- clang/test/ExtractAPI/namespace.cpp | 2 +- clang/test/ExtractAPI/nested_namespaces.cpp | 2 +- clang/test/ExtractAPI/objc_block.m | 1567 ++++++++------------ clang/test/ExtractAPI/objc_category.m | 338 +---- clang/test/ExtractAPI/objc_external_category.m | 49 + clang/test/ExtractAPI/objc_id_protocol.m | 357 +---- clang/test/ExtractAPI/objc_instancetype.m | 4 +- clang/test/ExtractAPI/objc_interface.m | 1033 +++++-------- clang/test/ExtractAPI/objc_module_category.m | 404 ----- clang/test/ExtractAPI/objc_property.m | 600 +------- clang/test/ExtractAPI/objc_protocol.m | 2 +- clang/test/ExtractAPI/objc_various_categories.m | 507 ------- clang/test/ExtractAPI/operator_overload.cpp | 2 +- clang/test/ExtractAPI/relative_include.m | 2 +- 
clang/test/ExtractAPI/simple_inheritance.cpp | 2 +- clang/test/ExtractAPI/struct.c | 2 +- clang/test/ExtractAPI/typedef.c | 464 ++---- clang/test/ExtractAPI/typedef_anonymous_record.c | 612 ++------ clang/test/ExtractAPI/typedef_chain.c | 2 +- clang/test/ExtractAPI/typedef_struct_enum.c | 561 ++----- clang/test/ExtractAPI/underscored.c | 411 +---- clang/test/ExtractAPI/union.c | 4 +- clang/test/ExtractAPI/vfs_redirected_include.m | 2 +- clang/test/Index/extract-api-cursor.m | 9 + clang/tools/libclang/CXExtractAPI.cpp | 73 +- 80 files changed, 3892 insertions(+), 8845 deletions(-) create mode 100644 clang/include/clang/ExtractAPI/APIRecords.inc create mode 100644 clang/include/clang/ExtractAPI/Serialization/APISetVisitor.h delete mode 100644 clang/include/clang/ExtractAPI/Serialization/SerializerBase.h create mode 100644 clang/test/ExtractAPI/metadata_and_module.c create mode 100644 clang/test/ExtractAPI/objc_external_category.m delete mode 100644 clang/test/ExtractAPI/objc_module_category.m delete mode 100644 clang/test/ExtractAPI/objc_various_categories.m diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 592ed3b..3d86f75 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -548,6 +548,12 @@ def err_drv_extract_api_wrong_kind : Error< "header file '%0' input '%1' does not match the type of prior input " "in api extraction; use '-x %2' to override">; +def err_drv_missing_symbol_graph_dir: Error< + "Must provide a symbol graph output directory using --symbol-graph-dir=">; + +def err_drv_unexpected_symbol_graph_output : Error< + "Unexpected output symbol graph '%1'; please provide --symbol-graph-dir= instead">; + def warn_slash_u_filename : Warning<"'/U%0' treated as the '/U' option">, InGroup>; def note_use_dashdash : Note< diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index ba23cf8..14b08d4 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -366,4 +366,8 @@ def warn_profile_data_misexpect : Warning< def err_extract_api_ignores_file_not_found : Error<"file '%0' specified by '--extract-api-ignores=' not found">, DefaultFatal; +def warn_missing_symbol_graph_dir : Warning< + "Missing symbol graph output directory, defaulting to working directory">, + InGroup; + } diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index 520168f..5251774 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -1517,3 +1517,5 @@ def UnsafeBufferUsage : DiagGroup<"unsafe-buffer-usage", [UnsafeBufferUsageInCon // Warnings and notes InstallAPI verification. def InstallAPIViolation : DiagGroup<"installapi-violation">; +// Warnings about misuse of ExtractAPI options. 
+def ExtractAPIMisuse : DiagGroup<"extractapi-misuse">; diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index f5289fb..c3e90a7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -1507,14 +1507,29 @@ def extract_api : Flag<["-"], "extract-api">, def product_name_EQ: Joined<["--"], "product-name=">, Visibility<[ClangOption, CC1Option]>, MarshallingInfoString>; -def emit_symbol_graph_EQ: JoinedOrSeparate<["--"], "emit-symbol-graph=">, +def emit_symbol_graph: Flag<["-"], "emit-symbol-graph">, Visibility<[ClangOption, CC1Option]>, - HelpText<"Generate Extract API information as a side effect of compilation.">, - MarshallingInfoString>; + HelpText<"Generate Extract API information as a side effect of compilation.">, + MarshallingInfoFlag>; +def emit_extension_symbol_graphs: Flag<["--"], "emit-extension-symbol-graphs">, + Visibility<[ClangOption, CC1Option]>, + HelpText<"Generate additional symbol graphs for extended modules.">, + MarshallingInfoFlag>; def extract_api_ignores_EQ: CommaJoined<["--"], "extract-api-ignores=">, Visibility<[ClangOption, CC1Option]>, HelpText<"Comma separated list of files containing a new line separated list of API symbols to ignore when extracting API information.">, MarshallingInfoStringVector>; +def symbol_graph_dir_EQ: Joined<["--"], "symbol-graph-dir=">, + Visibility<[ClangOption, CC1Option]>, + HelpText<"Directory in which to emit symbol graphs.">, + MarshallingInfoString>; +def emit_pretty_sgf: Flag<["--"], "pretty-sgf">, + Visibility<[ClangOption, CC1Option]>, + HelpText<"Emit pretty printed symbol graphs">, + MarshallingInfoFlag>; +def emit_sgf_symbol_labels_for_testing: Flag<["--"], "emit-sgf-symbol-labels-for-testing">, + Visibility<[CC1Option]>, + MarshallingInfoFlag>; def e : Separate<["-"], "e">, Flags<[LinkerInput]>, Group; def fmax_tokens_EQ : Joined<["-"], "fmax-tokens=">, Group, Visibility<[ClangOption, CC1Option]>, diff --git a/clang/include/clang/ExtractAPI/API.h b/clang/include/clang/ExtractAPI/API.h index b220db2..92cacf6 100644 --- a/clang/include/clang/ExtractAPI/API.h +++ b/clang/include/clang/ExtractAPI/API.h @@ -20,17 +20,25 @@ #include "clang/AST/Availability.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" #include "clang/AST/DeclObjC.h" #include "clang/AST/RawCommentList.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" #include "clang/ExtractAPI/DeclarationFragments.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Casting.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/TargetParser/Triple.h" +#include +#include #include +#include #include namespace clang { @@ -149,15 +157,58 @@ public: /// \endcode using DocComment = std::vector; -// Classes deriving from APIRecord need to have USR be the first constructor -// argument. This is so that they are compatible with `addTopLevelRecord` -// defined in API.cpp +struct APIRecord; + +// This represents a reference to another symbol that might come from external +/// sources. +struct SymbolReference { + StringRef Name; + StringRef USR; + + /// The source project/module/product of the referred symbol. 
+ StringRef Source; + + // A Pointer to the APIRecord for this reference if known + const APIRecord *Record = nullptr; + + SymbolReference() = default; + SymbolReference(StringRef Name, StringRef USR, StringRef Source = "") + : Name(Name), USR(USR), Source(Source) {} + SymbolReference(const APIRecord *R); + + /// Determine if this SymbolReference is empty. + /// + /// \returns true if and only if all \c Name, \c USR, and \c Source is empty. + bool empty() const { return Name.empty() && USR.empty() && Source.empty(); } +}; + +class RecordContext; + +// Concrete classes deriving from APIRecord need to have a construct with first +// arguments USR, and Name, in that order. This is so that they +// are compatible with `APISet::createRecord`. +// When adding a new kind of record don't forget to update APIRecords.inc! /// The base representation of an API record. Holds common symbol information. struct APIRecord { /// Discriminator for LLVM-style RTTI (dyn_cast<> et al.) enum RecordKind { RK_Unknown, + // If adding a record context record kind here make sure to update + // RecordContext::classof if needed and add a RECORD_CONTEXT entry to + // APIRecords.inc + RK_FirstRecordContext, RK_Namespace, + RK_Enum, + RK_Struct, + RK_Union, + RK_ObjCInterface, + RK_ObjCCategory, + RK_ObjCProtocol, + RK_CXXClass, + RK_ClassTemplate, + RK_ClassTemplateSpecialization, + RK_ClassTemplatePartialSpecialization, + RK_LastRecordContext, RK_GlobalFunction, RK_GlobalFunctionTemplate, RK_GlobalFunctionTemplateSpecialization, @@ -166,18 +217,11 @@ struct APIRecord { RK_GlobalVariableTemplateSpecialization, RK_GlobalVariableTemplatePartialSpecialization, RK_EnumConstant, - RK_Enum, RK_StructField, - RK_Struct, RK_UnionField, - RK_Union, RK_StaticField, RK_CXXField, RK_CXXFieldTemplate, - RK_CXXClass, - RK_ClassTemplate, - RK_ClassTemplateSpecialization, - RK_ClassTemplatePartialSpecialization, RK_Concept, RK_CXXStaticMethod, RK_CXXInstanceMethod, @@ -190,40 +234,15 @@ struct APIRecord { RK_ObjCIvar, RK_ObjCClassMethod, RK_ObjCInstanceMethod, - RK_ObjCInterface, - RK_ObjCCategory, - RK_ObjCCategoryModule, - RK_ObjCProtocol, RK_MacroDefinition, RK_Typedef, }; - /// Stores information about the context of the declaration of this API. - /// This is roughly analogous to the DeclContext hierarchy for an AST Node. - struct HierarchyInformation { - /// The USR of the parent API. - StringRef ParentUSR; - /// The name of the parent API. - StringRef ParentName; - /// The record kind of the parent API. - RecordKind ParentKind = RK_Unknown; - /// A pointer to the parent APIRecord if known. - APIRecord *ParentRecord = nullptr; - - HierarchyInformation() = default; - HierarchyInformation(StringRef ParentUSR, StringRef ParentName, - RecordKind Kind, APIRecord *ParentRecord = nullptr) - : ParentUSR(ParentUSR), ParentName(ParentName), ParentKind(Kind), - ParentRecord(ParentRecord) {} - - bool empty() const { - return ParentUSR.empty() && ParentName.empty() && - ParentKind == RK_Unknown && ParentRecord == nullptr; - } - }; - StringRef USR; StringRef Name; + + SymbolReference Parent; + PresumedLoc Location; AvailabilityInfo Availability; LinkageInfo Linkage; @@ -242,79 +261,169 @@ struct APIRecord { /// Objective-C class/instance methods). DeclarationFragments SubHeading; - /// Information about the parent record of this record. - HierarchyInformation ParentInformation; - /// Whether the symbol was defined in a system header. 
bool IsFromSystemHeader; + AccessControl Access; + private: const RecordKind Kind; + friend class RecordContext; + // Used to store the next child record in RecordContext. This works because + // APIRecords semantically only have one parent. + mutable APIRecord *NextInContext = nullptr; public: + APIRecord *getNextInContext() const { return NextInContext; } + RecordKind getKind() const { return Kind; } + static APIRecord *castFromRecordContext(const RecordContext *Ctx); + static RecordContext *castToRecordContext(const APIRecord *Record); + APIRecord() = delete; APIRecord(RecordKind Kind, StringRef USR, StringRef Name, - PresumedLoc Location, AvailabilityInfo Availability, - LinkageInfo Linkage, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - bool IsFromSystemHeader) - : USR(USR), Name(Name), Location(Location), + SymbolReference Parent, PresumedLoc Location, + AvailabilityInfo Availability, LinkageInfo Linkage, + const DocComment &Comment, DeclarationFragments Declaration, + DeclarationFragments SubHeading, bool IsFromSystemHeader, + AccessControl Access = AccessControl()) + : USR(USR), Name(Name), Parent(std::move(Parent)), Location(Location), Availability(std::move(Availability)), Linkage(Linkage), Comment(Comment), Declaration(Declaration), SubHeading(SubHeading), - IsFromSystemHeader(IsFromSystemHeader), Kind(Kind) {} + IsFromSystemHeader(IsFromSystemHeader), Access(std::move(Access)), + Kind(Kind) {} APIRecord(RecordKind Kind, StringRef USR, StringRef Name) : USR(USR), Name(Name), Kind(Kind) {} // Pure virtual destructor to make APIRecord abstract virtual ~APIRecord() = 0; + static bool classof(const APIRecord *Record) { return true; } + static bool classofKind(RecordKind K) { return true; } + static bool classof(const RecordContext *Ctx) { return true; } +}; + +/// Base class used for specific record types that have children records this is +/// analogous to the DeclContext for the AST +class RecordContext { +public: + static bool classof(const APIRecord *Record) { + return classofKind(Record->getKind()); + } + static bool classofKind(APIRecord::RecordKind K) { + return K > APIRecord::RK_FirstRecordContext && + K < APIRecord::RK_LastRecordContext; + } + + static bool classof(const RecordContext *Context) { return true; } + + RecordContext(APIRecord::RecordKind Kind) : Kind(Kind) {} + + APIRecord::RecordKind getKind() const { return Kind; } + + struct record_iterator { + private: + APIRecord *Current = nullptr; + + public: + using value_type = APIRecord *; + using reference = const value_type &; + using pointer = const value_type *; + using iterator_category = std::forward_iterator_tag; + using difference_type = std::ptrdiff_t; + + record_iterator() = default; + explicit record_iterator(value_type R) : Current(R) {} + reference operator*() const { return Current; } + // This doesn't strictly meet the iterator requirements, but it's the + // behavior we want here. 
+ value_type operator->() const { return Current; } + record_iterator &operator++() { + Current = Current->getNextInContext(); + return *this; + } + record_iterator operator++(int) { + record_iterator tmp(*this); + ++(*this); + return tmp; + } + + friend bool operator==(record_iterator x, record_iterator y) { + return x.Current == y.Current; + } + friend bool operator!=(record_iterator x, record_iterator y) { + return x.Current != y.Current; + } + }; + + using record_range = llvm::iterator_range; + record_range records() const { + return record_range(records_begin(), records_end()); + } + record_iterator records_begin() const { return record_iterator(First); }; + record_iterator records_end() const { return record_iterator(); } + bool records_empty() const { return First == nullptr; }; + +private: + APIRecord::RecordKind Kind; + mutable APIRecord *First = nullptr; + mutable APIRecord *Last = nullptr; + +protected: + friend class APISet; + void addToRecordChain(APIRecord *) const; }; -struct NamespaceRecord : APIRecord { - NamespaceRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, +struct NamespaceRecord : APIRecord, RecordContext { + NamespaceRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + LinkageInfo Linkage, const DocComment &Comment, + DeclarationFragments Declaration, DeclarationFragments SubHeading, bool IsFromSystemHeader) - : APIRecord(RK_Namespace, USR, Name, Loc, std::move(Availability), + : APIRecord(RK_Namespace, USR, Name, Parent, Loc, std::move(Availability), Linkage, Comment, Declaration, SubHeading, - IsFromSystemHeader) {} + IsFromSystemHeader), + RecordContext(RK_Namespace) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_Namespace; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_Namespace; } }; /// This holds information associated with global functions. 
struct GlobalFunctionRecord : APIRecord { FunctionSignature Signature; - GlobalFunctionRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, + GlobalFunctionRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + LinkageInfo Linkage, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, bool IsFromSystemHeader) - : APIRecord(RK_GlobalFunction, USR, Name, Loc, std::move(Availability), - Linkage, Comment, Declaration, SubHeading, - IsFromSystemHeader), + : APIRecord(RK_GlobalFunction, USR, Name, Parent, Loc, + std::move(Availability), Linkage, Comment, Declaration, + SubHeading, IsFromSystemHeader), Signature(Signature) {} GlobalFunctionRecord(RecordKind Kind, StringRef USR, StringRef Name, - PresumedLoc Loc, AvailabilityInfo Availability, - LinkageInfo Linkage, const DocComment &Comment, + SymbolReference Parent, PresumedLoc Loc, + AvailabilityInfo Availability, LinkageInfo Linkage, + const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, bool IsFromSystemHeader) - : APIRecord(Kind, USR, Name, Loc, std::move(Availability), Linkage, - Comment, Declaration, SubHeading, IsFromSystemHeader), + : APIRecord(Kind, USR, Name, Parent, Loc, std::move(Availability), + Linkage, Comment, Declaration, SubHeading, + IsFromSystemHeader), Signature(Signature) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_GlobalFunction; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_GlobalFunction; } private: virtual void anchor(); @@ -323,63 +432,74 @@ private: struct GlobalFunctionTemplateRecord : GlobalFunctionRecord { Template Templ; - GlobalFunctionTemplateRecord(StringRef USR, StringRef Name, PresumedLoc Loc, + GlobalFunctionTemplateRecord(StringRef USR, StringRef Name, + SymbolReference Parent, PresumedLoc Loc, AvailabilityInfo Availability, LinkageInfo Linkage, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, Template Template, bool IsFromSystemHeader) - : GlobalFunctionRecord(RK_GlobalFunctionTemplate, USR, Name, Loc, + : GlobalFunctionRecord(RK_GlobalFunctionTemplate, USR, Name, Parent, Loc, std::move(Availability), Linkage, Comment, Declaration, SubHeading, Signature, IsFromSystemHeader), Templ(Template) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_GlobalFunctionTemplate; + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { + return K == RK_GlobalFunctionTemplate; } }; struct GlobalFunctionTemplateSpecializationRecord : GlobalFunctionRecord { GlobalFunctionTemplateSpecializationRecord( - StringRef USR, StringRef Name, PresumedLoc Loc, + StringRef USR, StringRef Name, SymbolReference Parent, PresumedLoc Loc, AvailabilityInfo Availability, LinkageInfo Linkage, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, bool IsFromSystemHeader) : GlobalFunctionRecord(RK_GlobalFunctionTemplateSpecialization, USR, Name, - Loc, std::move(Availability), Linkage, Comment, - Declaration, SubHeading, Signature, + Parent, Loc, std::move(Availability), Linkage, + Comment, Declaration, SubHeading, Signature, IsFromSystemHeader) {} static bool classof(const 
APIRecord *Record) { - return Record->getKind() == RK_GlobalFunctionTemplateSpecialization; + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { + return K == RK_GlobalFunctionTemplateSpecialization; } }; /// This holds information associated with global functions. struct GlobalVariableRecord : APIRecord { - GlobalVariableRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, + GlobalVariableRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + LinkageInfo Linkage, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, bool IsFromSystemHeader) - : APIRecord(RK_GlobalVariable, USR, Name, Loc, std::move(Availability), - Linkage, Comment, Declaration, SubHeading, - IsFromSystemHeader) {} + : APIRecord(RK_GlobalVariable, USR, Name, Parent, Loc, + std::move(Availability), Linkage, Comment, Declaration, + SubHeading, IsFromSystemHeader) {} GlobalVariableRecord(RecordKind Kind, StringRef USR, StringRef Name, + SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, LinkageInfo Linkage, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, bool IsFromSystemHeader) - : APIRecord(Kind, USR, Name, Loc, std::move(Availability), Linkage, - Comment, Declaration, SubHeading, IsFromSystemHeader) {} + : APIRecord(Kind, USR, Name, Parent, Loc, std::move(Availability), + Linkage, Comment, Declaration, SubHeading, + IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_GlobalVariable; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_GlobalVariable; } private: virtual void anchor(); @@ -388,34 +508,42 @@ private: struct GlobalVariableTemplateRecord : GlobalVariableRecord { Template Templ; - GlobalVariableTemplateRecord(StringRef USR, StringRef Name, PresumedLoc Loc, + GlobalVariableTemplateRecord(StringRef USR, StringRef Name, + SymbolReference Parent, PresumedLoc Loc, AvailabilityInfo Availability, LinkageInfo Linkage, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, class Template Template, bool IsFromSystemHeader) - : GlobalVariableRecord(RK_GlobalVariableTemplate, USR, Name, Loc, + : GlobalVariableRecord(RK_GlobalVariableTemplate, USR, Name, Parent, Loc, std::move(Availability), Linkage, Comment, Declaration, SubHeading, IsFromSystemHeader), Templ(Template) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_GlobalVariableTemplate; + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { + return K == RK_GlobalVariableTemplate; } }; struct GlobalVariableTemplateSpecializationRecord : GlobalVariableRecord { GlobalVariableTemplateSpecializationRecord( - StringRef USR, StringRef Name, PresumedLoc Loc, + StringRef USR, StringRef Name, SymbolReference Parent, PresumedLoc Loc, AvailabilityInfo Availability, LinkageInfo Linkage, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, bool IsFromSystemHeader) : GlobalVariableRecord(RK_GlobalVariableTemplateSpecialization, USR, Name, - Loc, std::move(Availability), Linkage, Comment, - Declaration, SubHeading, IsFromSystemHeader) {} + Parent, Loc, std::move(Availability), Linkage, + Comment, Declaration, SubHeading, + IsFromSystemHeader) {} static bool 
classof(const APIRecord *Record) { - return Record->getKind() == RK_GlobalVariableTemplateSpecialization; + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { + return K == RK_GlobalVariableTemplateSpecialization; } }; @@ -424,126 +552,203 @@ struct GlobalVariableTemplatePartialSpecializationRecord Template Templ; GlobalVariableTemplatePartialSpecializationRecord( - StringRef USR, StringRef Name, PresumedLoc Loc, + StringRef USR, StringRef Name, SymbolReference Parent, PresumedLoc Loc, AvailabilityInfo Availability, LinkageInfo Linkage, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, class Template Template, bool IsFromSystemHeader) : GlobalVariableRecord(RK_GlobalVariableTemplatePartialSpecialization, - USR, Name, Loc, std::move(Availability), Linkage, - Comment, Declaration, SubHeading, + USR, Name, Parent, Loc, std::move(Availability), + Linkage, Comment, Declaration, SubHeading, IsFromSystemHeader), Templ(Template) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_GlobalVariableTemplatePartialSpecialization; + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { + return K == RK_GlobalVariableTemplatePartialSpecialization; } }; /// This holds information associated with enum constants. struct EnumConstantRecord : APIRecord { - EnumConstantRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, + EnumConstantRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, bool IsFromSystemHeader) - : APIRecord(RK_EnumConstant, USR, Name, Loc, std::move(Availability), - LinkageInfo::none(), Comment, Declaration, SubHeading, - IsFromSystemHeader) {} + : APIRecord(RK_EnumConstant, USR, Name, Parent, Loc, + std::move(Availability), LinkageInfo::none(), Comment, + Declaration, SubHeading, IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_EnumConstant; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_EnumConstant; } private: virtual void anchor(); }; /// This holds information associated with enums. -struct EnumRecord : APIRecord { - SmallVector> Constants; - - EnumRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - bool IsFromSystemHeader) - : APIRecord(RK_Enum, USR, Name, Loc, std::move(Availability), +struct EnumRecord : APIRecord, RecordContext { + EnumRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, + DeclarationFragments SubHeading, bool IsFromSystemHeader) + : APIRecord(RK_Enum, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, - IsFromSystemHeader) {} + IsFromSystemHeader), + RecordContext(RK_Enum) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_Enum; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_Enum; } private: virtual void anchor(); }; -/// This holds information associated with struct fields. 
+/// This holds information associated with struct or union fields fields. struct RecordFieldRecord : APIRecord { - RecordFieldRecord(StringRef USR, StringRef Name, PresumedLoc Loc, + RecordFieldRecord(RecordKind Kind, StringRef USR, StringRef Name, + SymbolReference Parent, PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, RecordKind Kind, - bool IsFromSystemHeader) - : APIRecord(Kind, USR, Name, Loc, std::move(Availability), + DeclarationFragments SubHeading, bool IsFromSystemHeader) + : APIRecord(Kind, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_StructField || - Record->getKind() == RK_UnionField; + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { + return K == RK_StructField || K == RK_UnionField; } -private: - virtual void anchor(); + virtual ~RecordFieldRecord() = 0; }; -/// This holds information associated with structs. -struct RecordRecord : APIRecord { - SmallVector> Fields; - - RecordRecord(StringRef USR, StringRef Name, PresumedLoc Loc, +/// This holds information associated with structs and unions. +struct RecordRecord : APIRecord, RecordContext { + RecordRecord(RecordKind Kind, StringRef USR, StringRef Name, + SymbolReference Parent, PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, RecordKind Kind, - bool IsFromSystemHeader) - : APIRecord(Kind, USR, Name, Loc, std::move(Availability), + DeclarationFragments SubHeading, bool IsFromSystemHeader) + : APIRecord(Kind, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, - IsFromSystemHeader) {} + IsFromSystemHeader), + RecordContext(Kind) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_Struct || Record->getKind() == RK_Union; + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { + return K == RK_Struct || K == RK_Union; } + virtual ~RecordRecord() = 0; +}; + +struct StructFieldRecord : RecordFieldRecord { + StructFieldRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, + DeclarationFragments SubHeading, bool IsFromSystemHeader) + : RecordFieldRecord(RK_StructField, USR, Name, Parent, Loc, + std::move(Availability), Comment, Declaration, + SubHeading, IsFromSystemHeader) {} + + static bool classof(const APIRecord *Record) { + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { return K == RK_StructField; } + private: virtual void anchor(); }; -struct CXXFieldRecord : APIRecord { - AccessControl Access; +struct StructRecord : RecordRecord { + StructRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, + DeclarationFragments SubHeading, bool IsFromSystemHeader) + : RecordRecord(RK_Struct, USR, Name, Parent, Loc, std::move(Availability), + Comment, Declaration, SubHeading, IsFromSystemHeader) {} - CXXFieldRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, + static 
bool classof(const APIRecord *Record) { + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { return K == RK_Struct; } + +private: + virtual void anchor(); +}; + +struct UnionFieldRecord : RecordFieldRecord { + UnionFieldRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, + DeclarationFragments SubHeading, bool IsFromSystemHeader) + : RecordFieldRecord(RK_UnionField, USR, Name, Parent, Loc, + std::move(Availability), Comment, Declaration, + SubHeading, IsFromSystemHeader) {} + + static bool classof(const APIRecord *Record) { + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { return K == RK_UnionField; } + +private: + virtual void anchor(); +}; + +struct UnionRecord : RecordRecord { + UnionRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, + DeclarationFragments SubHeading, bool IsFromSystemHeader) + : RecordRecord(RK_Union, USR, Name, Parent, Loc, std::move(Availability), + Comment, Declaration, SubHeading, IsFromSystemHeader) {} + + static bool classof(const APIRecord *Record) { + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { return K == RK_Union; } + +private: + virtual void anchor(); +}; + +struct CXXFieldRecord : APIRecord { + CXXFieldRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, AccessControl Access, bool IsFromSystemHeader) - : APIRecord(RK_CXXField, USR, Name, Loc, std::move(Availability), + : APIRecord(RK_CXXField, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, - IsFromSystemHeader), - Access(Access) {} + IsFromSystemHeader, std::move(Access)) {} CXXFieldRecord(RecordKind Kind, StringRef USR, StringRef Name, - PresumedLoc Loc, AvailabilityInfo Availability, - const DocComment &Comment, DeclarationFragments Declaration, + SymbolReference Parent, PresumedLoc Loc, + AvailabilityInfo Availability, const DocComment &Comment, + DeclarationFragments Declaration, DeclarationFragments SubHeading, AccessControl Access, bool IsFromSystemHeader) - : APIRecord(Kind, USR, Name, Loc, std::move(Availability), + : APIRecord(Kind, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, - IsFromSystemHeader), - Access(Access) {} + IsFromSystemHeader, std::move(Access)) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_CXXField; + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { + return K == RK_CXXField || K == RK_CXXFieldTemplate || K == RK_StaticField; } private: @@ -553,111 +758,122 @@ private: struct CXXFieldTemplateRecord : CXXFieldRecord { Template Templ; - CXXFieldTemplateRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, + CXXFieldTemplateRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, AccessControl Access, Template Template, bool IsFromSystemHeader) - : CXXFieldRecord(RK_CXXFieldTemplate, USR, Name, Loc, + : 
CXXFieldRecord(RK_CXXFieldTemplate, USR, Name, Parent, Loc, std::move(Availability), Comment, Declaration, - SubHeading, Access, IsFromSystemHeader), + SubHeading, std::move(Access), IsFromSystemHeader), Templ(Template) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_CXXFieldTemplate; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_CXXFieldTemplate; } }; struct CXXMethodRecord : APIRecord { FunctionSignature Signature; - AccessControl Access; CXXMethodRecord() = delete; CXXMethodRecord(RecordKind Kind, StringRef USR, StringRef Name, - PresumedLoc Loc, AvailabilityInfo Availability, - const DocComment &Comment, DeclarationFragments Declaration, + SymbolReference Parent, PresumedLoc Loc, + AvailabilityInfo Availability, const DocComment &Comment, + DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, AccessControl Access, bool IsFromSystemHeader) - : APIRecord(Kind, USR, Name, Loc, std::move(Availability), + : APIRecord(Kind, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, - IsFromSystemHeader), - Signature(Signature), Access(Access) {} + IsFromSystemHeader, std::move(Access)), + Signature(Signature) {} virtual ~CXXMethodRecord() = 0; }; struct CXXConstructorRecord : CXXMethodRecord { - CXXConstructorRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, + CXXConstructorRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, AccessControl Access, bool IsFromSystemHeader) - : CXXMethodRecord(RK_CXXConstructorMethod, USR, Name, Loc, + : CXXMethodRecord(RK_CXXConstructorMethod, USR, Name, Parent, Loc, std::move(Availability), Comment, Declaration, - SubHeading, Signature, Access, IsFromSystemHeader) {} + SubHeading, Signature, std::move(Access), + IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_CXXConstructorMethod; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_CXXConstructorMethod; } private: virtual void anchor(); }; struct CXXDestructorRecord : CXXMethodRecord { - CXXDestructorRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, + CXXDestructorRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, AccessControl Access, bool IsFromSystemHeader) - : CXXMethodRecord(RK_CXXDestructorMethod, USR, Name, Loc, + : CXXMethodRecord(RK_CXXDestructorMethod, USR, Name, Parent, Loc, std::move(Availability), Comment, Declaration, - SubHeading, Signature, Access, IsFromSystemHeader) {} + SubHeading, Signature, std::move(Access), + IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_CXXDestructorMethod; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_CXXDestructorMethod; } private: virtual void anchor(); }; struct CXXStaticMethodRecord : CXXMethodRecord { - CXXStaticMethodRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - 
AvailabilityInfo Availability, + CXXStaticMethodRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, AccessControl Access, bool IsFromSystemHeader) - : CXXMethodRecord(RK_CXXStaticMethod, USR, Name, Loc, + : CXXMethodRecord(RK_CXXStaticMethod, USR, Name, Parent, Loc, std::move(Availability), Comment, Declaration, - SubHeading, Signature, Access, IsFromSystemHeader) {} + SubHeading, Signature, std::move(Access), + IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_CXXStaticMethod; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_CXXStaticMethod; } private: virtual void anchor(); }; struct CXXInstanceMethodRecord : CXXMethodRecord { - CXXInstanceMethodRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, + CXXInstanceMethodRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, AccessControl Access, bool IsFromSystemHeader) - : CXXMethodRecord(RK_CXXInstanceMethod, USR, Name, Loc, + : CXXMethodRecord(RK_CXXInstanceMethod, USR, Name, Parent, Loc, std::move(Availability), Comment, Declaration, - SubHeading, Signature, Access, IsFromSystemHeader) {} + SubHeading, Signature, std::move(Access), + IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_CXXInstanceMethod; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_CXXInstanceMethod; } private: virtual void anchor(); @@ -666,36 +882,42 @@ private: struct CXXMethodTemplateRecord : CXXMethodRecord { Template Templ; - CXXMethodTemplateRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, + CXXMethodTemplateRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, AccessControl Access, Template Template, bool IsFromSystemHeader) - : CXXMethodRecord(RK_CXXMethodTemplate, USR, Name, Loc, + : CXXMethodRecord(RK_CXXMethodTemplate, USR, Name, Parent, Loc, std::move(Availability), Comment, Declaration, - SubHeading, Signature, Access, IsFromSystemHeader), + SubHeading, Signature, std::move(Access), + IsFromSystemHeader), Templ(Template) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_CXXMethodTemplate; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_CXXMethodTemplate; } }; struct CXXMethodTemplateSpecializationRecord : CXXMethodRecord { CXXMethodTemplateSpecializationRecord( - StringRef USR, StringRef Name, PresumedLoc Loc, + StringRef USR, StringRef Name, SymbolReference Parent, PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, AccessControl Access, bool IsFromSystemHeader) - : CXXMethodRecord(RK_CXXMethodTemplateSpecialization, USR, Name, Loc, - std::move(Availability), Comment, Declaration, - SubHeading, Signature, Access, IsFromSystemHeader) {} + : 
CXXMethodRecord(RK_CXXMethodTemplateSpecialization, USR, Name, Parent, + Loc, std::move(Availability), Comment, Declaration, + SubHeading, Signature, std::move(Access), + IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_CXXMethodTemplateSpecialization; + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { + return K == RK_CXXMethodTemplateSpecialization; } }; @@ -714,13 +936,13 @@ struct ObjCPropertyRecord : APIRecord { bool IsOptional; ObjCPropertyRecord(RecordKind Kind, StringRef USR, StringRef Name, - PresumedLoc Loc, AvailabilityInfo Availability, - const DocComment &Comment, + SymbolReference Parent, PresumedLoc Loc, + AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, AttributeKind Attributes, StringRef GetterName, StringRef SetterName, bool IsOptional, bool IsFromSystemHeader) - : APIRecord(Kind, USR, Name, Loc, std::move(Availability), + : APIRecord(Kind, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, IsFromSystemHeader), Attributes(Attributes), GetterName(GetterName), SetterName(SetterName), @@ -733,44 +955,44 @@ struct ObjCPropertyRecord : APIRecord { }; struct ObjCInstancePropertyRecord : ObjCPropertyRecord { - ObjCInstancePropertyRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - AttributeKind Attributes, StringRef GetterName, - StringRef SetterName, bool IsOptional, - bool IsFromSystemHeader) - : ObjCPropertyRecord(RK_ObjCInstanceProperty, USR, Name, Loc, + ObjCInstancePropertyRecord( + StringRef USR, StringRef Name, SymbolReference Parent, PresumedLoc Loc, + AvailabilityInfo Availability, const DocComment &Comment, + DeclarationFragments Declaration, DeclarationFragments SubHeading, + AttributeKind Attributes, StringRef GetterName, StringRef SetterName, + bool IsOptional, bool IsFromSystemHeader) + : ObjCPropertyRecord(RK_ObjCInstanceProperty, USR, Name, Parent, Loc, std::move(Availability), Comment, Declaration, SubHeading, Attributes, GetterName, SetterName, IsOptional, IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_ObjCInstanceProperty; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_ObjCInstanceProperty; } private: virtual void anchor(); }; struct ObjCClassPropertyRecord : ObjCPropertyRecord { - ObjCClassPropertyRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, + ObjCClassPropertyRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, AttributeKind Attributes, StringRef GetterName, StringRef SetterName, bool IsOptional, bool IsFromSystemHeader) - : ObjCPropertyRecord(RK_ObjCClassProperty, USR, Name, Loc, + : ObjCPropertyRecord(RK_ObjCClassProperty, USR, Name, Parent, Loc, std::move(Availability), Comment, Declaration, SubHeading, Attributes, GetterName, SetterName, IsOptional, IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_ObjCClassProperty; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_ObjCClassProperty; } private: virtual 
void anchor(); @@ -778,23 +1000,21 @@ private: /// This holds information associated with Objective-C instance variables. struct ObjCInstanceVariableRecord : APIRecord { - using AccessControl = ObjCIvarDecl::AccessControl; - AccessControl Access; - - ObjCInstanceVariableRecord(StringRef USR, StringRef Name, PresumedLoc Loc, + ObjCInstanceVariableRecord(StringRef USR, StringRef Name, + SymbolReference Parent, PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, - AccessControl Access, bool IsFromSystemHeader) - : APIRecord(RK_ObjCIvar, USR, Name, Loc, std::move(Availability), + bool IsFromSystemHeader) + : APIRecord(RK_ObjCIvar, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, - IsFromSystemHeader), - Access(Access) {} + IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_ObjCIvar; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_ObjCIvar; } private: virtual void anchor(); @@ -807,11 +1027,12 @@ struct ObjCMethodRecord : APIRecord { ObjCMethodRecord() = delete; ObjCMethodRecord(RecordKind Kind, StringRef USR, StringRef Name, - PresumedLoc Loc, AvailabilityInfo Availability, - const DocComment &Comment, DeclarationFragments Declaration, + SymbolReference Parent, PresumedLoc Loc, + AvailabilityInfo Availability, const DocComment &Comment, + DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, bool IsFromSystemHeader) - : APIRecord(Kind, USR, Name, Loc, std::move(Availability), + : APIRecord(Kind, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, IsFromSystemHeader), Signature(Signature) {} @@ -820,122 +1041,103 @@ struct ObjCMethodRecord : APIRecord { }; struct ObjCInstanceMethodRecord : ObjCMethodRecord { - ObjCInstanceMethodRecord(StringRef USR, StringRef Name, PresumedLoc Loc, + ObjCInstanceMethodRecord(StringRef USR, StringRef Name, + SymbolReference Parent, PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, bool IsFromSystemHeader) - : ObjCMethodRecord(RK_ObjCInstanceMethod, USR, Name, Loc, + : ObjCMethodRecord(RK_ObjCInstanceMethod, USR, Name, Parent, Loc, std::move(Availability), Comment, Declaration, SubHeading, Signature, IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_ObjCInstanceMethod; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_ObjCInstanceMethod; } private: virtual void anchor(); }; struct ObjCClassMethodRecord : ObjCMethodRecord { - ObjCClassMethodRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, + ObjCClassMethodRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, FunctionSignature Signature, bool IsFromSystemHeader) - : ObjCMethodRecord(RK_ObjCClassMethod, USR, Name, Loc, + : ObjCMethodRecord(RK_ObjCClassMethod, USR, Name, Parent, Loc, std::move(Availability), Comment, Declaration, SubHeading, Signature, IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_ObjCClassMethod; + 
return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_ObjCClassMethod; } private: virtual void anchor(); }; -/// This represents a reference to another symbol that might come from external -/// sources. -struct SymbolReference { - StringRef Name; - StringRef USR; - - /// The source project/module/product of the referred symbol. - StringRef Source; - - SymbolReference() = default; - SymbolReference(StringRef Name, StringRef USR = "", StringRef Source = "") - : Name(Name), USR(USR), Source(Source) {} - SymbolReference(const APIRecord &Record) - : Name(Record.Name), USR(Record.USR) {} - SymbolReference(const APIRecord *Record) - : Name(Record->Name), USR(Record->USR) {} - - /// Determine if this SymbolReference is empty. - /// - /// \returns true if and only if all \c Name, \c USR, and \c Source is empty. - bool empty() const { return Name.empty() && USR.empty() && Source.empty(); } -}; - struct StaticFieldRecord : CXXFieldRecord { - SymbolReference Context; - - StaticFieldRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, SymbolReference Context, - AccessControl Access, bool IsFromSystemHeader) - : CXXFieldRecord(RK_StaticField, USR, Name, Loc, std::move(Availability), - Comment, Declaration, SubHeading, Access, - IsFromSystemHeader), - Context(Context) {} + StaticFieldRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + LinkageInfo Linkage, const DocComment &Comment, + DeclarationFragments Declaration, + DeclarationFragments SubHeading, AccessControl Access, + bool IsFromSystemHeader) + : CXXFieldRecord(RK_StaticField, USR, Name, Parent, Loc, + std::move(Availability), Comment, Declaration, + SubHeading, std::move(Access), IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_StaticField; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_StaticField; } }; /// The base representation of an Objective-C container record. Holds common /// information associated with Objective-C containers. 
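
The classof bodies above now delegate to a static classofKind(RecordKind) helper instead of comparing getKind() inline. Below is a minimal standalone sketch of why that split is useful, using stand-in types rather than the real APIRecord hierarchy or llvm::isa machinery: classof keeps instance-based checks working, while classofKind lets kind-only queries (such as the RecordContext cast helpers later in this header) reuse the same predicate without an object in hand.

// Minimal sketch of the classof/classofKind split, with stand-in types.
#include <cassert>
#include <memory>

struct Record {
  enum Kind { RK_Base, RK_Method, RK_MethodTemplate };
  explicit Record(Kind K) : K(K) {}
  Kind getKind() const { return K; }
  virtual ~Record() = default;

private:
  Kind K;
};

struct MethodRecord : Record {
  MethodRecord() : Record(RK_Method) {}
  // classof keeps isa<>/dyn_cast<>-style checks on instances working...
  static bool classof(const Record *R) { return classofKind(R->getKind()); }
  // ...while classofKind answers the same question from a kind alone,
  // which is what kind-keyed helpers need.
  static bool classofKind(Record::Kind K) { return K == RK_Method; }
};

int main() {
  std::unique_ptr<Record> R = std::make_unique<MethodRecord>();
  assert(MethodRecord::classof(R.get()));
  assert(MethodRecord::classofKind(Record::RK_Method));
  assert(!MethodRecord::classofKind(Record::RK_MethodTemplate));
}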
-struct ObjCContainerRecord : APIRecord { - SmallVector> Methods; - SmallVector> Properties; - SmallVector> Ivars; +struct ObjCContainerRecord : APIRecord, RecordContext { SmallVector Protocols; ObjCContainerRecord() = delete; ObjCContainerRecord(RecordKind Kind, StringRef USR, StringRef Name, - PresumedLoc Loc, AvailabilityInfo Availability, - LinkageInfo Linkage, const DocComment &Comment, + SymbolReference Parent, PresumedLoc Loc, + AvailabilityInfo Availability, LinkageInfo Linkage, + const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, bool IsFromSystemHeader) - : APIRecord(Kind, USR, Name, Loc, std::move(Availability), Linkage, - Comment, Declaration, SubHeading, IsFromSystemHeader) {} + : APIRecord(Kind, USR, Name, Parent, Loc, std::move(Availability), + Linkage, Comment, Declaration, SubHeading, + IsFromSystemHeader), + RecordContext(Kind) {} virtual ~ObjCContainerRecord() = 0; }; -struct CXXClassRecord : APIRecord { - SmallVector> Fields; - SmallVector> Methods; +struct CXXClassRecord : APIRecord, RecordContext { SmallVector Bases; - AccessControl Access; - CXXClassRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, + CXXClassRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, RecordKind Kind, AccessControl Access, bool IsFromSystemHeader) - : APIRecord(Kind, USR, Name, Loc, std::move(Availability), + : APIRecord(Kind, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, - IsFromSystemHeader), - Access(Access) {} + IsFromSystemHeader, std::move(Access)), + RecordContext(Kind) {} static bool classof(const APIRecord *Record) { - return (Record->getKind() == RK_CXXClass); + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { + return K == RK_CXXClass || K == RK_ClassTemplate || + K == RK_ClassTemplateSpecialization || + K == RK_ClassTemplatePartialSpecialization; } private: @@ -945,86 +1147,108 @@ private: struct ClassTemplateRecord : CXXClassRecord { Template Templ; - ClassTemplateRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, + ClassTemplateRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, Template Template, AccessControl Access, bool IsFromSystemHeader) - : CXXClassRecord(USR, Name, Loc, std::move(Availability), Comment, - Declaration, SubHeading, RK_ClassTemplate, Access, - IsFromSystemHeader), + : CXXClassRecord(USR, Name, Parent, Loc, std::move(Availability), Comment, + Declaration, SubHeading, RK_ClassTemplate, + std::move(Access), IsFromSystemHeader), Templ(Template) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_ClassTemplate; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_ClassTemplate; } }; struct ClassTemplateSpecializationRecord : CXXClassRecord { ClassTemplateSpecializationRecord( - StringRef USR, StringRef Name, PresumedLoc Loc, + StringRef USR, StringRef Name, SymbolReference Parent, PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments 
Declaration, DeclarationFragments SubHeading, AccessControl Access, bool IsFromSystemHeader) - : CXXClassRecord(USR, Name, Loc, std::move(Availability), Comment, + : CXXClassRecord(USR, Name, Parent, Loc, std::move(Availability), Comment, Declaration, SubHeading, RK_ClassTemplateSpecialization, Access, IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_ClassTemplateSpecialization; + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { + return K == RK_ClassTemplateSpecialization; } }; struct ClassTemplatePartialSpecializationRecord : CXXClassRecord { Template Templ; ClassTemplatePartialSpecializationRecord( - StringRef USR, StringRef Name, PresumedLoc Loc, + StringRef USR, StringRef Name, SymbolReference Parent, PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, Template Template, AccessControl Access, bool IsFromSystemHeader) - : CXXClassRecord(USR, Name, Loc, std::move(Availability), Comment, - Declaration, SubHeading, RK_ClassTemplateSpecialization, - Access, IsFromSystemHeader), + : CXXClassRecord(USR, Name, Parent, Loc, std::move(Availability), Comment, + Declaration, SubHeading, + RK_ClassTemplatePartialSpecialization, Access, + IsFromSystemHeader), Templ(Template) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_ClassTemplatePartialSpecialization; + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { + return K == RK_ClassTemplatePartialSpecialization; } }; struct ConceptRecord : APIRecord { Template Templ; - ConceptRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, + ConceptRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, Template Template, bool IsFromSystemHeader) - : APIRecord(RK_Concept, USR, Name, Loc, std::move(Availability), + : APIRecord(RK_Concept, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, IsFromSystemHeader), Templ(Template) {} + + static bool classof(const APIRecord *Record) { + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { return K == RK_Concept; } }; /// This holds information associated with Objective-C categories. struct ObjCCategoryRecord : ObjCContainerRecord { SymbolReference Interface; - /// Determine whether the Category is derived from external class interface. 
- bool IsFromExternalModule = false; - ObjCCategoryRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, + ObjCCategoryRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, SymbolReference Interface, bool IsFromSystemHeader) - : ObjCContainerRecord(RK_ObjCCategory, USR, Name, Loc, + : ObjCContainerRecord(RK_ObjCCategory, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, IsFromSystemHeader), Interface(Interface) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_ObjCCategory; + return classofKind(Record->getKind()); + } + static bool classofKind(RecordKind K) { return K == RK_ObjCCategory; } + + bool isExtendingExternalModule() const { return !Interface.Source.empty(); } + + std::optional getExtendedExternalModule() const { + if (!isExtendingExternalModule()) + return {}; + return Interface.Source; } private: @@ -1034,23 +1258,22 @@ private: /// This holds information associated with Objective-C interfaces/classes. struct ObjCInterfaceRecord : ObjCContainerRecord { SymbolReference SuperClass; - // ObjCCategoryRecord%s are stored in and owned by APISet. - SmallVector Categories; - ObjCInterfaceRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, + ObjCInterfaceRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + LinkageInfo Linkage, const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, SymbolReference SuperClass, bool IsFromSystemHeader) - : ObjCContainerRecord(RK_ObjCInterface, USR, Name, Loc, + : ObjCContainerRecord(RK_ObjCInterface, USR, Name, Parent, Loc, std::move(Availability), Linkage, Comment, Declaration, SubHeading, IsFromSystemHeader), SuperClass(SuperClass) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_ObjCInterface; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_ObjCInterface; } private: virtual void anchor(); @@ -1058,18 +1281,20 @@ private: /// This holds information associated with Objective-C protocols. struct ObjCProtocolRecord : ObjCContainerRecord { - ObjCProtocolRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, + ObjCProtocolRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, bool IsFromSystemHeader) - : ObjCContainerRecord(RK_ObjCProtocol, USR, Name, Loc, + : ObjCContainerRecord(RK_ObjCProtocol, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo::none(), Comment, Declaration, SubHeading, IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_ObjCProtocol; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_ObjCProtocol; } private: virtual void anchor(); @@ -1077,17 +1302,18 @@ private: /// This holds information associated with macro definitions. 
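
ObjCCategoryRecord drops the stored IsFromExternalModule flag and instead derives the answer from the Interface reference's Source field, exposing it through isExtendingExternalModule() and an optional-returning getExtendedExternalModule(). The following rough sketch shows how a consumer might use that pair; CategoryLike and the module name "UIKit" are invented stand-ins, only the shape of the two accessors is taken from the patch.

// Hypothetical consumer; the ObjCCategoryRecord-like type is stubbed out.
#include <iostream>
#include <optional>
#include <string>

struct CategoryLike {
  std::string InterfaceSource; // empty when the extended interface is local
  bool isExtendingExternalModule() const { return !InterfaceSource.empty(); }
  std::optional<std::string> getExtendedExternalModule() const {
    if (!isExtendingExternalModule())
      return std::nullopt;
    return InterfaceSource;
  }
};

int main() {
  CategoryLike Local{""}, External{"UIKit"};
  for (const auto &C : {Local, External}) {
    if (auto Mod = C.getExtendedExternalModule())
      std::cout << "extends a class from module " << *Mod << "\n";
    else
      std::cout << "extends a class in the current module\n";
  }
}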
struct MacroDefinitionRecord : APIRecord { - MacroDefinitionRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - DeclarationFragments Declaration, + MacroDefinitionRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, DeclarationFragments Declaration, DeclarationFragments SubHeading, bool IsFromSystemHeader) - : APIRecord(RK_MacroDefinition, USR, Name, Loc, AvailabilityInfo(), - LinkageInfo(), {}, Declaration, SubHeading, - IsFromSystemHeader) {} + : APIRecord(RK_MacroDefinition, USR, Name, Parent, Loc, + AvailabilityInfo(), LinkageInfo(), {}, Declaration, + SubHeading, IsFromSystemHeader) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_MacroDefinition; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_MacroDefinition; } private: virtual void anchor(); @@ -1101,575 +1327,228 @@ private: struct TypedefRecord : APIRecord { SymbolReference UnderlyingType; - TypedefRecord(StringRef USR, StringRef Name, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, + TypedefRecord(StringRef USR, StringRef Name, SymbolReference Parent, + PresumedLoc Loc, AvailabilityInfo Availability, + const DocComment &Comment, DeclarationFragments Declaration, DeclarationFragments SubHeading, SymbolReference UnderlyingType, bool IsFromSystemHeader) - : APIRecord(RK_Typedef, USR, Name, Loc, std::move(Availability), + : APIRecord(RK_Typedef, USR, Name, Parent, Loc, std::move(Availability), LinkageInfo(), Comment, Declaration, SubHeading, IsFromSystemHeader), UnderlyingType(UnderlyingType) {} static bool classof(const APIRecord *Record) { - return Record->getKind() == RK_Typedef; + return classofKind(Record->getKind()); } + static bool classofKind(RecordKind K) { return K == RK_Typedef; } private: virtual void anchor(); }; -/// Check if a record type has a function signature mixin. -/// -/// This is denoted by the record type having a ``Signature`` field of type -/// FunctionSignature. 
-template -struct has_function_signature : public std::false_type {}; -template <> -struct has_function_signature : public std::true_type {}; -template <> -struct has_function_signature : public std::true_type {}; -template <> -struct has_function_signature - : public std::true_type {}; -template <> -struct has_function_signature : public std::true_type {}; -template <> -struct has_function_signature : public std::true_type {}; -template <> -struct has_function_signature : public std::true_type {}; -template <> -struct has_function_signature : public std::true_type { -}; -template <> -struct has_function_signature - : public std::true_type {}; - -template struct has_access : public std::false_type {}; -template <> struct has_access : public std::true_type {}; -template <> struct has_access : public std::true_type {}; -template <> struct has_access : public std::true_type {}; -template <> -struct has_access : public std::true_type {}; -template <> -struct has_access - : public std::true_type {}; -template <> -struct has_access : public std::true_type {}; -template <> struct has_access : public std::true_type {}; -template <> struct has_access : public std::true_type {}; -template <> -struct has_access : public std::true_type {}; -template <> -struct has_access - : public std::true_type {}; - -template struct has_template : public std::false_type {}; -template <> struct has_template : public std::true_type {}; -template <> -struct has_template - : public std::true_type {}; -template <> struct has_template : public std::true_type {}; -template <> -struct has_template : public std::true_type {}; -template <> -struct has_template - : public std::true_type {}; -template <> -struct has_template : public std::true_type {}; -template <> -struct has_template : public std::true_type {}; - -template <> -struct has_template : public std::true_type {}; -template <> -struct has_function_signature - : public std::true_type {}; -template <> -struct has_function_signature - : public std::true_type {}; - /// APISet holds the set of API records collected from given inputs. class APISet { public: - NamespaceRecord *addNamespace(APIRecord *Parent, StringRef Name, - StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, - LinkageInfo Linkage, const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - bool IsFromSystemHeaderg); - /// Create and add a global variable record into the API set. - /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. - GlobalVariableRecord * - addGlobalVar(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeadin, bool IsFromSystemHeaderg); + /// Get the target triple for the ExtractAPI invocation. + const llvm::Triple &getTarget() const { return Target; } - GlobalVariableTemplateRecord * - addGlobalVariableTemplate(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, Template Template, - bool IsFromSystemHeader); + /// Get the language used by the APIs. 
+ Language getLanguage() const { return Lang; } - /// Create and add a function record into the API set. + /// Finds the APIRecord for a given USR. /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. - GlobalFunctionRecord * - addGlobalFunction(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, - FunctionSignature Signature, bool IsFromSystemHeader); - - GlobalFunctionTemplateRecord *addGlobalFunctionTemplate( - StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, FunctionSignature Signature, - Template Template, bool IsFromSystemHeader); - - GlobalFunctionTemplateSpecializationRecord * - addGlobalFunctionTemplateSpecialization( - StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, FunctionSignature Signature, - bool IsFromSystemHeader); + /// \returns a pointer to the APIRecord associated with that USR or nullptr. + APIRecord *findRecordForUSR(StringRef USR) const; - /// Create and add an enum constant record into the API set. + /// Copy \p String into the Allocator in this APISet. /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. - EnumConstantRecord * - addEnumConstant(EnumRecord *Enum, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, bool IsFromSystemHeader); + /// \returns a StringRef of the copied string in APISet::Allocator. + StringRef copyString(StringRef String); - /// Create and add an enum record into the API set. - /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. - EnumRecord *addEnum(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, bool IsFromSystemHeader); + SymbolReference createSymbolReference(StringRef Name, StringRef USR, + StringRef Source = ""); - /// Create and add a record field record into the API set. + /// Create a subclass of \p APIRecord and store it in the APISet. /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. 
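
The slimmed-down APISet keeps copyString and createSymbolReference because records hold StringRefs, which do not own their characters; anything built from a temporary buffer (for example a USR generated into a local SmallString) has to be copied into APISet-owned storage first. Here is a standalone sketch of that idea using std::string_view and a std::deque in place of StringRef and the real allocator; all names in it are invented.

#include <deque>
#include <iostream>
#include <string>
#include <string_view>

// Stand-in for APISet-owned string storage: a deque never relocates
// existing elements, so views into stored strings stay valid.
std::deque<std::string> Storage;

std::string_view copyString(std::string_view S) {
  return Storage.emplace_back(S); // own a copy, hand back a view of it
}

std::string_view makeUSR() { return copyString(std::string("c:@S@Foo")); }

int main() {
  // Without copyString the view would dangle once the temporary USR dies.
  std::string_view USR = makeUSR();
  std::cout << USR << "\n"; // safe: views owned storage, not the temporary
}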
- RecordFieldRecord * - addRecordField(RecordRecord *Record, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, APIRecord::RecordKind Kind, - bool IsFromSystemHeader); - - /// Create and add a record record into the API set. - /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. - RecordRecord *addRecord(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - APIRecord::RecordKind Kind, bool IsFromSystemHeader); - - StaticFieldRecord * - addStaticField(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, SymbolReference Context, - AccessControl Access, bool IsFromSystemHeaderg); - - CXXFieldRecord *addCXXField(APIRecord *CXXClass, StringRef Name, - StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - AccessControl Access, bool IsFromSystemHeader); - - CXXFieldTemplateRecord *addCXXFieldTemplate( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - AccessControl Access, Template Template, bool IsFromSystemHeader); - - CXXClassRecord *addCXXClass(APIRecord *Parent, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - APIRecord::RecordKind Kind, AccessControl Access, - bool IsFromSystemHeader); - - ClassTemplateRecord * - addClassTemplate(APIRecord *Parent, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, Template Template, - AccessControl Access, bool IsFromSystemHeader); - - ClassTemplateSpecializationRecord *addClassTemplateSpecialization( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - AccessControl Access, bool IsFromSystemHeader); - - ClassTemplatePartialSpecializationRecord * - addClassTemplatePartialSpecialization( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - Template Template, AccessControl Access, bool IsFromSystemHeader); - - GlobalVariableTemplateSpecializationRecord * - addGlobalVariableTemplateSpecialization( - StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, bool IsFromSystemHeader); - - GlobalVariableTemplatePartialSpecializationRecord * - 
addGlobalVariableTemplatePartialSpecialization( - StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, Template Template, - bool IsFromSystemHeader); - - CXXMethodRecord *addCXXInstanceMethod( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - FunctionSignature Signature, AccessControl Access, - bool IsFromSystemHeader); - - CXXMethodRecord *addCXXStaticMethod( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - FunctionSignature Signature, AccessControl Access, - bool IsFromSystemHeader); - - CXXMethodRecord *addCXXSpecialMethod( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - FunctionSignature Signature, AccessControl Access, - bool IsFromSystemHeader); - - CXXMethodTemplateRecord *addCXXMethodTemplate( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - FunctionSignature Signature, AccessControl Access, Template Template, - bool IsFromSystemHeader); + /// \returns A pointer to the created record or the already existing record + /// matching this USR. + template + typename std::enable_if_t, RecordTy> * + createRecord(StringRef USR, StringRef Name, CtorArgsContTy &&...CtorArgs); + + ArrayRef getTopLevelRecords() const { + return TopLevelRecords; + } - CXXMethodTemplateSpecializationRecord *addCXXMethodTemplateSpec( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - FunctionSignature Signature, AccessControl Access, - bool IsFromSystemHeader); + APISet(const llvm::Triple &Target, Language Lang, + const std::string &ProductName) + : Target(Target), Lang(Lang), ProductName(ProductName) {} - ConceptRecord *addConcept(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, Template Template, - bool IsFromSystemHeader); + // Prevent moves and copies + APISet(const APISet &Other) = delete; + APISet &operator=(const APISet &Other) = delete; + APISet(APISet &&Other) = delete; + APISet &operator=(APISet &&Other) = delete; - /// Create and add an Objective-C category record into the API set. - /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. 
- ObjCCategoryRecord * - addObjCCategory(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, SymbolReference Interface, - bool IsFromSystemHeader, bool IsFromExternalModule); +private: + /// BumpPtrAllocator that serves as the memory arena for the allocated objects + llvm::BumpPtrAllocator Allocator; - /// Create and add an Objective-C interface record into the API set. - /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. - ObjCInterfaceRecord * - addObjCInterface(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, SymbolReference SuperClass, - bool IsFromSystemHeader); + const llvm::Triple Target; + const Language Lang; - /// Create and add an Objective-C method record into the API set. - /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. - ObjCMethodRecord * - addObjCMethod(ObjCContainerRecord *Container, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, FunctionSignature Signature, - bool IsInstanceMethod, bool IsFromSystemHeader); + struct APIRecordDeleter { + void operator()(APIRecord *Record) { Record->~APIRecord(); } + }; - /// Create and add an Objective-C property record into the API set. - /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. - ObjCPropertyRecord * - addObjCProperty(ObjCContainerRecord *Container, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, - ObjCPropertyRecord::AttributeKind Attributes, - StringRef GetterName, StringRef SetterName, bool IsOptional, - bool IsInstanceProperty, bool IsFromSystemHeader); + // Ensure that the destructor of each record is called when the LookupTable is + // destroyed without calling delete operator as the memory for the record + // lives in the BumpPtrAllocator. + using APIRecordStoredPtr = std::unique_ptr; + llvm::DenseMap USRBasedLookupTable; + std::vector TopLevelRecords; - /// Create and add an Objective-C instance variable record into the API set. - /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. 
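
The new private members spell out the ownership model: records are placement-new'd into a BumpPtrAllocator, the USR-keyed lookup table holds unique_ptrs whose deleter only runs the destructor, and the arena releases the memory afterwards. Below is a self-contained approximation of that scheme with a toy fixed-size buffer standing in for llvm::BumpPtrAllocator and std::unordered_map for llvm::DenseMap; the record type and USR string are made up.

// Sketch of "placement-new into an arena + destructor-only deleter".
#include <cstddef>
#include <iostream>
#include <memory>
#include <new>
#include <string>
#include <unordered_map>

struct Rec {
  std::string Name;
  explicit Rec(std::string N) : Name(std::move(N)) {}
  ~Rec() { std::cout << "destroying " << Name << "\n"; }
};

// Only run the destructor; the arena owns and later releases the raw memory.
struct DtorOnlyDeleter {
  void operator()(Rec *R) const { R->~Rec(); }
};

int main() {
  alignas(Rec) std::byte Arena[sizeof(Rec) * 4]; // toy fixed-size arena
  std::size_t Used = 0;

  std::unordered_map<std::string, std::unique_ptr<Rec, DtorOnlyDeleter>> Table;

  auto Create = [&](const std::string &USR, const std::string &Name) -> Rec * {
    auto It = Table.find(USR);
    if (It != Table.end()) // deduplicate on USR, as createRecord does
      return It->second.get();
    Rec *R = new (Arena + Used) Rec(Name); // placement new, no heap call
    Used += sizeof(Rec);
    Table.emplace(USR, std::unique_ptr<Rec, DtorOnlyDeleter>(R));
    return R;
  };

  Create("c:@S@Foo", "Foo");
  Create("c:@S@Foo", "Foo"); // second call returns the existing record
  std::cout << "records stored: " << Table.size() << "\n"; // prints 1
}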
- ObjCInstanceVariableRecord *addObjCInstanceVariable( - ObjCContainerRecord *Container, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - ObjCInstanceVariableRecord::AccessControl Access, - bool IsFromSystemHeader); +public: + const std::string ProductName; +}; - /// Create and add an Objective-C protocol record into the API set. - /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. - ObjCProtocolRecord * - addObjCProtocol(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, bool IsFromSystemHeader); +template +typename std::enable_if_t, RecordTy> * +APISet::createRecord(StringRef USR, StringRef Name, + CtorArgsContTy &&...CtorArgs) { + // Ensure USR refers to a String stored in the allocator. + auto USRString = copyString(USR); + auto Result = USRBasedLookupTable.insert({USRString, nullptr}); + RecordTy *Record; + + // Create the record if it does not already exist + if (Result.second) { + Record = new (Allocator) RecordTy( + USRString, copyString(Name), std::forward(CtorArgs)...); + // Store the record in the record lookup map + Result.first->second = APIRecordStoredPtr(Record); + + if (auto *ParentContext = + dyn_cast_if_present(Record->Parent.Record)) + ParentContext->addToRecordChain(Record); + else + TopLevelRecords.push_back(Record); + } else { + Record = dyn_cast(Result.first->second.get()); + } - /// Create a macro definition record into the API set. - /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSRForMacro(StringRef Name, - /// SourceLocation SL, const SourceManager &SM) is a helper method to generate - /// the USR for the macro and keep it alive in APISet. - MacroDefinitionRecord *addMacroDefinition(StringRef Name, StringRef USR, - PresumedLoc Loc, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - bool IsFromSystemHeader); - - /// Create a typedef record into the API set. - /// - /// Note: the caller is responsible for keeping the StringRef \p Name and - /// \p USR alive. APISet::copyString provides a way to copy strings into - /// APISet itself, and APISet::recordUSR(const Decl *D) is a helper method - /// to generate the USR for \c D and keep it alive in APISet. - TypedefRecord * - addTypedef(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - SymbolReference UnderlyingType, bool IsFromSystemHeader); - - /// A mapping type to store a set of APIRecord%s with the USR as the key. - template ::value>> - using RecordMap = llvm::MapVector>; + return Record; +} - /// Get the target triple for the ExtractAPI invocation. - const llvm::Triple &getTarget() const { return Target; } +// Helper type for implementing casting to RecordContext pointers. +// Selected when FromTy not a known subclass of RecordContext. 
+template > +struct ToRecordContextCastInfoWrapper { + static_assert(std::is_base_of_v, + "Can only cast APIRecord and derived classes to RecordContext"); - /// Get the language used by the APIs. - Language getLanguage() const { return Lang; } + static bool isPossible(FromTy *From) { return RecordContext::classof(From); } - const RecordMap &getNamespaces() const { return Namespaces; } - const RecordMap &getGlobalFunctions() const { - return GlobalFunctions; - } - const RecordMap & - getGlobalFunctionTemplates() const { - return GlobalFunctionTemplates; - } - const RecordMap & - getGlobalFunctionTemplateSpecializations() const { - return GlobalFunctionTemplateSpecializations; - } - const RecordMap &getGlobalVariables() const { - return GlobalVariables; - } - const RecordMap & - getGlobalVariableTemplates() const { - return GlobalVariableTemplates; + static RecordContext *doCast(FromTy *From) { + return APIRecord::castToRecordContext(From); } - const RecordMap &getStaticFields() const { - return StaticFields; - } - const RecordMap & - getGlobalVariableTemplateSpecializations() const { - return GlobalVariableTemplateSpecializations; - } - const RecordMap & - getGlobalVariableTemplatePartialSpecializations() const { - return GlobalVariableTemplatePartialSpecializations; - } - const RecordMap &getEnums() const { return Enums; } - const RecordMap &getRecords() const { return Records; } - const RecordMap &getCXXClasses() const { return CXXClasses; } - const RecordMap &getCXXMethodTemplates() const { - return CXXMethodTemplates; - } - const RecordMap &getCXXInstanceMethods() const { - return CXXInstanceMethods; - } - const RecordMap &getCXXStaticMethods() const { - return CXXStaticMethods; - } - const RecordMap &getCXXFields() const { return CXXFields; } - const RecordMap & - getCXXMethodTemplateSpecializations() const { - return CXXMethodTemplateSpecializations; - } - const RecordMap &getCXXFieldTemplates() const { - return CXXFieldTemplates; - } - const RecordMap &getClassTemplates() const { - return ClassTemplates; - } - const RecordMap & - getClassTemplateSpecializations() const { - return ClassTemplateSpecializations; +}; + +// Selected when FromTy is a known subclass of RecordContext. +template struct ToRecordContextCastInfoWrapper { + static_assert(std::is_base_of_v, + "Can only cast APIRecord and derived classes to RecordContext"); + static bool isPossible(const FromTy *From) { return true; } + static RecordContext *doCast(FromTy *From) { + return static_cast(From); } - const RecordMap & - getClassTemplatePartialSpecializations() const { - return ClassTemplatePartialSpecializations; +}; + +// Helper type for implementing casting to RecordContext pointers. +// Selected when ToTy isn't a known subclass of RecordContext +template > +struct FromRecordContextCastInfoWrapper { + static_assert( + std::is_base_of_v, + "Can only class RecordContext to APIRecord and derived classes"); + + static bool isPossible(RecordContext *Ctx) { + return ToTy::classofKind(Ctx->getKind()); } - const RecordMap &getConcepts() const { return Concepts; } - const RecordMap &getObjCCategories() const { - return ObjCCategories; + + static ToTy *doCast(RecordContext *Ctx) { + return APIRecord::castFromRecordContext(Ctx); } - const RecordMap &getObjCInterfaces() const { - return ObjCInterfaces; +}; + +// Selected when ToTy is a known subclass of RecordContext. 
+template struct FromRecordContextCastInfoWrapper { + static_assert( + std::is_base_of_v, + "Can only class RecordContext to APIRecord and derived classes"); + static bool isPossible(RecordContext *Ctx) { + return ToTy::classof(Ctx->getKind()); } - const RecordMap &getObjCProtocols() const { - return ObjCProtocols; + static RecordContext *doCast(RecordContext *Ctx) { + return static_cast(Ctx); } - const RecordMap &getMacros() const { return Macros; } - const RecordMap &getTypedefs() const { return Typedefs; } - - /// Finds the APIRecord for a given USR. - /// - /// \returns a pointer to the APIRecord associated with that USR or nullptr. - APIRecord *findRecordForUSR(StringRef USR) const; - - /// Generate and store the USR of declaration \p D. - /// - /// Note: The USR string is stored in and owned by Allocator. - /// - /// \returns a StringRef of the generated USR string. - StringRef recordUSR(const Decl *D); - - /// Generate and store the USR for a macro \p Name. - /// - /// Note: The USR string is stored in and owned by Allocator. - /// - /// \returns a StringRef to the generate USR string. - StringRef recordUSRForMacro(StringRef Name, SourceLocation SL, - const SourceManager &SM); - - /// Copy \p String into the Allocator in this APISet. - /// - /// \returns a StringRef of the copied string in APISet::Allocator. - StringRef copyString(StringRef String); +}; - APISet(const llvm::Triple &Target, Language Lang, - const std::string &ProductName) - : Target(Target), Lang(Lang), ProductName(ProductName) {} +} // namespace extractapi +} // namespace clang -private: - /// BumpPtrAllocator to store generated/copied strings. - /// - /// Note: The main use for this is being able to deduplicate strings. - llvm::BumpPtrAllocator StringAllocator; +// Implement APIRecord (and derived classes) to and from RecordContext +// conversions +namespace llvm { + +template +struct CastInfo<::clang::extractapi::RecordContext, FromTy *> + : public NullableValueCastFailed<::clang::extractapi::RecordContext *>, + public DefaultDoCastIfPossible< + ::clang::extractapi::RecordContext *, FromTy *, + CastInfo<::clang::extractapi::RecordContext, FromTy *>> { + static inline bool isPossible(FromTy *From) { + return ::clang::extractapi::ToRecordContextCastInfoWrapper< + FromTy>::isPossible(From); + } - const llvm::Triple Target; - const Language Lang; + static inline ::clang::extractapi::RecordContext *doCast(FromTy *From) { + return ::clang::extractapi::ToRecordContextCastInfoWrapper::doCast( + From); + } +}; - llvm::DenseMap USRBasedLookupTable; - RecordMap Namespaces; - RecordMap GlobalFunctions; - RecordMap GlobalFunctionTemplates; - RecordMap - GlobalFunctionTemplateSpecializations; - RecordMap GlobalVariables; - RecordMap GlobalVariableTemplates; - RecordMap - GlobalVariableTemplateSpecializations; - RecordMap - GlobalVariableTemplatePartialSpecializations; - RecordMap Concepts; - RecordMap StaticFields; - RecordMap Enums; - RecordMap Records; - RecordMap CXXClasses; - RecordMap CXXFields; - RecordMap CXXMethods; - RecordMap CXXInstanceMethods; - RecordMap CXXStaticMethods; - RecordMap CXXMethodTemplates; - RecordMap - CXXMethodTemplateSpecializations; - RecordMap CXXFieldTemplates; - RecordMap ClassTemplates; - RecordMap ClassTemplateSpecializations; - RecordMap - ClassTemplatePartialSpecializations; - RecordMap ObjCCategories; - RecordMap ObjCInterfaces; - RecordMap ObjCProtocols; - RecordMap Macros; - RecordMap Typedefs; +template +struct CastInfo<::clang::extractapi::RecordContext, const FromTy *> + : public 
ConstStrippingForwardingCast<
+          ::clang::extractapi::RecordContext, const FromTy *,
+          CastInfo<::clang::extractapi::RecordContext, FromTy *>> {};
+
+template <typename ToTy>
+struct CastInfo<ToTy, ::clang::extractapi::RecordContext *>
+    : public NullableValueCastFailed<ToTy *>,
+      public DefaultDoCastIfPossible<
+          ToTy *, ::clang::extractapi::RecordContext *,
+          CastInfo<ToTy, ::clang::extractapi::RecordContext *>> {
+  static inline bool isPossible(::clang::extractapi::RecordContext *Ctx) {
+    return ::clang::extractapi::FromRecordContextCastInfoWrapper<
+        ToTy>::isPossible(Ctx);
+  }
 
-public:
-  const std::string ProductName;
+  static inline ToTy *doCast(::clang::extractapi::RecordContext *Ctx) {
+    return ::clang::extractapi::FromRecordContextCastInfoWrapper<ToTy>::doCast(
+        Ctx);
+  }
 };
 
-} // namespace extractapi
-} // namespace clang
+template <typename ToTy>
+struct CastInfo<ToTy, const ::clang::extractapi::RecordContext *>
+    : public ConstStrippingForwardingCast<
+          ToTy, const ::clang::extractapi::RecordContext *,
+          CastInfo<ToTy, ::clang::extractapi::RecordContext *>> {};
+
+} // namespace llvm
 
 #endif // LLVM_CLANG_EXTRACTAPI_API_H
diff --git a/clang/include/clang/ExtractAPI/APIRecords.inc b/clang/include/clang/ExtractAPI/APIRecords.inc
new file mode 100644
index 0000000..15fee80
--- /dev/null
+++ b/clang/include/clang/ExtractAPI/APIRecords.inc
@@ -0,0 +1,103 @@
+//===- ExtractAPI/APIRecords.inc --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file defines the classes defined from ExtractAPI's APIRecord
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef ABSTRACT_RECORD
+#define ABSTRACT_RECORD(CLASS, BASE) RECORD(CLASS, BASE)
+#endif
+#ifndef CONCRETE_RECORD
+#define CONCRETE_RECORD(CLASS, BASE, KIND) RECORD(CLASS, BASE)
+#endif
+#ifndef RECORD
+#define RECORD(CLASS, BASE)
+#endif
+
+CONCRETE_RECORD(NamespaceRecord, APIRecord, RK_Namespace)
+CONCRETE_RECORD(GlobalFunctionRecord, APIRecord, RK_GlobalFunction)
+CONCRETE_RECORD(GlobalFunctionTemplateRecord, GlobalFunctionRecord,
+                RK_GlobalFunctionTemplate)
+CONCRETE_RECORD(GlobalFunctionTemplateSpecializationRecord,
+                GlobalFunctionRecord, RK_GlobalFunctionTemplateSpecialization)
+CONCRETE_RECORD(GlobalVariableRecord, APIRecord, RK_GlobalVariable)
+CONCRETE_RECORD(GlobalVariableTemplateRecord, GlobalVariableRecord,
+                RK_GlobalVariableTemplate)
+CONCRETE_RECORD(GlobalVariableTemplateSpecializationRecord,
+                GlobalVariableRecord, RK_GlobalVariableTemplateSpecialization)
+CONCRETE_RECORD(GlobalVariableTemplatePartialSpecializationRecord,
+                GlobalVariableRecord,
+                RK_GlobalVariableTemplatePartialSpecialization)
+CONCRETE_RECORD(EnumConstantRecord, APIRecord, RK_EnumConstant)
+CONCRETE_RECORD(EnumRecord, APIRecord, RK_Enum)
+ABSTRACT_RECORD(RecordFieldRecord, APIRecord)
+ABSTRACT_RECORD(RecordRecord, APIRecord)
+CONCRETE_RECORD(StructFieldRecord, RecordFieldRecord, RK_StructField)
+CONCRETE_RECORD(StructRecord, APIRecord, RK_Struct)
+CONCRETE_RECORD(UnionFieldRecord, RecordFieldRecord, RK_UnionField)
+CONCRETE_RECORD(UnionRecord, APIRecord, RK_Union)
+CONCRETE_RECORD(CXXFieldRecord, APIRecord, RK_CXXField)
+CONCRETE_RECORD(CXXFieldTemplateRecord, CXXFieldRecord, RK_CXXFieldTemplate)
+ABSTRACT_RECORD(CXXMethodRecord, APIRecord)
+CONCRETE_RECORD(CXXConstructorRecord, CXXMethodRecord, RK_CXXConstructorMethod)
+CONCRETE_RECORD(CXXDestructorRecord, CXXMethodRecord, RK_CXXDestructorMethod)
+CONCRETE_RECORD(CXXStaticMethodRecord, CXXMethodRecord, RK_CXXStaticMethod)
+CONCRETE_RECORD(CXXInstanceMethodRecord, CXXMethodRecord, RK_CXXInstanceMethod)
+CONCRETE_RECORD(CXXMethodTemplateRecord, CXXMethodRecord, RK_CXXMethodTemplate)
+CONCRETE_RECORD(CXXMethodTemplateSpecializationRecord, CXXMethodRecord,
+                RK_CXXMethodTemplateSpecialization)
+ABSTRACT_RECORD(ObjCPropertyRecord, APIRecord)
+CONCRETE_RECORD(ObjCInstancePropertyRecord, ObjCPropertyRecord,
+                RK_ObjCInstanceProperty)
+CONCRETE_RECORD(ObjCClassPropertyRecord, ObjCPropertyRecord,
+                RK_ObjCClassProperty)
+CONCRETE_RECORD(ObjCInstanceVariableRecord, APIRecord, RK_ObjCIvar)
+ABSTRACT_RECORD(ObjCMethodRecord, APIRecord)
+CONCRETE_RECORD(ObjCInstanceMethodRecord, ObjCMethodRecord,
+                RK_ObjCInstanceMethod)
+CONCRETE_RECORD(ObjCClassMethodRecord, ObjCMethodRecord, RK_ObjCClassMethod)
+CONCRETE_RECORD(StaticFieldRecord, CXXFieldRecord, RK_StaticField)
+ABSTRACT_RECORD(ObjCContainerRecord, APIRecord)
+CONCRETE_RECORD(CXXClassRecord, APIRecord, RK_CXXClass)
+CONCRETE_RECORD(ClassTemplateRecord, CXXClassRecord, RK_ClassTemplate)
+CONCRETE_RECORD(ClassTemplateSpecializationRecord, CXXClassRecord,
+                RK_ClassTemplateSpecialization)
+CONCRETE_RECORD(ClassTemplatePartialSpecializationRecord, CXXClassRecord,
+                RK_ClassTemplatePartialSpecialization)
+CONCRETE_RECORD(ConceptRecord, APIRecord, RK_Concept)
+CONCRETE_RECORD(ObjCCategoryRecord, ObjCContainerRecord, RK_ObjCCategory)
+CONCRETE_RECORD(ObjCInterfaceRecord, ObjCContainerRecord, RK_ObjCInterface)
+CONCRETE_RECORD(ObjCProtocolRecord, ObjCContainerRecord, RK_ObjCProtocol)
+CONCRETE_RECORD(MacroDefinitionRecord, APIRecord, RK_MacroDefinition)
+CONCRETE_RECORD(TypedefRecord, APIRecord, RK_Typedef)
+
+#undef CONCRETE_RECORD
+#undef ABSTRACT_RECORD
+#undef RECORD
+
+#ifndef RECORD_CONTEXT
+#define RECORD_CONTEXT(CLASS, KIND)
+#endif
+
+RECORD_CONTEXT(NamespaceRecord, RK_Namespace)
+RECORD_CONTEXT(EnumRecord, RK_Enum)
+RECORD_CONTEXT(StructRecord, RK_Struct)
+RECORD_CONTEXT(UnionRecord, RK_Union)
+RECORD_CONTEXT(ObjCCategoryRecord, RK_ObjCCategory)
+RECORD_CONTEXT(ObjCInterfaceRecord, RK_ObjCInterface)
+RECORD_CONTEXT(ObjCProtocolRecord, RK_ObjCProtocol)
+RECORD_CONTEXT(CXXClassRecord, RK_CXXClass)
+RECORD_CONTEXT(ClassTemplateRecord, RK_ClassTemplate)
+RECORD_CONTEXT(ClassTemplateSpecializationRecord,
+               RK_ClassTemplateSpecialization)
+RECORD_CONTEXT(ClassTemplatePartialSpecializationRecord,
+               RK_ClassTemplatePartialSpecialization)
+
+#undef RECORD_CONTEXT
diff --git a/clang/include/clang/ExtractAPI/DeclarationFragments.h b/clang/include/clang/ExtractAPI/DeclarationFragments.h
index 8a3a22d..94392c1 100644
--- a/clang/include/clang/ExtractAPI/DeclarationFragments.h
+++ b/clang/include/clang/ExtractAPI/DeclarationFragments.h
@@ -180,6 +180,18 @@ public:
   /// appending to chain up consecutive appends.
   DeclarationFragments &appendSpace();
 
+  /// Append a text Fragment of a semicolon character.
+  ///
+  /// \returns a reference to the DeclarationFragments object itself after
+  /// appending to chain up consecutive appends.
+  DeclarationFragments &appendSemicolon();
+
+  /// Removes a trailing semicolon character if present.
+  ///
+  /// \returns a reference to the DeclarationFragments object itself after
+  /// removing to chain up consecutive operations.
+  DeclarationFragments &removeTrailingSemicolon();
+
   /// Get the string description of a FragmentKind \p Kind.
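
The new APIRecords.inc is an X-macro list: a client defines whichever of ABSTRACT_RECORD, CONCRETE_RECORD, RECORD, or RECORD_CONTEXT it needs, includes the file, and gets one expansion per entry (the file undefines the macros again at the end). The following self-contained sketch shows that consumption pattern; a three-entry excerpt is inlined in place of the real #include, and the enum plus the kindSpelling helper are invented for illustration.

#include <iostream>

enum RecordKind { RK_ObjCInstanceMethod, RK_ObjCClassMethod };

// The client supplies the macros it cares about, then pulls in the list.
// The three entries below stand in for #include "clang/ExtractAPI/APIRecords.inc".
#define ABSTRACT_RECORD(CLASS, BASE)
#define CONCRETE_RECORD(CLASS, BASE, KIND) case KIND: return #CLASS;

const char *kindSpelling(RecordKind K) {
  switch (K) {
  // One case per concrete record; abstract records generate nothing here.
  ABSTRACT_RECORD(ObjCMethodRecord, APIRecord)
  CONCRETE_RECORD(ObjCInstanceMethodRecord, ObjCMethodRecord,
                  RK_ObjCInstanceMethod)
  CONCRETE_RECORD(ObjCClassMethodRecord, ObjCMethodRecord, RK_ObjCClassMethod)
  }
  return "unknown";
}

#undef CONCRETE_RECORD
#undef ABSTRACT_RECORD

int main() { std::cout << kindSpelling(RK_ObjCClassMethod) << "\n"; }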
static StringRef getFragmentKindString(FragmentKind Kind); @@ -192,12 +204,14 @@ public: static DeclarationFragments getStructureTypeFragment(const RecordDecl *Decl); private: + DeclarationFragments &appendUnduplicatedTextCharacter(char Character); std::vector Fragments; }; class AccessControl { public: AccessControl(std::string Access) : Access(Access) {} + AccessControl() : Access("public") {} const std::string &getAccess() const { return Access; } diff --git a/clang/include/clang/ExtractAPI/ExtractAPIActionBase.h b/clang/include/clang/ExtractAPI/ExtractAPIActionBase.h index ac4f391..08210a7 100644 --- a/clang/include/clang/ExtractAPI/ExtractAPIActionBase.h +++ b/clang/include/clang/ExtractAPI/ExtractAPIActionBase.h @@ -17,6 +17,8 @@ #include "clang/ExtractAPI/API.h" #include "clang/ExtractAPI/APIIgnoresList.h" +#include "clang/Frontend/CompilerInstance.h" +#include "llvm/Support/raw_ostream.h" namespace clang { @@ -29,8 +31,8 @@ protected: /// A representation of the APIs this action extracts. std::unique_ptr API; - /// A stream to the output file of this action. - std::unique_ptr OS; + /// A stream to the main output file of this action. + std::unique_ptr OS; /// The product this action is extracting API information for. std::string ProductName; @@ -46,7 +48,7 @@ protected: /// /// Use the serializer to generate output symbol graph files from /// the information gathered during the execution of Action. - void ImplEndSourceFileAction(); + void ImplEndSourceFileAction(CompilerInstance &CI); }; } // namespace clang diff --git a/clang/include/clang/ExtractAPI/ExtractAPIVisitor.h b/clang/include/clang/ExtractAPI/ExtractAPIVisitor.h index e1c3e41..4cb8668 100644 --- a/clang/include/clang/ExtractAPI/ExtractAPIVisitor.h +++ b/clang/include/clang/ExtractAPI/ExtractAPIVisitor.h @@ -14,23 +14,23 @@ #ifndef LLVM_CLANG_EXTRACTAPI_EXTRACT_API_VISITOR_H #define LLVM_CLANG_EXTRACTAPI_EXTRACT_API_VISITOR_H -#include "clang/AST/Availability.h" +#include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclCXX.h" +#include "clang/AST/DeclObjC.h" #include "clang/AST/DeclTemplate.h" -#include "clang/Basic/OperatorKinds.h" -#include "clang/Basic/Specifiers.h" -#include "clang/ExtractAPI/DeclarationFragments.h" -#include "llvm/ADT/FunctionExtras.h" - -#include "clang/AST/ASTContext.h" #include "clang/AST/ParentMapContext.h" #include "clang/AST/RecursiveASTVisitor.h" +#include "clang/Basic/Module.h" #include "clang/Basic/SourceManager.h" +#include "clang/Basic/Specifiers.h" #include "clang/ExtractAPI/API.h" +#include "clang/ExtractAPI/DeclarationFragments.h" #include "clang/ExtractAPI/TypedefUnderlyingTypeResolver.h" #include "clang/Index/USRGeneration.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" #include namespace clang { @@ -130,12 +130,6 @@ protected: void recordEnumConstants(EnumRecord *EnumRecord, const EnumDecl::enumerator_range Constants); - /// Collect API information for the record fields and associate with the - /// parent struct. - void recordRecordFields(RecordRecord *RecordRecord, - APIRecord::RecordKind FieldKind, - const RecordDecl::field_range Fields); - /// Collect API information for the Objective-C methods and associate with the /// parent container. 
void recordObjCMethods(ObjCContainerRecord *Container, @@ -172,6 +166,7 @@ private: return *static_cast(this); } +protected: SmallVector getBases(const CXXRecordDecl *Decl) { // FIXME: store AccessSpecifier given by inheritance SmallVector Bases; @@ -182,49 +177,54 @@ private: SymbolReference BaseClass; if (BaseSpecifier.getType().getTypePtr()->isTemplateTypeParmType()) { BaseClass.Name = API.copyString(BaseSpecifier.getType().getAsString()); - BaseClass.USR = API.recordUSR( - BaseSpecifier.getType()->getAs()->getDecl()); + if (auto *TTPTD = BaseSpecifier.getType() + ->getAs() + ->getDecl()) { + SmallString<128> USR; + index::generateUSRForDecl(TTPTD, USR); + BaseClass.USR = API.copyString(USR); + BaseClass.Source = API.copyString(getOwningModuleName(*TTPTD)); + } } else { - CXXRecordDecl *BaseClassDecl = - BaseSpecifier.getType().getTypePtr()->getAsCXXRecordDecl(); - BaseClass.Name = BaseClassDecl->getName(); - BaseClass.USR = API.recordUSR(BaseClassDecl); + BaseClass = createSymbolReferenceForDecl( + *BaseSpecifier.getType().getTypePtr()->getAsCXXRecordDecl()); } Bases.emplace_back(BaseClass); } return Bases; } - APIRecord *determineParentRecord(const DeclContext *Context) { - SmallString<128> ParentUSR; - if (Context->getDeclKind() == Decl::TranslationUnit) - return nullptr; + StringRef getOwningModuleName(const Decl &D) { + if (auto *OwningModule = D.getImportedOwningModule()) + return OwningModule->Name; - index::generateUSRForDecl(dyn_cast(Context), ParentUSR); + return {}; + } - APIRecord *Parent = API.findRecordForUSR(ParentUSR); - return Parent; + SymbolReference createHierarchyInformationForDecl(const Decl &D) { + const auto *Context = cast_if_present(D.getDeclContext()); + + if (!Context || isa(Context)) + return {}; + + return createSymbolReferenceForDecl(*Context); } -}; -template -static void modifyRecords(const T &Records, const StringRef &Name) { - for (const auto &Record : Records) { - if (Name == Record.second.get()->Name) { - auto &DeclFragment = Record.second->Declaration; - DeclFragment.insert(DeclFragment.begin(), " ", - DeclarationFragments::FragmentKind::Text); - DeclFragment.insert(DeclFragment.begin(), "typedef", - DeclarationFragments::FragmentKind::Keyword, "", - nullptr); - DeclFragment.insert(--DeclFragment.end(), " { ... } ", - DeclarationFragments::FragmentKind::Text); - DeclFragment.insert(--DeclFragment.end(), Name, - DeclarationFragments::FragmentKind::Identifier); - break; - } + SymbolReference createSymbolReferenceForDecl(const Decl &D) { + SmallString<128> USR; + index::generateUSRForDecl(&D, USR); + + APIRecord *Record = API.findRecordForUSR(USR); + if (Record) + return SymbolReference(Record); + + StringRef Name; + if (auto *ND = dyn_cast(&D)) + Name = ND->getName(); + + return API.createSymbolReference(Name, USR, getOwningModuleName(D)); } -} +}; template bool ExtractAPIVisitorBase::VisitVarDecl(const VarDecl *Decl) { @@ -251,7 +251,8 @@ bool ExtractAPIVisitorBase::VisitVarDecl(const VarDecl *Decl) { // Collect symbol information. 
StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); LinkageInfo Linkage = Decl->getLinkageAndVisibility(); @@ -267,21 +268,17 @@ bool ExtractAPIVisitorBase::VisitVarDecl(const VarDecl *Decl) { DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); if (Decl->isStaticDataMember()) { - SymbolReference Context; - // getDeclContext() should return a RecordDecl since we - // are currently handling a static data member. - auto *Record = cast(Decl->getDeclContext()); - Context.Name = Record->getName(); - Context.USR = API.recordUSR(Record); auto Access = DeclarationFragmentsBuilder::getAccessControl(Decl); - API.addStaticField(Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), - Linkage, Comment, Declaration, SubHeading, Context, - Access, isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, Declaration, + SubHeading, Access, isInSystemHeader(Decl)); } else // Add the global variable record to the API set. - API.addGlobalVar(Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), - Linkage, Comment, Declaration, SubHeading, - isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, Declaration, + SubHeading, isInSystemHeader(Decl)); return true; } @@ -304,7 +301,7 @@ bool ExtractAPIVisitorBase::VisitFunctionDecl( return true; } - // Skip templated functions. + // Skip templated functions that aren't processed here. switch (Decl->getTemplatedKind()) { case FunctionDecl::TK_NonTemplate: case FunctionDecl::TK_DependentNonTemplate: @@ -321,7 +318,8 @@ bool ExtractAPIVisitorBase::VisitFunctionDecl( // Collect symbol information. StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); LinkageInfo Linkage = Decl->getLinkageAndVisibility(); @@ -337,18 +335,19 @@ bool ExtractAPIVisitorBase::VisitFunctionDecl( FunctionSignature Signature = DeclarationFragmentsBuilder::getFunctionSignature(Decl); if (Decl->getTemplateSpecializationInfo()) - API.addGlobalFunctionTemplateSpecialization( - Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Linkage, - Comment, + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, DeclarationFragmentsBuilder:: getFragmentsForFunctionTemplateSpecialization(Decl), SubHeading, Signature, isInSystemHeader(Decl)); else // Add the function record to the API set. 
- API.addGlobalFunction( - Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Linkage, - Comment, DeclarationFragmentsBuilder::getFragmentsForFunction(Decl), - SubHeading, Signature, isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, + DeclarationFragmentsBuilder::getFragmentsForFunction(Decl), SubHeading, + Signature, isInSystemHeader(Decl)); return true; } @@ -368,7 +367,8 @@ bool ExtractAPIVisitorBase::VisitEnumDecl(const EnumDecl *Decl) { Name = QualifiedNameBuffer.str(); } - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -382,13 +382,13 @@ bool ExtractAPIVisitorBase::VisitEnumDecl(const EnumDecl *Decl) { DeclarationFragmentsBuilder::getFragmentsForEnum(Decl); DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); - EnumRecord *EnumRecord = API.addEnum( - API.copyString(Name), USR, Loc, AvailabilityInfo::createFromDecl(Decl), - Comment, Declaration, SubHeading, isInSystemHeader(Decl)); + auto *ER = API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, SubHeading, + isInSystemHeader(Decl)); // Now collect information about the enumerators in this enum. - getDerivedExtractAPIVisitor().recordEnumConstants(EnumRecord, - Decl->enumerators()); + getDerivedExtractAPIVisitor().recordEnumConstants(ER, Decl->enumerators()); return true; } @@ -476,13 +476,13 @@ bool ExtractAPIVisitorBase::WalkUpFromNamespaceDecl( template bool ExtractAPIVisitorBase::VisitNamespaceDecl( const NamespaceDecl *Decl) { - if (!getDerivedExtractAPIVisitor().shouldDeclBeIncluded(Decl)) return true; if (Decl->isAnonymousNamespace()) return true; StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); LinkageInfo Linkage = Decl->getLinkageAndVisibility(); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); @@ -497,10 +497,10 @@ bool ExtractAPIVisitorBase::VisitNamespaceDecl( DeclarationFragmentsBuilder::getFragmentsForNamespace(Decl); DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); - APIRecord *Parent = determineParentRecord(Decl->getDeclContext()); - API.addNamespace(Parent, Name, USR, Loc, - AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, - Declaration, SubHeading, isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, Declaration, + SubHeading, isInSystemHeader(Decl)); return true; } @@ -509,14 +509,20 @@ template bool ExtractAPIVisitorBase::VisitRecordDecl(const RecordDecl *Decl) { if (!getDerivedExtractAPIVisitor().shouldDeclBeIncluded(Decl)) return true; + + SmallString<128> QualifiedNameBuffer; // Collect symbol information. 
StringRef Name = Decl->getName(); if (Name.empty()) Name = getTypedefName(Decl); - if (Name.empty()) - return true; + if (Name.empty()) { + llvm::raw_svector_ostream OS(QualifiedNameBuffer); + Decl->printQualifiedName(OS); + Name = QualifiedNameBuffer.str(); + } - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -531,21 +537,16 @@ bool ExtractAPIVisitorBase::VisitRecordDecl(const RecordDecl *Decl) { DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); - auto RecordKind = APIRecord::RK_Struct; - auto FieldRecordKind = APIRecord::RK_StructField; - - if (Decl->isUnion()) { - RecordKind = APIRecord::RK_Union; - FieldRecordKind = APIRecord::RK_UnionField; - } - - RecordRecord *RecordRecord = API.addRecord( - Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Comment, - Declaration, SubHeading, RecordKind, isInSystemHeader(Decl)); - - // Now collect information about the fields in this struct. - getDerivedExtractAPIVisitor().recordRecordFields( - RecordRecord, FieldRecordKind, Decl->fields()); + if (Decl->isUnion()) + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, + SubHeading, isInSystemHeader(Decl)); + else + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, + SubHeading, isInSystemHeader(Decl)); return true; } @@ -558,7 +559,8 @@ bool ExtractAPIVisitorBase::VisitCXXRecordDecl( return true; StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -580,24 +582,25 @@ bool ExtractAPIVisitorBase::VisitCXXRecordDecl( Kind = APIRecord::RecordKind::RK_CXXClass; auto Access = DeclarationFragmentsBuilder::getAccessControl(Decl); - APIRecord *Parent = determineParentRecord(Decl->getDeclContext()); - CXXClassRecord *CXXClassRecord; + CXXClassRecord *Record; if (Decl->getDescribedClassTemplate()) { // Inject template fragments before class fragments. 
Declaration.insert( Declaration.begin(), DeclarationFragmentsBuilder::getFragmentsForRedeclarableTemplate( Decl->getDescribedClassTemplate())); - CXXClassRecord = API.addClassTemplate( - Parent, Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Comment, - Declaration, SubHeading, Template(Decl->getDescribedClassTemplate()), - Access, isInSystemHeader(Decl)); + Record = API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, + SubHeading, Template(Decl->getDescribedClassTemplate()), Access, + isInSystemHeader(Decl)); } else - CXXClassRecord = API.addCXXClass( - Parent, Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Comment, - Declaration, SubHeading, Kind, Access, isInSystemHeader(Decl)); + Record = API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, + SubHeading, Kind, Access, isInSystemHeader(Decl)); - CXXClassRecord->Bases = getBases(Decl); + Record->Bases = getBases(Decl); return true; } @@ -614,7 +617,8 @@ bool ExtractAPIVisitorBase::VisitCXXMethodDecl( if (isa(Decl) || isa(Decl)) return true; - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -627,14 +631,10 @@ bool ExtractAPIVisitorBase::VisitCXXMethodDecl( auto Access = DeclarationFragmentsBuilder::getAccessControl(Decl); auto Signature = DeclarationFragmentsBuilder::getFunctionSignature(Decl); - SmallString<128> ParentUSR; - index::generateUSRForDecl(dyn_cast(Decl->getDeclContext()), - ParentUSR); - auto *Parent = API.findRecordForUSR(ParentUSR); - if (Decl->isTemplated()) { - FunctionTemplateDecl *TemplateDecl = Decl->getDescribedFunctionTemplate(); - API.addCXXMethodTemplate( - API.findRecordForUSR(ParentUSR), Decl->getName(), USR, Loc, + if (FunctionTemplateDecl *TemplateDecl = + Decl->getDescribedFunctionTemplate()) { + API.createRecord( + USR, Decl->getName(), createHierarchyInformationForDecl(*Decl), Loc, AvailabilityInfo::createFromDecl(Decl), Comment, DeclarationFragmentsBuilder::getFragmentsForFunctionTemplate( TemplateDecl), @@ -642,27 +642,27 @@ bool ExtractAPIVisitorBase::VisitCXXMethodDecl( DeclarationFragmentsBuilder::getAccessControl(TemplateDecl), Template(TemplateDecl), isInSystemHeader(Decl)); } else if (Decl->getTemplateSpecializationInfo()) - API.addCXXMethodTemplateSpec( - Parent, Decl->getName(), USR, Loc, + API.createRecord( + USR, Decl->getName(), createHierarchyInformationForDecl(*Decl), Loc, AvailabilityInfo::createFromDecl(Decl), Comment, DeclarationFragmentsBuilder:: getFragmentsForFunctionTemplateSpecialization(Decl), SubHeading, Signature, Access, isInSystemHeader(Decl)); else if (Decl->isOverloadedOperator()) - API.addCXXInstanceMethod( - Parent, API.copyString(Decl->getNameAsString()), USR, Loc, - AvailabilityInfo::createFromDecl(Decl), Comment, + API.createRecord( + USR, Decl->getNameAsString(), createHierarchyInformationForDecl(*Decl), + Loc, AvailabilityInfo::createFromDecl(Decl), Comment, DeclarationFragmentsBuilder::getFragmentsForOverloadedOperator(Decl), SubHeading, Signature, Access, isInSystemHeader(Decl)); else if (Decl->isStatic()) - API.addCXXStaticMethod( - Parent, Decl->getName(), USR, Loc, + API.createRecord( + USR, Decl->getName(), createHierarchyInformationForDecl(*Decl), Loc, AvailabilityInfo::createFromDecl(Decl), Comment, 
DeclarationFragmentsBuilder::getFragmentsForCXXMethod(Decl), SubHeading, Signature, Access, isInSystemHeader(Decl)); else - API.addCXXInstanceMethod( - Parent, Decl->getName(), USR, Loc, + API.createRecord( + USR, Decl->getName(), createHierarchyInformationForDecl(*Decl), Loc, AvailabilityInfo::createFromDecl(Decl), Comment, DeclarationFragmentsBuilder::getFragmentsForCXXMethod(Decl), SubHeading, Signature, Access, isInSystemHeader(Decl)); @@ -673,9 +673,13 @@ bool ExtractAPIVisitorBase::VisitCXXMethodDecl( template bool ExtractAPIVisitorBase::VisitCXXConstructorDecl( const CXXConstructorDecl *Decl) { + if (!getDerivedExtractAPIVisitor().shouldDeclBeIncluded(Decl) || + Decl->isImplicit()) + return true; - StringRef Name = API.copyString(Decl->getNameAsString()); - StringRef USR = API.recordUSR(Decl); + auto Name = Decl->getNameAsString(); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -692,22 +696,24 @@ bool ExtractAPIVisitorBase::VisitCXXConstructorDecl( FunctionSignature Signature = DeclarationFragmentsBuilder::getFunctionSignature(Decl); AccessControl Access = DeclarationFragmentsBuilder::getAccessControl(Decl); - SmallString<128> ParentUSR; - index::generateUSRForDecl(dyn_cast(Decl->getDeclContext()), - ParentUSR); - API.addCXXInstanceMethod(API.findRecordForUSR(ParentUSR), Name, USR, Loc, - AvailabilityInfo::createFromDecl(Decl), Comment, - Declaration, SubHeading, Signature, Access, - isInSystemHeader(Decl)); + + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, SubHeading, + Signature, Access, isInSystemHeader(Decl)); return true; } template bool ExtractAPIVisitorBase::VisitCXXDestructorDecl( const CXXDestructorDecl *Decl) { + if (!getDerivedExtractAPIVisitor().shouldDeclBeIncluded(Decl) || + Decl->isImplicit()) + return true; - StringRef Name = API.copyString(Decl->getNameAsString()); - StringRef USR = API.recordUSR(Decl); + auto Name = Decl->getNameAsString(); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -724,13 +730,10 @@ bool ExtractAPIVisitorBase::VisitCXXDestructorDecl( FunctionSignature Signature = DeclarationFragmentsBuilder::getFunctionSignature(Decl); AccessControl Access = DeclarationFragmentsBuilder::getAccessControl(Decl); - SmallString<128> ParentUSR; - index::generateUSRForDecl(dyn_cast(Decl->getDeclContext()), - ParentUSR); - API.addCXXInstanceMethod(API.findRecordForUSR(ParentUSR), Name, USR, Loc, - AvailabilityInfo::createFromDecl(Decl), Comment, - Declaration, SubHeading, Signature, Access, - isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, SubHeading, + Signature, Access, isInSystemHeader(Decl)); return true; } @@ -740,7 +743,8 @@ bool ExtractAPIVisitorBase::VisitConceptDecl(const ConceptDecl *Decl) { return true; StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -752,9 +756,10 @@ bool ExtractAPIVisitorBase::VisitConceptDecl(const ConceptDecl *Decl) { DeclarationFragmentsBuilder::getFragmentsForConcept(Decl); 
DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); - API.addConcept(Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), - Comment, Declaration, SubHeading, Template(Decl), - isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, SubHeading, + Template(Decl), isInSystemHeader(Decl)); return true; } @@ -765,7 +770,8 @@ bool ExtractAPIVisitorBase::VisitClassTemplateSpecializationDecl( return true; StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -779,14 +785,13 @@ bool ExtractAPIVisitorBase::VisitClassTemplateSpecializationDecl( DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); - APIRecord *Parent = determineParentRecord(Decl->getDeclContext()); - auto *ClassTemplateSpecializationRecord = API.addClassTemplateSpecialization( - Parent, Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Comment, - Declaration, SubHeading, + auto *CTSR = API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, SubHeading, DeclarationFragmentsBuilder::getAccessControl(Decl), isInSystemHeader(Decl)); - ClassTemplateSpecializationRecord->Bases = getBases(Decl); + CTSR->Bases = getBases(Decl); return true; } @@ -799,7 +804,8 @@ bool ExtractAPIVisitorBase:: return true; StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -811,15 +817,13 @@ bool ExtractAPIVisitorBase:: getFragmentsForClassTemplatePartialSpecialization(Decl); DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); - APIRecord *Parent = determineParentRecord(Decl->getDeclContext()); - auto *ClassTemplatePartialSpecRecord = - API.addClassTemplatePartialSpecialization( - Parent, Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), - Comment, Declaration, SubHeading, Template(Decl), - DeclarationFragmentsBuilder::getAccessControl(Decl), - isInSystemHeader(Decl)); + auto *CTPSR = API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, SubHeading, + Template(Decl), DeclarationFragmentsBuilder::getAccessControl(Decl), + isInSystemHeader(Decl)); - ClassTemplatePartialSpecRecord->Bases = getBases(Decl); + CTPSR->Bases = getBases(Decl); return true; } @@ -832,7 +836,8 @@ bool ExtractAPIVisitorBase::VisitVarTemplateDecl( // Collect symbol information. 
StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); LinkageInfo Linkage = Decl->getLinkageAndVisibility(); @@ -853,20 +858,17 @@ bool ExtractAPIVisitorBase::VisitVarTemplateDecl( DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); - SmallString<128> ParentUSR; - index::generateUSRForDecl(dyn_cast(Decl->getDeclContext()), - ParentUSR); if (Decl->getDeclContext()->getDeclKind() == Decl::CXXRecord) - API.addCXXFieldTemplate(API.findRecordForUSR(ParentUSR), Name, USR, Loc, - AvailabilityInfo::createFromDecl(Decl), Comment, - Declaration, SubHeading, - DeclarationFragmentsBuilder::getAccessControl(Decl), - Template(Decl), isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, + SubHeading, DeclarationFragmentsBuilder::getAccessControl(Decl), + Template(Decl), isInSystemHeader(Decl)); else - API.addGlobalVariableTemplate(Name, USR, Loc, - AvailabilityInfo::createFromDecl(Decl), - Linkage, Comment, Declaration, SubHeading, - Template(Decl), isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, Declaration, + SubHeading, Template(Decl), isInSystemHeader(Decl)); return true; } @@ -878,7 +880,8 @@ bool ExtractAPIVisitorBase::VisitVarTemplateSpecializationDecl( // Collect symbol information. StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); LinkageInfo Linkage = Decl->getLinkageAndVisibility(); @@ -894,9 +897,10 @@ bool ExtractAPIVisitorBase::VisitVarTemplateSpecializationDecl( Decl); DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); - API.addGlobalVariableTemplateSpecialization( - Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, - Declaration, SubHeading, isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, Declaration, + SubHeading, isInSystemHeader(Decl)); return true; } @@ -908,7 +912,8 @@ bool ExtractAPIVisitorBase::VisitVarTemplatePartialSpecializationDecl( // Collect symbol information. 
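A hedged sketch of inputs that reach the two branches of VisitVarTemplateDecl above (names hypothetical): a variable template declared inside a CXXRecordDecl is recorded with its access control under the enclosing class, while a namespace-scope variable template is recorded as a global variable template.

    // Member variable template: the DeclContext is a CXXRecord, so the
    // first branch records it under 'Limits' with its access control.
    struct Limits {
      template <typename T> static constexpr T Max = T(100);
    };

    // Namespace-scope variable template: falls into the second branch and
    // is recorded as a global variable template.
    template <typename T> constexpr T Pi = T(3.141592653589793);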
StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); LinkageInfo Linkage = Decl->getLinkageAndVisibility(); @@ -923,9 +928,10 @@ bool ExtractAPIVisitorBase::VisitVarTemplatePartialSpecializationDecl( getFragmentsForVarTemplatePartialSpecialization(Decl); DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); - API.addGlobalVariableTemplatePartialSpecialization( - Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, - Declaration, SubHeading, Template(Decl), isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, Declaration, + SubHeading, Template(Decl), isInSystemHeader(Decl)); return true; } @@ -939,7 +945,8 @@ bool ExtractAPIVisitorBase::VisitFunctionTemplateDecl( // Collect symbol information. StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); LinkageInfo Linkage = Decl->getLinkageAndVisibility(); @@ -954,8 +961,9 @@ bool ExtractAPIVisitorBase::VisitFunctionTemplateDecl( FunctionSignature Signature = DeclarationFragmentsBuilder::getFunctionSignature( Decl->getTemplatedDecl()); - API.addGlobalFunctionTemplate( - Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, DeclarationFragmentsBuilder::getFragmentsForFunctionTemplate(Decl), SubHeading, Signature, Template(Decl), isInSystemHeader(Decl)); @@ -970,7 +978,8 @@ bool ExtractAPIVisitorBase::VisitObjCInterfaceDecl( // Collect symbol information. StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); LinkageInfo Linkage = Decl->getLinkageAndVisibility(); @@ -988,24 +997,23 @@ bool ExtractAPIVisitorBase::VisitObjCInterfaceDecl( // Collect super class information. SymbolReference SuperClass; - if (const auto *SuperClassDecl = Decl->getSuperClass()) { - SuperClass.Name = SuperClassDecl->getObjCRuntimeNameAsString(); - SuperClass.USR = API.recordUSR(SuperClassDecl); - } + if (const auto *SuperClassDecl = Decl->getSuperClass()) + SuperClass = createSymbolReferenceForDecl(*SuperClassDecl); - ObjCInterfaceRecord *ObjCInterfaceRecord = API.addObjCInterface( - Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, - Declaration, SubHeading, SuperClass, isInSystemHeader(Decl)); + auto *InterfaceRecord = API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Linkage, Comment, Declaration, + SubHeading, SuperClass, isInSystemHeader(Decl)); // Record all methods (selectors). This doesn't include automatically // synthesized property methods. 
- getDerivedExtractAPIVisitor().recordObjCMethods(ObjCInterfaceRecord, + getDerivedExtractAPIVisitor().recordObjCMethods(InterfaceRecord, Decl->methods()); - getDerivedExtractAPIVisitor().recordObjCProperties(ObjCInterfaceRecord, + getDerivedExtractAPIVisitor().recordObjCProperties(InterfaceRecord, Decl->properties()); - getDerivedExtractAPIVisitor().recordObjCInstanceVariables(ObjCInterfaceRecord, + getDerivedExtractAPIVisitor().recordObjCInstanceVariables(InterfaceRecord, Decl->ivars()); - getDerivedExtractAPIVisitor().recordObjCProtocols(ObjCInterfaceRecord, + getDerivedExtractAPIVisitor().recordObjCProtocols(InterfaceRecord, Decl->protocols()); return true; @@ -1019,7 +1027,8 @@ bool ExtractAPIVisitorBase::VisitObjCProtocolDecl( // Collect symbol information. StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -1034,15 +1043,15 @@ bool ExtractAPIVisitorBase::VisitObjCProtocolDecl( DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); - ObjCProtocolRecord *ObjCProtocolRecord = API.addObjCProtocol( - Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Comment, - Declaration, SubHeading, isInSystemHeader(Decl)); + auto *ProtoRecord = API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, SubHeading, + isInSystemHeader(Decl)); - getDerivedExtractAPIVisitor().recordObjCMethods(ObjCProtocolRecord, - Decl->methods()); - getDerivedExtractAPIVisitor().recordObjCProperties(ObjCProtocolRecord, + getDerivedExtractAPIVisitor().recordObjCMethods(ProtoRecord, Decl->methods()); + getDerivedExtractAPIVisitor().recordObjCProperties(ProtoRecord, Decl->properties()); - getDerivedExtractAPIVisitor().recordObjCProtocols(ObjCProtocolRecord, + getDerivedExtractAPIVisitor().recordObjCProtocols(ProtoRecord, Decl->protocols()); return true; @@ -1061,25 +1070,36 @@ bool ExtractAPIVisitorBase::VisitTypedefNameDecl( if (!getDerivedExtractAPIVisitor().shouldDeclBeIncluded(Decl)) return true; - // Add the notion of typedef for tag type (struct or enum) of the same name. - if (const ElaboratedType *ET = - dyn_cast(Decl->getUnderlyingType())) { - if (const TagType *TagTy = dyn_cast(ET->desugar())) { - if (Decl->getName() == TagTy->getDecl()->getName()) { - if (isa(TagTy->getDecl())) { - modifyRecords(API.getRecords(), Decl->getName()); - } - if (TagTy->getDecl()->isEnum()) { - modifyRecords(API.getEnums(), Decl->getName()); - } + StringRef Name = Decl->getName(); + + // If the underlying type was defined as part of the typedef modify it's + // fragments directly and pretend the typedef doesn't exist. + if (auto *TagDecl = Decl->getUnderlyingType()->getAsTagDecl()) { + if (TagDecl->getName() == Decl->getName() && + TagDecl->isEmbeddedInDeclarator() && TagDecl->isCompleteDefinition()) { + SmallString<128> TagUSR; + index::generateUSRForDecl(TagDecl, TagUSR); + if (auto *Record = API.findRecordForUSR(TagUSR)) { + DeclarationFragments LeadingFragments; + LeadingFragments.append("typedef", + DeclarationFragments::FragmentKind::Keyword, "", + nullptr); + LeadingFragments.appendSpace(); + Record->Declaration.removeTrailingSemicolon() + .insert(Record->Declaration.begin(), std::move(LeadingFragments)) + .append(" { ... 
} ", DeclarationFragments::FragmentKind::Text) + .append(Name, DeclarationFragments::FragmentKind::Identifier) + .appendSemicolon(); + + return true; } } } PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); - StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); DocComment Comment; if (auto *RawComment = getDerivedExtractAPIVisitor().fetchRawCommentForDecl(Decl)) @@ -1091,11 +1111,12 @@ bool ExtractAPIVisitorBase::VisitTypedefNameDecl( TypedefUnderlyingTypeResolver(Context).getSymbolReferenceForType(Type, API); - API.addTypedef(Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), - Comment, - DeclarationFragmentsBuilder::getFragmentsForTypedef(Decl), - DeclarationFragmentsBuilder::getSubHeading(Decl), SymRef, - isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, + DeclarationFragmentsBuilder::getFragmentsForTypedef(Decl), + DeclarationFragmentsBuilder::getSubHeading(Decl), SymRef, + isInSystemHeader(Decl)); return true; } @@ -1107,7 +1128,8 @@ bool ExtractAPIVisitorBase::VisitObjCCategoryDecl( return true; StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -1122,29 +1144,20 @@ bool ExtractAPIVisitorBase::VisitObjCCategoryDecl( DeclarationFragmentsBuilder::getSubHeading(Decl); const ObjCInterfaceDecl *InterfaceDecl = Decl->getClassInterface(); - SymbolReference Interface(InterfaceDecl->getName(), - API.recordUSR(InterfaceDecl)); - - bool IsFromExternalModule = true; - for (const auto &Interface : API.getObjCInterfaces()) { - if (InterfaceDecl->getName() == Interface.second.get()->Name) { - IsFromExternalModule = false; - break; - } - } + SymbolReference Interface = createSymbolReferenceForDecl(*InterfaceDecl); - ObjCCategoryRecord *ObjCCategoryRecord = API.addObjCCategory( - Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Comment, - Declaration, SubHeading, Interface, isInSystemHeader(Decl), - IsFromExternalModule); + auto *CategoryRecord = API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, SubHeading, + Interface, isInSystemHeader(Decl)); - getDerivedExtractAPIVisitor().recordObjCMethods(ObjCCategoryRecord, + getDerivedExtractAPIVisitor().recordObjCMethods(CategoryRecord, Decl->methods()); - getDerivedExtractAPIVisitor().recordObjCProperties(ObjCCategoryRecord, + getDerivedExtractAPIVisitor().recordObjCProperties(CategoryRecord, Decl->properties()); - getDerivedExtractAPIVisitor().recordObjCInstanceVariables(ObjCCategoryRecord, + getDerivedExtractAPIVisitor().recordObjCInstanceVariables(CategoryRecord, Decl->ivars()); - getDerivedExtractAPIVisitor().recordObjCProtocols(ObjCCategoryRecord, + getDerivedExtractAPIVisitor().recordObjCProtocols(CategoryRecord, Decl->protocols()); return true; @@ -1158,7 +1171,8 @@ void ExtractAPIVisitorBase::recordEnumConstants( for (const auto *Constant : Constants) { // Collect symbol information. 
StringRef Name = Constant->getName(); - StringRef USR = API.recordUSR(Constant); + SmallString<128> USR; + index::generateUSRForDecl(Constant, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Constant->getLocation()); DocComment Comment; @@ -1173,51 +1187,26 @@ void ExtractAPIVisitorBase::recordEnumConstants( DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Constant); - API.addEnumConstant(EnumRecord, Name, USR, Loc, - AvailabilityInfo::createFromDecl(Constant), Comment, - Declaration, SubHeading, isInSystemHeader(Constant)); - } -} - -/// Collect API information for the struct fields and associate with the -/// parent struct. -template -void ExtractAPIVisitorBase::recordRecordFields( - RecordRecord *RecordRecord, APIRecord::RecordKind FieldKind, - const RecordDecl::field_range Fields) { - for (const auto *Field : Fields) { - // Collect symbol information. - StringRef Name = Field->getName(); - StringRef USR = API.recordUSR(Field); - PresumedLoc Loc = - Context.getSourceManager().getPresumedLoc(Field->getLocation()); - DocComment Comment; - if (auto *RawComment = - getDerivedExtractAPIVisitor().fetchRawCommentForDecl(Field)) - Comment = RawComment->getFormattedLines(Context.getSourceManager(), - Context.getDiagnostics()); - - // Build declaration fragments and sub-heading for the struct field. - DeclarationFragments Declaration = - DeclarationFragmentsBuilder::getFragmentsForField(Field); - DeclarationFragments SubHeading = - DeclarationFragmentsBuilder::getSubHeading(Field); - - API.addRecordField( - RecordRecord, Name, USR, Loc, AvailabilityInfo::createFromDecl(Field), - Comment, Declaration, SubHeading, FieldKind, isInSystemHeader(Field)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Constant), Loc, + AvailabilityInfo::createFromDecl(Constant), Comment, Declaration, + SubHeading, isInSystemHeader(Constant)); } } template bool ExtractAPIVisitorBase::VisitFieldDecl(const FieldDecl *Decl) { - if (Decl->getDeclContext()->getDeclKind() == Decl::Record) + // ObjCIvars are handled separately + if (isa(Decl) || isa(Decl)) return true; - if (isa(Decl)) + + if (!getDerivedExtractAPIVisitor().shouldDeclBeIncluded(Decl)) return true; + // Collect symbol information. 
StringRef Name = Decl->getName(); - StringRef USR = API.recordUSR(Decl); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -1231,22 +1220,40 @@ bool ExtractAPIVisitorBase::VisitFieldDecl(const FieldDecl *Decl) { DeclarationFragmentsBuilder::getFragmentsForField(Decl); DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Decl); - AccessControl Access = DeclarationFragmentsBuilder::getAccessControl(Decl); - SmallString<128> ParentUSR; - index::generateUSRForDecl(dyn_cast(Decl->getDeclContext()), - ParentUSR); - API.addCXXField(API.findRecordForUSR(ParentUSR), Name, USR, Loc, - AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, - SubHeading, Access, isInSystemHeader(Decl)); + if (isa(Decl->getDeclContext())) { + AccessControl Access = DeclarationFragmentsBuilder::getAccessControl(Decl); + + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, + SubHeading, Access, isInSystemHeader(Decl)); + } else if (auto *RD = dyn_cast(Decl->getDeclContext())) { + if (RD->isUnion()) + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, + SubHeading, isInSystemHeader(Decl)); + else + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, + SubHeading, isInSystemHeader(Decl)); + } + return true; } template bool ExtractAPIVisitorBase::VisitCXXConversionDecl( const CXXConversionDecl *Decl) { - StringRef Name = API.copyString(Decl->getNameAsString()); - StringRef USR = API.recordUSR(Decl); + if (!getDerivedExtractAPIVisitor().shouldDeclBeIncluded(Decl) || + Decl->isImplicit()) + return true; + + auto Name = Decl->getNameAsString(); + SmallString<128> USR; + index::generateUSRForDecl(Decl, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Decl->getLocation()); DocComment Comment; @@ -1264,19 +1271,17 @@ bool ExtractAPIVisitorBase::VisitCXXConversionDecl( DeclarationFragmentsBuilder::getFunctionSignature(Decl); AccessControl Access = DeclarationFragmentsBuilder::getAccessControl(Decl); - SmallString<128> ParentUSR; - index::generateUSRForDecl(dyn_cast(Decl->getDeclContext()), - ParentUSR); if (Decl->isStatic()) - API.addCXXStaticMethod(API.findRecordForUSR(ParentUSR), Name, USR, Loc, - AvailabilityInfo::createFromDecl(Decl), Comment, - Declaration, SubHeading, Signature, Access, - isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, + SubHeading, Signature, Access, isInSystemHeader(Decl)); else - API.addCXXInstanceMethod(API.findRecordForUSR(ParentUSR), Name, USR, Loc, - AvailabilityInfo::createFromDecl(Decl), Comment, - Declaration, SubHeading, Signature, Access, - isInSystemHeader(Decl)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Decl), Loc, + AvailabilityInfo::createFromDecl(Decl), Comment, Declaration, + SubHeading, Signature, Access, isInSystemHeader(Decl)); + return true; } @@ -1291,8 +1296,9 @@ void ExtractAPIVisitorBase::recordObjCMethods( if (Method->isPropertyAccessor()) continue; - StringRef Name = API.copyString(Method->getSelector().getAsString()); - StringRef USR = API.recordUSR(Method); + auto Name = 
Method->getSelector().getAsString(); + SmallString<128> USR; + index::generateUSRForDecl(Method, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Method->getLocation()); DocComment Comment; @@ -1309,10 +1315,16 @@ void ExtractAPIVisitorBase::recordObjCMethods( FunctionSignature Signature = DeclarationFragmentsBuilder::getFunctionSignature(Method); - API.addObjCMethod(Container, Name, USR, Loc, - AvailabilityInfo::createFromDecl(Method), Comment, - Declaration, SubHeading, Signature, - Method->isInstanceMethod(), isInSystemHeader(Method)); + if (Method->isInstanceMethod()) + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Method), Loc, + AvailabilityInfo::createFromDecl(Method), Comment, Declaration, + SubHeading, Signature, isInSystemHeader(Method)); + else + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Method), Loc, + AvailabilityInfo::createFromDecl(Method), Comment, Declaration, + SubHeading, Signature, isInSystemHeader(Method)); } } @@ -1322,7 +1334,8 @@ void ExtractAPIVisitorBase::recordObjCProperties( const ObjCContainerDecl::prop_range Properties) { for (const auto *Property : Properties) { StringRef Name = Property->getName(); - StringRef USR = API.recordUSR(Property); + SmallString<128> USR; + index::generateUSRForDecl(Property, USR); PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Property->getLocation()); DocComment Comment; @@ -1337,10 +1350,8 @@ void ExtractAPIVisitorBase::recordObjCProperties( DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Property); - StringRef GetterName = - API.copyString(Property->getGetterName().getAsString()); - StringRef SetterName = - API.copyString(Property->getSetterName().getAsString()); + auto GetterName = Property->getGetterName().getAsString(); + auto SetterName = Property->getSetterName().getAsString(); // Get the attributes for property. 
unsigned Attributes = ObjCPropertyRecord::NoAttr; @@ -1348,14 +1359,22 @@ void ExtractAPIVisitorBase::recordObjCProperties( ObjCPropertyAttribute::kind_readonly) Attributes |= ObjCPropertyRecord::ReadOnly; - API.addObjCProperty( - Container, Name, USR, Loc, AvailabilityInfo::createFromDecl(Property), - Comment, Declaration, SubHeading, - static_cast(Attributes), GetterName, - SetterName, Property->isOptional(), - !(Property->getPropertyAttributes() & - ObjCPropertyAttribute::kind_class), - isInSystemHeader(Property)); + if (Property->getPropertyAttributes() & ObjCPropertyAttribute::kind_class) + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Property), Loc, + AvailabilityInfo::createFromDecl(Property), Comment, Declaration, + SubHeading, + static_cast(Attributes), + GetterName, SetterName, Property->isOptional(), + isInSystemHeader(Property)); + else + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Property), Loc, + AvailabilityInfo::createFromDecl(Property), Comment, Declaration, + SubHeading, + static_cast(Attributes), + GetterName, SetterName, Property->isOptional(), + isInSystemHeader(Property)); } } @@ -1367,7 +1386,9 @@ void ExtractAPIVisitorBase::recordObjCInstanceVariables( Ivars) { for (const auto *Ivar : Ivars) { StringRef Name = Ivar->getName(); - StringRef USR = API.recordUSR(Ivar); + SmallString<128> USR; + index::generateUSRForDecl(Ivar, USR); + PresumedLoc Loc = Context.getSourceManager().getPresumedLoc(Ivar->getLocation()); DocComment Comment; @@ -1382,12 +1403,10 @@ void ExtractAPIVisitorBase::recordObjCInstanceVariables( DeclarationFragments SubHeading = DeclarationFragmentsBuilder::getSubHeading(Ivar); - ObjCInstanceVariableRecord::AccessControl Access = - Ivar->getCanonicalAccessControl(); - - API.addObjCInstanceVariable( - Container, Name, USR, Loc, AvailabilityInfo::createFromDecl(Ivar), - Comment, Declaration, SubHeading, Access, isInSystemHeader(Ivar)); + API.createRecord( + USR, Name, createHierarchyInformationForDecl(*Ivar), Loc, + AvailabilityInfo::createFromDecl(Ivar), Comment, Declaration, + SubHeading, isInSystemHeader(Ivar)); } } @@ -1396,8 +1415,7 @@ void ExtractAPIVisitorBase::recordObjCProtocols( ObjCContainerRecord *Container, ObjCInterfaceDecl::protocol_range Protocols) { for (const auto *Protocol : Protocols) - Container->Protocols.emplace_back(Protocol->getName(), - API.recordUSR(Protocol)); + Container->Protocols.emplace_back(createSymbolReferenceForDecl(*Protocol)); } } // namespace impl diff --git a/clang/include/clang/ExtractAPI/FrontendActions.h b/clang/include/clang/ExtractAPI/FrontendActions.h index c67864a..08045a3 100644 --- a/clang/include/clang/ExtractAPI/FrontendActions.h +++ b/clang/include/clang/ExtractAPI/FrontendActions.h @@ -49,9 +49,6 @@ private: void EndSourceFileAction() override; static StringRef getInputBufferName() { return ""; } - - static std::unique_ptr - CreateOutputFile(CompilerInstance &CI, StringRef InFile); }; /// Wrap ExtractAPIAction on top of a pre-existing action @@ -85,9 +82,6 @@ private: /// actions. This is the place where all the gathered symbol graph /// information is emited. 
void EndSourceFileAction() override; - - static std::unique_ptr - CreateOutputFile(CompilerInstance &CI, StringRef InFile); }; } // namespace clang diff --git a/clang/include/clang/ExtractAPI/Serialization/APISetVisitor.h b/clang/include/clang/ExtractAPI/Serialization/APISetVisitor.h new file mode 100644 index 0000000..07f14f3 --- /dev/null +++ b/clang/include/clang/ExtractAPI/Serialization/APISetVisitor.h @@ -0,0 +1,172 @@ +//===- ExtractAPI/Serialization/APISetVisitor.h ----------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file defines the ExtractAPI APISetVisitor interface. +/// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_EXTRACTAPI_SERIALIZATION_SERIALIZERBASE_H +#define LLVM_CLANG_EXTRACTAPI_SERIALIZATION_SERIALIZERBASE_H + +#include "clang/ExtractAPI/API.h" + +namespace clang { +namespace extractapi { + +// A helper macro to implement short-circuiting when recursing. It +// invokes CALL_EXPR, which must be a method call, on the derived +// object (s.t. a user of RecursiveASTVisitor can override the method +// in CALL_EXPR). +#define TRY_TO(CALL_EXPR) \ + do { \ + if (!getDerived()->CALL_EXPR) \ + return false; \ + } while (false) + +/// The base interface of visitors for API information, the interface and usage +/// is almost identical to RecurisveASTVistor. This class performs three +/// distinct tasks: +/// 1. traverse the APISet (i.e. go to every record); +/// 2. at a given record, walk up the class hierarchy starting from the record's +/// dynamic type until APIRecord is reached. +/// 3. given a (record, class) combination where 'class' is some base class of +/// the dynamic type of 'record', call a user-overridable function to actually +/// visit the record. +/// +/// These tasks are done by three groups of methods, respectively: +/// 1. traverseRecord(APIRecord *x) does task #1, it is the entry point for +/// traversing the records starting from x. This method simply forwards to +/// traverseFoo(Foo *x) where Foo is the dynamic type of *x, which calls +/// walkUpFromFoo(x) and then recursively visits the child records of x. +/// 2. walkUpFromFoo(Foo *x) does task #2. It doesn't visit children records of +/// x, instead it first calls walkUpFromBar(x) where Bar is the direct parent +/// class of Foo (unless Foo has no parent) and then calls visitFoo(x). +/// 3. visitFoo(Foo *x) does task #3. +/// +/// These three method groups are tiered (traverse* > walkUpFrom* > +/// visit*). A method (e.g. traverse*) may call methods from the same +/// tier (e.g. other traverse*) or one tier lower (e.g. walkUpFrom*). +/// It may not call methods from a higher tier. +/// +/// Note that since walkUpFromFoo() calls walkUpFromBar() (where Bar +/// is Foo's super class) before calling visitFoo(), the result is +/// that the visit*() methods for a given record are called in the +/// top-down order (e.g. for a record of type ObjCInstancePropertyRecord, the +/// order will be visitRecord(), visitObjCPropertyRecord(), and then +/// visitObjCInstancePropertyRecord()). +/// +/// This scheme guarantees that all visit*() calls for the same record +/// are grouped together. In other words, visit*() methods for different +/// records are never interleaved. 
+/// +/// Clients of this visitor should subclass the visitor (providing +/// themselves as the template argument, using the curiously recurring +/// template pattern) and override any of the traverse*, walkUpFrom*, +/// and visit* methods for records where the visitor should customize +/// behavior. Most users only need to override visit*. Advanced +/// users may override traverse* and walkUpFrom* to implement custom +/// traversal strategies. Returning false from one of these overridden +/// functions will abort the entire traversal. +template class APISetVisitor { +public: + bool traverseAPISet() { + for (const APIRecord *TLR : API.getTopLevelRecords()) { + TRY_TO(traverseAPIRecord(TLR)); + } + return true; + } + + bool traverseAPIRecord(const APIRecord *Record); + bool walkUpFromAPIRecord(const APIRecord *Record) { + TRY_TO(visitAPIRecord(Record)); + return true; + } + bool visitAPIRecord(const APIRecord *Record) { return true; } + +#define GENERATE_TRAVERSE_METHOD(CLASS, BASE) \ + bool traverse##CLASS(const CLASS *Record) { \ + TRY_TO(walkUpFrom##CLASS(Record)); \ + TRY_TO(traverseRecordContext(dyn_cast(Record))); \ + return true; \ + } + +#define GENERATE_WALKUP_AND_VISIT_METHODS(CLASS, BASE) \ + bool walkUpFrom##CLASS(const CLASS *Record) { \ + TRY_TO(walkUpFrom##BASE(Record)); \ + TRY_TO(visit##CLASS(Record)); \ + return true; \ + } \ + bool visit##CLASS(const CLASS *Record) { return true; } + +#define CONCRETE_RECORD(CLASS, BASE, KIND) \ + GENERATE_TRAVERSE_METHOD(CLASS, BASE) \ + GENERATE_WALKUP_AND_VISIT_METHODS(CLASS, BASE) + +#define ABSTRACT_RECORD(CLASS, BASE) \ + GENERATE_WALKUP_AND_VISIT_METHODS(CLASS, BASE) + +#include "../APIRecords.inc" + +#undef GENERATE_WALKUP_AND_VISIT_METHODS +#undef GENERATE_TRAVERSE_METHOD + + bool traverseRecordContext(const RecordContext *); + +protected: + const APISet &API; + +public: + APISetVisitor() = delete; + APISetVisitor(const APISetVisitor &) = delete; + APISetVisitor(APISetVisitor &&) = delete; + APISetVisitor &operator=(const APISetVisitor &) = delete; + APISetVisitor &operator=(APISetVisitor &&) = delete; + +protected: + APISetVisitor(const APISet &API) : API(API) {} + ~APISetVisitor() = default; + + Derived *getDerived() { return static_cast(this); }; +}; + +template +bool APISetVisitor::traverseRecordContext( + const RecordContext *Context) { + if (!Context) + return true; + + for (auto *Child : Context->records()) + TRY_TO(traverseAPIRecord(Child)); + + return true; +} + +template +bool APISetVisitor::traverseAPIRecord(const APIRecord *Record) { + switch (Record->getKind()) { +#define CONCRETE_RECORD(CLASS, BASE, KIND) \ + case APIRecord::KIND: { \ + TRY_TO(traverse##CLASS(static_cast(Record))); \ + break; \ + } +#include "../APIRecords.inc" + case APIRecord::RK_Unknown: { + TRY_TO(walkUpFromAPIRecord(static_cast(Record))); + break; + } + default: + llvm_unreachable("API Record with uninstantiable kind"); + } + return true; +} + +} // namespace extractapi +} // namespace clang + +#endif // LLVM_CLANG_EXTRACTAPI_SERIALIZATION_SERIALIZERBASE_H diff --git a/clang/include/clang/ExtractAPI/Serialization/SerializerBase.h b/clang/include/clang/ExtractAPI/Serialization/SerializerBase.h deleted file mode 100644 index f0629a9..0000000 --- a/clang/include/clang/ExtractAPI/Serialization/SerializerBase.h +++ /dev/null @@ -1,314 +0,0 @@ -//===- ExtractAPI/Serialization/SerializerBase.h ----------------*- C++ -*-===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
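A minimal sketch of a client of the APISetVisitor interface above, assuming that APIRecord still exposes Name as a StringRef and that APIRecords.inc generates a visitEnumRecord hook; it overrides only visit* hooks and relies on the generated traversal.

    #include "clang/ExtractAPI/API.h"
    #include "clang/ExtractAPI/Serialization/APISetVisitor.h"
    #include "llvm/Support/raw_ostream.h"

    namespace clang {
    namespace extractapi {

    // Prints the name of every record in the APISet, tagging enums.
    class NameDumper : public APISetVisitor<NameDumper> {
    public:
      NameDumper(const APISet &API) : APISetVisitor<NameDumper>(API) {}

      // Called for every record through walkUpFromAPIRecord.
      bool visitAPIRecord(const APIRecord *Record) {
        llvm::outs() << Record->Name << "\n";
        return true;
      }

      // Kind-specific hook; per the walk-up scheme it runs after
      // visitAPIRecord when the record is an EnumRecord.
      bool visitEnumRecord(const EnumRecord *Record) {
        llvm::outs() << "  (enum)\n";
        return true;
      }
    };

    } // namespace extractapi
    } // namespace clang

    // Usage sketch:
    //   NameDumper Dumper(API);
    //   Dumper.traverseAPISet();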
-// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// -/// -/// \file -/// This file defines the ExtractAPI APISetVisitor interface. -/// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CLANG_EXTRACTAPI_SERIALIZATION_SERIALIZERBASE_H -#define LLVM_CLANG_EXTRACTAPI_SERIALIZATION_SERIALIZERBASE_H - -#include "clang/ExtractAPI/API.h" - -namespace clang { -namespace extractapi { - -/// The base interface of visitors for API information. -template class APISetVisitor { -public: - void traverseAPISet() { - getDerived()->traverseNamespaces(); - - getDerived()->traverseGlobalVariableRecords(); - - getDerived()->traverseGlobalFunctionRecords(); - - getDerived()->traverseEnumRecords(); - - getDerived()->traverseStaticFieldRecords(); - - getDerived()->traverseCXXClassRecords(); - - getDerived()->traverseClassTemplateRecords(); - - getDerived()->traverseClassTemplateSpecializationRecords(); - - getDerived()->traverseClassTemplatePartialSpecializationRecords(); - - getDerived()->traverseCXXInstanceMethods(); - - getDerived()->traverseCXXStaticMethods(); - - getDerived()->traverseCXXMethodTemplates(); - - getDerived()->traverseCXXMethodTemplateSpecializations(); - - getDerived()->traverseCXXFields(); - - getDerived()->traverseCXXFieldTemplates(); - - getDerived()->traverseConcepts(); - - getDerived()->traverseGlobalVariableTemplateRecords(); - - getDerived()->traverseGlobalVariableTemplateSpecializationRecords(); - - getDerived()->traverseGlobalVariableTemplatePartialSpecializationRecords(); - - getDerived()->traverseGlobalFunctionTemplateRecords(); - - getDerived()->traverseGlobalFunctionTemplateSpecializationRecords(); - - getDerived()->traverseRecordRecords(); - - getDerived()->traverseObjCInterfaces(); - - getDerived()->traverseObjCProtocols(); - - getDerived()->traverseObjCCategories(); - - getDerived()->traverseMacroDefinitionRecords(); - - getDerived()->traverseTypedefRecords(); - } - - void traverseNamespaces() { - for (const auto &Namespace : API.getNamespaces()) - getDerived()->visitNamespaceRecord(*Namespace.second); - } - - void traverseGlobalFunctionRecords() { - for (const auto &GlobalFunction : API.getGlobalFunctions()) - getDerived()->visitGlobalFunctionRecord(*GlobalFunction.second); - } - - void traverseGlobalVariableRecords() { - for (const auto &GlobalVariable : API.getGlobalVariables()) - getDerived()->visitGlobalVariableRecord(*GlobalVariable.second); - } - - void traverseEnumRecords() { - for (const auto &Enum : API.getEnums()) - getDerived()->visitEnumRecord(*Enum.second); - } - - void traverseRecordRecords() { - for (const auto &Record : API.getRecords()) - getDerived()->visitRecordRecord(*Record.second); - } - - void traverseStaticFieldRecords() { - for (const auto &StaticField : API.getStaticFields()) - getDerived()->visitStaticFieldRecord(*StaticField.second); - } - - void traverseCXXClassRecords() { - for (const auto &Class : API.getCXXClasses()) - getDerived()->visitCXXClassRecord(*Class.second); - } - - void traverseCXXMethodTemplates() { - for (const auto &MethodTemplate : API.getCXXMethodTemplates()) - getDerived()->visitMethodTemplateRecord(*MethodTemplate.second); - } - - void traverseCXXMethodTemplateSpecializations() { - for (const auto &MethodTemplateSpecialization : - API.getCXXMethodTemplateSpecializations()) - 
getDerived()->visitMethodTemplateSpecializationRecord( - *MethodTemplateSpecialization.second); - } - - void traverseClassTemplateRecords() { - for (const auto &ClassTemplate : API.getClassTemplates()) - getDerived()->visitClassTemplateRecord(*ClassTemplate.second); - } - - void traverseClassTemplateSpecializationRecords() { - for (const auto &ClassTemplateSpecialization : - API.getClassTemplateSpecializations()) - getDerived()->visitClassTemplateSpecializationRecord( - *ClassTemplateSpecialization.second); - } - - void traverseClassTemplatePartialSpecializationRecords() { - for (const auto &ClassTemplatePartialSpecialization : - API.getClassTemplatePartialSpecializations()) - getDerived()->visitClassTemplatePartialSpecializationRecord( - *ClassTemplatePartialSpecialization.second); - } - - void traverseCXXInstanceMethods() { - for (const auto &InstanceMethod : API.getCXXInstanceMethods()) - getDerived()->visitCXXInstanceMethodRecord(*InstanceMethod.second); - } - - void traverseCXXStaticMethods() { - for (const auto &InstanceMethod : API.getCXXStaticMethods()) - getDerived()->visitCXXStaticMethodRecord(*InstanceMethod.second); - } - - void traverseCXXFields() { - for (const auto &CXXField : API.getCXXFields()) - getDerived()->visitCXXFieldRecord(*CXXField.second); - } - - void traverseCXXFieldTemplates() { - for (const auto &CXXFieldTemplate : API.getCXXFieldTemplates()) - getDerived()->visitCXXFieldTemplateRecord(*CXXFieldTemplate.second); - } - - void traverseGlobalVariableTemplateRecords() { - for (const auto &GlobalVariableTemplate : API.getGlobalVariableTemplates()) - getDerived()->visitGlobalVariableTemplateRecord( - *GlobalVariableTemplate.second); - } - - void traverseGlobalVariableTemplateSpecializationRecords() { - for (const auto &GlobalVariableTemplateSpecialization : - API.getGlobalVariableTemplateSpecializations()) - getDerived()->visitGlobalVariableTemplateSpecializationRecord( - *GlobalVariableTemplateSpecialization.second); - } - - void traverseGlobalVariableTemplatePartialSpecializationRecords() { - for (const auto &GlobalVariableTemplatePartialSpecialization : - API.getGlobalVariableTemplatePartialSpecializations()) - getDerived()->visitGlobalVariableTemplatePartialSpecializationRecord( - *GlobalVariableTemplatePartialSpecialization.second); - } - - void traverseGlobalFunctionTemplateRecords() { - for (const auto &GlobalFunctionTemplate : API.getGlobalFunctionTemplates()) - getDerived()->visitGlobalFunctionTemplateRecord( - *GlobalFunctionTemplate.second); - } - - void traverseGlobalFunctionTemplateSpecializationRecords() { - for (const auto &GlobalFunctionTemplateSpecialization : - API.getGlobalFunctionTemplateSpecializations()) - getDerived()->visitGlobalFunctionTemplateSpecializationRecord( - *GlobalFunctionTemplateSpecialization.second); - } - - void traverseConcepts() { - for (const auto &Concept : API.getConcepts()) - getDerived()->visitConceptRecord(*Concept.second); - } - - void traverseObjCInterfaces() { - for (const auto &Interface : API.getObjCInterfaces()) - getDerived()->visitObjCContainerRecord(*Interface.second); - } - - void traverseObjCProtocols() { - for (const auto &Protocol : API.getObjCProtocols()) - getDerived()->visitObjCContainerRecord(*Protocol.second); - } - - void traverseObjCCategories() { - for (const auto &Category : API.getObjCCategories()) - getDerived()->visitObjCCategoryRecord(*Category.second); - } - - void traverseMacroDefinitionRecords() { - for (const auto &Macro : API.getMacros()) - 
getDerived()->visitMacroDefinitionRecord(*Macro.second); - } - - void traverseTypedefRecords() { - for (const auto &Typedef : API.getTypedefs()) - getDerived()->visitTypedefRecord(*Typedef.second); - } - - void visitNamespaceRecord(const NamespaceRecord &Record){}; - - /// Visit a global function record. - void visitGlobalFunctionRecord(const GlobalFunctionRecord &Record){}; - - /// Visit a global variable record. - void visitGlobalVariableRecord(const GlobalVariableRecord &Record){}; - - /// Visit an enum record. - void visitEnumRecord(const EnumRecord &Record){}; - - /// Visit a record record. - void visitRecordRecord(const RecordRecord &Record){}; - - void visitStaticFieldRecord(const StaticFieldRecord &Record){}; - - void visitCXXClassRecord(const CXXClassRecord &Record){}; - - void visitClassTemplateRecord(const ClassTemplateRecord &Record){}; - - void visitClassTemplateSpecializationRecord( - const ClassTemplateSpecializationRecord &Record){}; - - void visitClassTemplatePartialSpecializationRecord( - const ClassTemplatePartialSpecializationRecord &Record){}; - - void visitCXXInstanceRecord(const CXXInstanceMethodRecord &Record){}; - - void visitCXXStaticRecord(const CXXStaticMethodRecord &Record){}; - - void visitMethodTemplateRecord(const CXXMethodTemplateRecord &Record){}; - - void visitMethodTemplateSpecializationRecord( - const CXXMethodTemplateSpecializationRecord &Record){}; - - void visitCXXFieldTemplateRecord(const CXXFieldTemplateRecord &Record){}; - - void visitGlobalVariableTemplateRecord( - const GlobalVariableTemplateRecord &Record) {} - - void visitGlobalVariableTemplateSpecializationRecord( - const GlobalVariableTemplateSpecializationRecord &Record){}; - - void visitGlobalVariableTemplatePartialSpecializationRecord( - const GlobalVariableTemplatePartialSpecializationRecord &Record){}; - - void visitGlobalFunctionTemplateRecord( - const GlobalFunctionTemplateRecord &Record){}; - - void visitGlobalFunctionTemplateSpecializationRecord( - const GlobalFunctionTemplateSpecializationRecord &Record){}; - - /// Visit an Objective-C container record. - void visitObjCContainerRecord(const ObjCContainerRecord &Record){}; - - /// Visit an Objective-C category record. - void visitObjCCategoryRecord(const ObjCCategoryRecord &Record){}; - - /// Visit a macro definition record. - void visitMacroDefinitionRecord(const MacroDefinitionRecord &Record){}; - - /// Visit a typedef record. 
- void visitTypedefRecord(const TypedefRecord &Record){}; - -protected: - const APISet &API; - -public: - APISetVisitor() = delete; - APISetVisitor(const APISetVisitor &) = delete; - APISetVisitor(APISetVisitor &&) = delete; - APISetVisitor &operator=(const APISetVisitor &) = delete; - APISetVisitor &operator=(APISetVisitor &&) = delete; - -protected: - APISetVisitor(const APISet &API) : API(API) {} - ~APISetVisitor() = default; - - Derived *getDerived() { return static_cast(this); }; -}; - -} // namespace extractapi -} // namespace clang - -#endif // LLVM_CLANG_EXTRACTAPI_SERIALIZATION_SERIALIZERBASE_H diff --git a/clang/include/clang/ExtractAPI/Serialization/SymbolGraphSerializer.h b/clang/include/clang/ExtractAPI/Serialization/SymbolGraphSerializer.h index 4249ac4..724b087 100644 --- a/clang/include/clang/ExtractAPI/Serialization/SymbolGraphSerializer.h +++ b/clang/include/clang/ExtractAPI/Serialization/SymbolGraphSerializer.h @@ -17,11 +17,17 @@ #ifndef LLVM_CLANG_EXTRACTAPI_SERIALIZATION_SYMBOLGRAPHSERIALIZER_H #define LLVM_CLANG_EXTRACTAPI_SERIALIZATION_SYMBOLGRAPHSERIALIZER_H +#include "clang/Basic/Module.h" #include "clang/ExtractAPI/API.h" #include "clang/ExtractAPI/APIIgnoresList.h" -#include "clang/ExtractAPI/Serialization/SerializerBase.h" +#include "clang/ExtractAPI/Serialization/APISetVisitor.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" +#include "llvm/ADT/Twine.h" #include "llvm/Support/JSON.h" #include "llvm/Support/VersionTuple.h" #include "llvm/Support/raw_ostream.h" @@ -35,7 +41,30 @@ using namespace llvm::json; /// Common options to customize the visitor output. struct SymbolGraphSerializerOption { /// Do not include unnecessary whitespaces to save space. - bool Compact; + bool Compact = true; + bool EmitSymbolLabelsForTesting = false; +}; + +/// A representation of the contents of a given module symbol graph +struct ExtendedModule { + ExtendedModule() = default; + ExtendedModule(ExtendedModule &&EM) = default; + ExtendedModule &operator=(ExtendedModule &&EM) = default; + // Copies are expensive so disable them. + ExtendedModule(const ExtendedModule &EM) = delete; + ExtendedModule &operator=(const ExtendedModule &EM) = delete; + + /// Add a symbol to the module, do not store the resulting pointer or use it + /// across insertions. + Object *addSymbol(Object &&Symbol); + + void addRelationship(Object &&Relationship); + + /// A JSON array of formatted symbols from an \c APISet. + Array Symbols; + + /// A JSON array of formatted symbol relationships from an \c APISet. + Array Relationships; }; /// The visitor that organizes API information in the Symbol Graph format. @@ -44,28 +73,54 @@ struct SymbolGraphSerializerOption { /// models an API set as a directed graph, where nodes are symbol declarations, /// and edges are relationships between the connected symbols. class SymbolGraphSerializer : public APISetVisitor { - /// A JSON array of formatted symbols in \c APISet. - Array Symbols; +private: + using Base = APISetVisitor; + /// The main symbol graph that contains symbols that are either top-level or a + /// are related to symbols defined in this product/module. + ExtendedModule MainModule; - /// A JSON array of formatted symbol relationships in \c APISet. - Array Relationships; + /// Additional symbol graphs that contain symbols that are related to symbols + /// defined in another product/module. 
The key of this map is the module name + /// of the extended module. + llvm::StringMap ExtendedModules; /// The Symbol Graph format version used by this serializer. static const VersionTuple FormatVersion; - /// Indicates whether child symbols should be visited. This is mainly + /// Indicates whether to take into account the extended module. This is only /// useful for \c serializeSingleSymbolSGF. - bool ShouldRecurse; + bool ForceEmitToMainModule; -public: - /// Serialize the APIs in \c APISet in the Symbol Graph format. + // Stores the references required to construct path components for the + // currently visited APIRecord. + llvm::SmallVector Hierarchy; + + /// The list of symbols to ignore. /// - /// \returns a JSON object that contains the root of the formatted - /// Symbol Graph. - Object serialize(); + /// Note: This should be consulted before emitting a symbol. + const APIIgnoresList &IgnoresList; - /// Wrap serialize(void) and write out the serialized JSON object to \p os. - void serialize(raw_ostream &os); + const bool EmitSymbolLabelsForTesting = false; + + /// The object instantiated by the last call to serializeAPIRecord. + Object *CurrentSymbol = nullptr; + + /// The module to which \p CurrentSymbol belongs too. + ExtendedModule *ModuleForCurrentSymbol = nullptr; + +public: + static void + serializeMainSymbolGraph(raw_ostream &OS, const APISet &API, + const APIIgnoresList &IgnoresList, + SymbolGraphSerializerOption Options = {}); + + static void serializeWithExtensionGraphs( + raw_ostream &MainOutput, const APISet &API, + const APIIgnoresList &IgnoresList, + llvm::function_ref< + std::unique_ptr(llvm::Twine BaseFileName)> + CreateOutputStream, + SymbolGraphSerializerOption Options = {}); /// Serialize a single symbol SGF. This is primarily used for libclang. /// @@ -75,6 +130,7 @@ public: static std::optional serializeSingleSymbolSGF(StringRef USR, const APISet &API); +private: /// The kind of a relationship between two symbols. enum RelationshipKind { /// The source symbol is a member of the target symbol. @@ -94,16 +150,32 @@ public: ExtensionTo, }; + /// Serialize a single record. + void serializeSingleRecord(const APIRecord *Record); + /// Get the string representation of the relationship kind. static StringRef getRelationshipString(RelationshipKind Kind); + void serializeRelationship(RelationshipKind Kind, + const SymbolReference &Source, + const SymbolReference &Target, + ExtendedModule &Into); + enum ConstraintKind { Conformance, ConditionalConformance }; static StringRef getConstraintString(ConstraintKind Kind); -private: - /// Just serialize the currently recorded objects in Symbol Graph format. - Object serializeCurrentGraph(); + /// Serialize the APIs in \c ExtendedModule. + /// + /// \returns a JSON object that contains the root of the formatted + /// Symbol Graph. + Object serializeGraph(StringRef ModuleName, ExtendedModule &&EM); + + /// Serialize the APIs in \c ExtendedModule in the Symbol Graph format and + /// write them to the provide stream. + void serializeGraphToStream(raw_ostream &OS, + SymbolGraphSerializerOption Options, + StringRef ModuleName, ExtendedModule &&EM); /// Synthesize the metadata section of the Symbol Graph format. /// @@ -117,124 +189,92 @@ private: /// by the given API set. /// Note that "module" here is not to be confused with the Clang/C++ module /// concept. 
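The two static entry points declared above replace the old serialize()/serialize(raw_ostream &) pair: serializeMainSymbolGraph writes one graph for the product itself, while serializeWithExtensionGraphs additionally asks a caller-supplied callback for one output stream per extended module. The following is a minimal sketch of how a caller might drive them; it assumes the callback returns std::unique_ptr<llvm::raw_pwrite_stream> (the template argument is not legible in the hunk above) and uses a plain raw_fd_ostream in place of CompilerInstance-managed output files, so emitGraphs and its parameters are illustrative only.

#include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <memory>
#include <system_error>

using namespace clang::extractapi;

static void emitGraphs(llvm::raw_ostream &MainOS, const APISet &API,
                       const APIIgnoresList &Ignores, bool WithExtensions) {
  SymbolGraphSerializerOption Opts;
  Opts.Compact = true;                     // new default in the options struct
  Opts.EmitSymbolLabelsForTesting = false; // only the test driver flips this

  if (!WithExtensions) {
    SymbolGraphSerializer::serializeMainSymbolGraph(MainOS, API, Ignores, Opts);
    return;
  }

  // One extra <module>.symbols.json file per extended module; the serializer
  // only supplies the base name, the callback decides where the stream goes.
  auto MakeStream = [](llvm::Twine BaseName)
      -> std::unique_ptr<llvm::raw_pwrite_stream> {
    std::error_code EC;
    std::unique_ptr<llvm::raw_pwrite_stream> OS =
        std::make_unique<llvm::raw_fd_ostream>(
            (BaseName + ".symbols.json").str(), EC, llvm::sys::fs::OF_Text);
    if (EC)
      return nullptr; // error handling elided in this sketch
    return OS;
  };
  SymbolGraphSerializer::serializeWithExtensionGraphs(MainOS, API, Ignores,
                                                      MakeStream, Opts);
}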
- Object serializeModule() const; + Object serializeModuleObject(StringRef ModuleName) const; + + Array serializePathComponents(const APIRecord *Record) const; /// Determine if the given \p Record should be skipped during serialization. - bool shouldSkip(const APIRecord &Record) const; + bool shouldSkip(const APIRecord *Record) const; + + ExtendedModule &getModuleForCurrentSymbol(); /// Format the common API information for \p Record. /// /// This handles the shared information of all kinds of API records, - /// for example identifier and source location. The resulting object is then - /// augmented with kind-specific symbol information by the caller. - /// This method also checks if the given \p Record should be skipped during - /// serialization. + /// for example identifier, source location and path components. The resulting + /// object is then augmented with kind-specific symbol information in + /// subsequent visit* methods by accessing the \p State member variable. This + /// method also checks if the given \p Record should be skipped during + /// serialization. This should be called only once per concrete APIRecord + /// instance and the first visit* method to be called is responsible for + /// calling this. This is normally visitAPIRecord unless a walkUpFromFoo + /// method is implemented along the inheritance hierarchy in which case the + /// visitFoo method needs to call this. /// - /// \returns \c std::nullopt if this \p Record should be skipped, or a JSON - /// object containing common symbol information of \p Record. - template - std::optional serializeAPIRecord(const RecordTy &Record) const; - - /// Helper method to serialize second-level member records of \p Record and - /// the member-of relationships. - template - void serializeMembers(const APIRecord &Record, - const SmallVector> &Members); - - /// Serialize the \p Kind relationship between \p Source and \p Target. - /// - /// Record the relationship between the two symbols in - /// SymbolGraphSerializer::Relationships. - void serializeRelationship(RelationshipKind Kind, SymbolReference Source, - SymbolReference Target); - -protected: - /// The list of symbols to ignore. - /// - /// Note: This should be consulted before emitting a symbol. - const APIIgnoresList &IgnoresList; - - SymbolGraphSerializerOption Options; - - llvm::StringSet<> visitedCategories; + /// \returns \c nullptr if this \p Record should be skipped, or a pointer to + /// JSON object containing common symbol information of \p Record. Do not + /// store the returned pointer only use it to augment the object with record + /// specific information as it directly points to the object in the + /// \p ExtendedModule, the pointer won't be valid as soon as another object is + /// inserted into the module. + void serializeAPIRecord(const APIRecord *Record); public: - void visitNamespaceRecord(const NamespaceRecord &Record); - - /// Visit a global function record. - void visitGlobalFunctionRecord(const GlobalFunctionRecord &Record); - - /// Visit a global variable record. - void visitGlobalVariableRecord(const GlobalVariableRecord &Record); - - /// Visit an enum record. - void visitEnumRecord(const EnumRecord &Record); - - /// Visit a record record. - void visitRecordRecord(const RecordRecord &Record); - - void visitStaticFieldRecord(const StaticFieldRecord &Record); + // Handle if records should be skipped at this level of the traversal to + // ensure that children of skipped records aren't serialized. 
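The comment above describes a three-layer dispatch the new visitor relies on: traverse* decides whether a record (and therefore its whole subtree) is emitted at all, walkUpFrom* routes to the most derived visit*, and the first visit* to run performs the single call to serializeAPIRecord. The toy classes below illustrate only that layering; FakeRecord and MiniSerializer are made-up names, not the clang types.

#include <vector>

struct FakeRecord {
  bool Ignored = false;
  std::vector<FakeRecord *> Children;
};

struct MiniSerializer {
  bool shouldSkip(const FakeRecord *R) const { return R->Ignored; }

  // Entry point: skipping here hides the record and all of its children.
  bool traverseRecord(const FakeRecord *R) {
    if (shouldSkip(R))
      return true;
    if (!walkUpFromRecord(R))
      return false;
    for (const FakeRecord *Child : R->Children)
      if (!traverseRecord(Child))
        return false;
    return true;
  }

  // In the real visitor each record kind has a walkUpFromFoo that dispatches
  // to the most derived visitFoo.
  bool walkUpFromRecord(const FakeRecord *R) { return visitRecord(R); }

  // The first (and only) visit* reached for a record serializes the shared
  // symbol information exactly once.
  bool visitRecord(const FakeRecord *R) {
    serializeAPIRecord(R);
    return true;
  }

  void serializeAPIRecord(const FakeRecord *) { /* build the JSON object */ }
};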
+ bool traverseAPIRecord(const APIRecord *Record); - void visitCXXClassRecord(const CXXClassRecord &Record); + bool visitAPIRecord(const APIRecord *Record); - void visitClassTemplateRecord(const ClassTemplateRecord &Record); - - void visitClassTemplateSpecializationRecord( - const ClassTemplateSpecializationRecord &Record); - - void visitClassTemplatePartialSpecializationRecord( - const ClassTemplatePartialSpecializationRecord &Record); - - void visitCXXInstanceMethodRecord(const CXXInstanceMethodRecord &Record); + /// Visit a global function record. + bool visitGlobalFunctionRecord(const GlobalFunctionRecord *Record); - void visitCXXStaticMethodRecord(const CXXStaticMethodRecord &Record); + bool visitCXXClassRecord(const CXXClassRecord *Record); - void visitMethodTemplateRecord(const CXXMethodTemplateRecord &Record); + bool visitClassTemplateRecord(const ClassTemplateRecord *Record); - void visitMethodTemplateSpecializationRecord( - const CXXMethodTemplateSpecializationRecord &Record); + bool visitClassTemplatePartialSpecializationRecord( + const ClassTemplatePartialSpecializationRecord *Record); - void visitCXXFieldRecord(const CXXFieldRecord &Record); + bool visitCXXMethodRecord(const CXXMethodRecord *Record); - void visitCXXFieldTemplateRecord(const CXXFieldTemplateRecord &Record); + bool visitCXXMethodTemplateRecord(const CXXMethodTemplateRecord *Record); - void visitConceptRecord(const ConceptRecord &Record); + bool visitCXXFieldTemplateRecord(const CXXFieldTemplateRecord *Record); - void - visitGlobalVariableTemplateRecord(const GlobalVariableTemplateRecord &Record); + bool visitConceptRecord(const ConceptRecord *Record); - void visitGlobalVariableTemplateSpecializationRecord( - const GlobalVariableTemplateSpecializationRecord &Record); + bool + visitGlobalVariableTemplateRecord(const GlobalVariableTemplateRecord *Record); - void visitGlobalVariableTemplatePartialSpecializationRecord( - const GlobalVariableTemplatePartialSpecializationRecord &Record); + bool visitGlobalVariableTemplatePartialSpecializationRecord( + const GlobalVariableTemplatePartialSpecializationRecord *Record); - void - visitGlobalFunctionTemplateRecord(const GlobalFunctionTemplateRecord &Record); + bool + visitGlobalFunctionTemplateRecord(const GlobalFunctionTemplateRecord *Record); - void visitGlobalFunctionTemplateSpecializationRecord( - const GlobalFunctionTemplateSpecializationRecord &Record); + bool visitObjCContainerRecord(const ObjCContainerRecord *Record); - /// Visit an Objective-C container record. - void visitObjCContainerRecord(const ObjCContainerRecord &Record); + bool visitObjCInterfaceRecord(const ObjCInterfaceRecord *Record); - /// Visit an Objective-C category record. - void visitObjCCategoryRecord(const ObjCCategoryRecord &Record); + bool traverseObjCCategoryRecord(const ObjCCategoryRecord *Record); + bool walkUpFromObjCCategoryRecord(const ObjCCategoryRecord *Record); + bool visitObjCCategoryRecord(const ObjCCategoryRecord *Record); - /// Visit a macro definition record. - void visitMacroDefinitionRecord(const MacroDefinitionRecord &Record); + bool visitObjCMethodRecord(const ObjCMethodRecord *Record); - /// Visit a typedef record. - void visitTypedefRecord(const TypedefRecord &Record); + bool + visitObjCInstanceVariableRecord(const ObjCInstanceVariableRecord *Record); - /// Serialize a single record. 
- void serializeSingleRecord(const APIRecord *Record); + bool walkUpFromTypedefRecord(const TypedefRecord *Record); + bool visitTypedefRecord(const TypedefRecord *Record); SymbolGraphSerializer(const APISet &API, const APIIgnoresList &IgnoresList, - SymbolGraphSerializerOption Options = {}, - bool ShouldRecurse = true) - : APISetVisitor(API), ShouldRecurse(ShouldRecurse), - IgnoresList(IgnoresList), Options(Options) {} + bool EmitSymbolLabelsForTesting = false, + bool ForceEmitToMainModule = false) + : Base(API), ForceEmitToMainModule(ForceEmitToMainModule), + IgnoresList(IgnoresList), + EmitSymbolLabelsForTesting(EmitSymbolLabelsForTesting) {} }; } // namespace extractapi diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index 8085dbc..864af66 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -15,6 +15,7 @@ #include "clang/Sema/CodeCompleteOptions.h" #include "clang/Serialization/ModuleFileExtension.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Compiler.h" #include "llvm/Support/MemoryBuffer.h" #include #include @@ -387,6 +388,22 @@ public: LLVM_PREFERRED_TYPE(bool) unsigned ModulesShareFileManager : 1; + /// Whether to emit symbol graph files as a side effect of compilation. + LLVM_PREFERRED_TYPE(bool) + unsigned EmitSymbolGraph : 1; + + /// Whether to emit additional symbol graphs for extended modules. + LLVM_PREFERRED_TYPE(bool) + unsigned EmitExtensionSymbolGraphs : 1; + + /// Whether to emit symbol labels for testing in generated symbol graphs + LLVM_PREFERRED_TYPE(bool) + unsigned EmitSymbolGraphSymbolLabelsForTesting : 1; + + /// Whether to emit symbol labels for testing in generated symbol graphs + LLVM_PREFERRED_TYPE(bool) + unsigned EmitPrettySymbolGraphs : 1; + CodeCompleteOptions CodeCompleteOpts; /// Specifies the output format of the AST. @@ -496,10 +513,8 @@ public: // ignore when extracting documentation. std::vector ExtractAPIIgnoresFileList; - // Currently this is only used as part of the `-emit-symbol-graph` - // action. // Location of output directory where symbol graph information would - // be dumped + // be dumped. This overrides regular -o output file specification std::string SymbolGraphOutputDir; /// Args to pass to the plugins diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 1a0f5f2..e6c1767 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -49,6 +49,7 @@ #include "ToolChains/WebAssembly.h" #include "ToolChains/XCore.h" #include "ToolChains/ZOS.h" +#include "clang/Basic/DiagnosticDriver.h" #include "clang/Basic/TargetID.h" #include "clang/Basic/Version.h" #include "clang/Config/config.h" @@ -5889,6 +5890,12 @@ const char *Driver::GetNamedOutputPath(Compilation &C, const JobAction &JA, &JA); } + if (JA.getType() == types::TY_API_INFO && + C.getArgs().hasArg(options::OPT_emit_extension_symbol_graphs) && + C.getArgs().hasArg(options::OPT_o)) + Diag(clang::diag::err_drv_unexpected_symbol_graph_output) + << C.getArgs().getLastArgValue(options::OPT_o); + // DXC defaults to standard out when generating assembly. We check this after // any DXC flags that might specify a file. 
if (AtTopLevel && JA.getType() == types::TY_PP_Asm && IsDXCMode()) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 7fd6ad6..b7ec7e0 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5046,11 +5046,26 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, assert(JA.getType() == types::TY_API_INFO && "Extract API actions must generate a API information."); CmdArgs.push_back("-extract-api"); + + if (Arg *PrettySGFArg = Args.getLastArg(options::OPT_emit_pretty_sgf)) + PrettySGFArg->render(Args, CmdArgs); + + Arg *SymbolGraphDirArg = Args.getLastArg(options::OPT_symbol_graph_dir_EQ); + if (Arg *ProductNameArg = Args.getLastArg(options::OPT_product_name_EQ)) ProductNameArg->render(Args, CmdArgs); if (Arg *ExtractAPIIgnoresFileArg = Args.getLastArg(options::OPT_extract_api_ignores_EQ)) ExtractAPIIgnoresFileArg->render(Args, CmdArgs); + if (Arg *EmitExtensionSymbolGraphs = + Args.getLastArg(options::OPT_emit_extension_symbol_graphs)) { + if (!SymbolGraphDirArg) + D.Diag(diag::err_drv_missing_symbol_graph_dir); + + EmitExtensionSymbolGraphs->render(Args, CmdArgs); + } + if (SymbolGraphDirArg) + SymbolGraphDirArg->render(Args, CmdArgs); } else { assert((isa(JA) || isa(JA)) && "Invalid action for clang tool."); diff --git a/clang/lib/ExtractAPI/API.cpp b/clang/lib/ExtractAPI/API.cpp index aa7a1e9..5a62c5d 100644 --- a/clang/lib/ExtractAPI/API.cpp +++ b/clang/lib/ExtractAPI/API.cpp @@ -13,514 +13,67 @@ //===----------------------------------------------------------------------===// #include "clang/ExtractAPI/API.h" -#include "clang/AST/CommentCommandTraits.h" -#include "clang/AST/CommentLexer.h" #include "clang/AST/RawCommentList.h" +#include "clang/Basic/Module.h" #include "clang/Index/USRGeneration.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/ErrorHandling.h" #include using namespace clang::extractapi; using namespace llvm; -namespace { +SymbolReference::SymbolReference(const APIRecord *R) + : Name(R->Name), USR(R->USR), Record(R) {} -template -RecordTy *addTopLevelRecord(DenseMap &USRLookupTable, - APISet::RecordMap &RecordMap, - StringRef USR, CtorArgsTy &&...CtorArgs) { - auto Result = RecordMap.insert({USR, nullptr}); - - // Create the record if it does not already exist - if (Result.second) - Result.first->second = - std::make_unique(USR, std::forward(CtorArgs)...); - - auto *Record = Result.first->second.get(); - USRLookupTable.insert({USR, Record}); - return Record; -} - -} // namespace - -NamespaceRecord * -APISet::addNamespace(APIRecord *Parent, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, - LinkageInfo Linkage, const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, bool IsFromSystemHeader) { - auto *Record = addTopLevelRecord( - USRBasedLookupTable, Namespaces, USR, Name, Loc, std::move(Availability), - Linkage, Comment, Declaration, SubHeading, IsFromSystemHeader); - - if (Parent) - Record->ParentInformation = APIRecord::HierarchyInformation( - Parent->USR, Parent->Name, Parent->getKind(), Parent); - return Record; -} - -GlobalVariableRecord * -APISet::addGlobalVar(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Fragments, - DeclarationFragments SubHeading, bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, GlobalVariables, USR, Name, Loc, - std::move(Availability), 
Linkage, Comment, Fragments, - SubHeading, IsFromSystemHeader); -} - -GlobalVariableTemplateRecord *APISet::addGlobalVariableTemplate( - StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, Template Template, - bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, GlobalVariableTemplates, USR, - Name, Loc, std::move(Availability), Linkage, Comment, - Declaration, SubHeading, Template, - IsFromSystemHeader); -} - -GlobalFunctionRecord *APISet::addGlobalFunction( - StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Fragments, - DeclarationFragments SubHeading, FunctionSignature Signature, - bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, GlobalFunctions, USR, Name, Loc, - std::move(Availability), Linkage, Comment, Fragments, - SubHeading, Signature, IsFromSystemHeader); -} - -GlobalFunctionTemplateRecord *APISet::addGlobalFunctionTemplate( - StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, FunctionSignature Signature, - Template Template, bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, GlobalFunctionTemplates, USR, - Name, Loc, std::move(Availability), Linkage, Comment, - Declaration, SubHeading, Signature, Template, - IsFromSystemHeader); -} - -GlobalFunctionTemplateSpecializationRecord * -APISet::addGlobalFunctionTemplateSpecialization( - StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, FunctionSignature Signature, - bool IsFromSystemHeader) { - return addTopLevelRecord( - USRBasedLookupTable, GlobalFunctionTemplateSpecializations, USR, Name, - Loc, std::move(Availability), Linkage, Comment, Declaration, SubHeading, - Signature, IsFromSystemHeader); -} - -EnumConstantRecord *APISet::addEnumConstant(EnumRecord *Enum, StringRef Name, - StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - bool IsFromSystemHeader) { - auto Record = std::make_unique( - USR, Name, Loc, std::move(Availability), Comment, Declaration, SubHeading, - IsFromSystemHeader); - Record->ParentInformation = APIRecord::HierarchyInformation( - Enum->USR, Enum->Name, Enum->getKind(), Enum); - USRBasedLookupTable.insert({USR, Record.get()}); - return Enum->Constants.emplace_back(std::move(Record)).get(); -} - -EnumRecord *APISet::addEnum(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, Enums, USR, Name, Loc, - std::move(Availability), Comment, Declaration, - SubHeading, IsFromSystemHeader); -} - -RecordFieldRecord *APISet::addRecordField( - RecordRecord *Record, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - APIRecord::RecordKind Kind, bool IsFromSystemHeader) { - auto 
RecordField = std::make_unique( - USR, Name, Loc, std::move(Availability), Comment, Declaration, SubHeading, - Kind, IsFromSystemHeader); - RecordField->ParentInformation = APIRecord::HierarchyInformation( - Record->USR, Record->Name, Record->getKind(), Record); - USRBasedLookupTable.insert({USR, RecordField.get()}); - return Record->Fields.emplace_back(std::move(RecordField)).get(); -} - -RecordRecord *APISet::addRecord(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - APIRecord::RecordKind Kind, - bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, Records, USR, Name, Loc, - std::move(Availability), Comment, Declaration, - SubHeading, Kind, IsFromSystemHeader); -} - -StaticFieldRecord * -APISet::addStaticField(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, SymbolReference Context, - AccessControl Access, bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, StaticFields, USR, Name, Loc, - std::move(Availability), Linkage, Comment, - Declaration, SubHeading, Context, Access, - IsFromSystemHeader); -} - -CXXFieldRecord * -APISet::addCXXField(APIRecord *CXXClass, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, AccessControl Access, - bool IsFromSystemHeader) { - auto *Record = addTopLevelRecord( - USRBasedLookupTable, CXXFields, USR, Name, Loc, std::move(Availability), - Comment, Declaration, SubHeading, Access, IsFromSystemHeader); - Record->ParentInformation = APIRecord::HierarchyInformation( - CXXClass->USR, CXXClass->Name, CXXClass->getKind(), CXXClass); - return Record; -} - -CXXFieldTemplateRecord *APISet::addCXXFieldTemplate( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - AccessControl Access, Template Template, bool IsFromSystemHeader) { - auto *Record = - addTopLevelRecord(USRBasedLookupTable, CXXFieldTemplates, USR, Name, Loc, - std::move(Availability), Comment, Declaration, - SubHeading, Access, Template, IsFromSystemHeader); - Record->ParentInformation = APIRecord::HierarchyInformation( - Parent->USR, Parent->Name, Parent->getKind(), Parent); - - return Record; -} - -CXXClassRecord * -APISet::addCXXClass(APIRecord *Parent, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, APIRecord::RecordKind Kind, - AccessControl Access, bool IsFromSystemHeader) { - auto *Record = addTopLevelRecord( - USRBasedLookupTable, CXXClasses, USR, Name, Loc, std::move(Availability), - Comment, Declaration, SubHeading, Kind, Access, IsFromSystemHeader); - if (Parent) - Record->ParentInformation = APIRecord::HierarchyInformation( - Parent->USR, Parent->Name, Parent->getKind(), Parent); - return Record; -} - -ClassTemplateRecord *APISet::addClassTemplate( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - Template Template, 
AccessControl Access, bool IsFromSystemHeader) { - auto *Record = - addTopLevelRecord(USRBasedLookupTable, ClassTemplates, USR, Name, Loc, - std::move(Availability), Comment, Declaration, - SubHeading, Template, Access, IsFromSystemHeader); - if (Parent) - Record->ParentInformation = APIRecord::HierarchyInformation( - Parent->USR, Parent->Name, Parent->getKind(), Parent); - return Record; -} - -ClassTemplateSpecializationRecord *APISet::addClassTemplateSpecialization( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - AccessControl Access, bool IsFromSystemHeader) { - auto *Record = - addTopLevelRecord(USRBasedLookupTable, ClassTemplateSpecializations, USR, - Name, Loc, std::move(Availability), Comment, - Declaration, SubHeading, Access, IsFromSystemHeader); - if (Parent) - Record->ParentInformation = APIRecord::HierarchyInformation( - Parent->USR, Parent->Name, Parent->getKind(), Parent); - return Record; -} - -ClassTemplatePartialSpecializationRecord * -APISet::addClassTemplatePartialSpecialization( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - Template Template, AccessControl Access, bool IsFromSystemHeader) { - auto *Record = addTopLevelRecord( - USRBasedLookupTable, ClassTemplatePartialSpecializations, USR, Name, Loc, - std::move(Availability), Comment, Declaration, SubHeading, Template, - Access, IsFromSystemHeader); - if (Parent) - Record->ParentInformation = APIRecord::HierarchyInformation( - Parent->USR, Parent->Name, Parent->getKind(), Parent); - return Record; -} - -GlobalVariableTemplateSpecializationRecord * -APISet::addGlobalVariableTemplateSpecialization( - StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, - GlobalVariableTemplateSpecializations, USR, Name, - Loc, std::move(Availability), Linkage, Comment, - Declaration, SubHeading, IsFromSystemHeader); -} - -GlobalVariableTemplatePartialSpecializationRecord * -APISet::addGlobalVariableTemplatePartialSpecialization( - StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, DeclarationFragments Declaration, - DeclarationFragments SubHeading, Template Template, - bool IsFromSystemHeader) { - return addTopLevelRecord( - USRBasedLookupTable, GlobalVariableTemplatePartialSpecializations, USR, - Name, Loc, std::move(Availability), Linkage, Comment, Declaration, - SubHeading, Template, IsFromSystemHeader); -} - -ConceptRecord *APISet::addConcept(StringRef Name, StringRef USR, - PresumedLoc Loc, - AvailabilityInfo Availability, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - Template Template, bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, Concepts, USR, Name, Loc, - std::move(Availability), Comment, Declaration, - SubHeading, Template, IsFromSystemHeader); -} - -CXXMethodRecord *APISet::addCXXInstanceMethod( - APIRecord *CXXClassRecord, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - 
DeclarationFragments Declaration, DeclarationFragments SubHeading, - FunctionSignature Signature, AccessControl Access, - bool IsFromSystemHeader) { - CXXMethodRecord *Record = - addTopLevelRecord(USRBasedLookupTable, CXXInstanceMethods, USR, Name, Loc, - std::move(Availability), Comment, Declaration, - SubHeading, Signature, Access, IsFromSystemHeader); - - Record->ParentInformation = APIRecord::HierarchyInformation( - CXXClassRecord->USR, CXXClassRecord->Name, CXXClassRecord->getKind(), - CXXClassRecord); - return Record; -} - -CXXMethodRecord *APISet::addCXXStaticMethod( - APIRecord *CXXClassRecord, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - FunctionSignature Signature, AccessControl Access, - bool IsFromSystemHeader) { - CXXMethodRecord *Record = - addTopLevelRecord(USRBasedLookupTable, CXXStaticMethods, USR, Name, Loc, - std::move(Availability), Comment, Declaration, - SubHeading, Signature, Access, IsFromSystemHeader); - - Record->ParentInformation = APIRecord::HierarchyInformation( - CXXClassRecord->USR, CXXClassRecord->Name, CXXClassRecord->getKind(), - CXXClassRecord); - return Record; -} - -CXXMethodTemplateRecord *APISet::addCXXMethodTemplate( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - FunctionSignature Signature, AccessControl Access, Template Template, - bool IsFromSystemHeader) { - auto *Record = addTopLevelRecord(USRBasedLookupTable, CXXMethodTemplates, USR, - Name, Loc, std::move(Availability), Comment, - Declaration, SubHeading, Signature, Access, - Template, IsFromSystemHeader); - Record->ParentInformation = APIRecord::HierarchyInformation( - Parent->USR, Parent->Name, Parent->getKind(), Parent); - - return Record; -} - -CXXMethodTemplateSpecializationRecord *APISet::addCXXMethodTemplateSpec( - APIRecord *Parent, StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - FunctionSignature Signature, AccessControl Access, - bool IsFromSystemHeader) { - - auto *Record = addTopLevelRecord( - USRBasedLookupTable, CXXMethodTemplateSpecializations, USR, Name, Loc, - std::move(Availability), Comment, Declaration, SubHeading, Signature, - Access, IsFromSystemHeader); - Record->ParentInformation = APIRecord::HierarchyInformation( - Parent->USR, Parent->Name, Parent->getKind(), Parent); - - return Record; -} - -ObjCCategoryRecord *APISet::addObjCCategory( - StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - SymbolReference Interface, bool IsFromSystemHeader, - bool IsFromExternalModule) { - // Create the category record. 
- auto *Record = - addTopLevelRecord(USRBasedLookupTable, ObjCCategories, USR, Name, Loc, - std::move(Availability), Comment, Declaration, - SubHeading, Interface, IsFromSystemHeader); - - Record->IsFromExternalModule = IsFromExternalModule; - - auto It = ObjCInterfaces.find(Interface.USR); - if (It != ObjCInterfaces.end()) - It->second->Categories.push_back(Record); - - return Record; -} - -ObjCInterfaceRecord * -APISet::addObjCInterface(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, LinkageInfo Linkage, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - SymbolReference SuperClass, bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, ObjCInterfaces, USR, Name, Loc, - std::move(Availability), Linkage, Comment, - Declaration, SubHeading, SuperClass, - IsFromSystemHeader); -} - -ObjCMethodRecord *APISet::addObjCMethod( - ObjCContainerRecord *Container, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - FunctionSignature Signature, bool IsInstanceMethod, - bool IsFromSystemHeader) { - std::unique_ptr Record; - if (IsInstanceMethod) - Record = std::make_unique( - USR, Name, Loc, std::move(Availability), Comment, Declaration, - SubHeading, Signature, IsFromSystemHeader); - else - Record = std::make_unique( - USR, Name, Loc, std::move(Availability), Comment, Declaration, - SubHeading, Signature, IsFromSystemHeader); - - Record->ParentInformation = APIRecord::HierarchyInformation( - Container->USR, Container->Name, Container->getKind(), Container); - USRBasedLookupTable.insert({USR, Record.get()}); - return Container->Methods.emplace_back(std::move(Record)).get(); -} - -ObjCPropertyRecord *APISet::addObjCProperty( - ObjCContainerRecord *Container, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - ObjCPropertyRecord::AttributeKind Attributes, StringRef GetterName, - StringRef SetterName, bool IsOptional, bool IsInstanceProperty, - bool IsFromSystemHeader) { - std::unique_ptr Record; - if (IsInstanceProperty) - Record = std::make_unique( - USR, Name, Loc, std::move(Availability), Comment, Declaration, - SubHeading, Attributes, GetterName, SetterName, IsOptional, - IsFromSystemHeader); - else - Record = std::make_unique( - USR, Name, Loc, std::move(Availability), Comment, Declaration, - SubHeading, Attributes, GetterName, SetterName, IsOptional, - IsFromSystemHeader); - Record->ParentInformation = APIRecord::HierarchyInformation( - Container->USR, Container->Name, Container->getKind(), Container); - USRBasedLookupTable.insert({USR, Record.get()}); - return Container->Properties.emplace_back(std::move(Record)).get(); -} - -ObjCInstanceVariableRecord *APISet::addObjCInstanceVariable( - ObjCContainerRecord *Container, StringRef Name, StringRef USR, - PresumedLoc Loc, AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, DeclarationFragments SubHeading, - ObjCInstanceVariableRecord::AccessControl Access, bool IsFromSystemHeader) { - auto Record = std::make_unique( - USR, Name, Loc, std::move(Availability), Comment, Declaration, SubHeading, - Access, IsFromSystemHeader); - Record->ParentInformation = APIRecord::HierarchyInformation( - Container->USR, Container->Name, Container->getKind(), Container); - 
USRBasedLookupTable.insert({USR, Record.get()}); - return Container->Ivars.emplace_back(std::move(Record)).get(); +APIRecord *APIRecord::castFromRecordContext(const RecordContext *Ctx) { + switch (Ctx->getKind()) { +#define RECORD_CONTEXT(CLASS, KIND) \ + case KIND: \ + return static_cast(const_cast(Ctx)); +#include "clang/ExtractAPI/APIRecords.inc" + default: + return nullptr; + // llvm_unreachable("RecordContext derived class isn't propertly + // implemented"); + } } -ObjCProtocolRecord *APISet::addObjCProtocol(StringRef Name, StringRef USR, - PresumedLoc Loc, - AvailabilityInfo Availability, - const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, ObjCProtocols, USR, Name, Loc, - std::move(Availability), Comment, Declaration, - SubHeading, IsFromSystemHeader); +RecordContext *APIRecord::castToRecordContext(const APIRecord *Record) { + if (!Record) + return nullptr; + switch (Record->getKind()) { +#define RECORD_CONTEXT(CLASS, KIND) \ + case KIND: \ + return static_cast(const_cast(Record)); +#include "clang/ExtractAPI/APIRecords.inc" + default: + return nullptr; + // llvm_unreachable("RecordContext derived class isn't propertly + // implemented"); + } } -MacroDefinitionRecord * -APISet::addMacroDefinition(StringRef Name, StringRef USR, PresumedLoc Loc, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, Macros, USR, Name, Loc, - Declaration, SubHeading, IsFromSystemHeader); -} +void RecordContext::addToRecordChain(APIRecord *Record) const { + if (!First) { + First = Record; + Last = Record; + return; + } -TypedefRecord * -APISet::addTypedef(StringRef Name, StringRef USR, PresumedLoc Loc, - AvailabilityInfo Availability, const DocComment &Comment, - DeclarationFragments Declaration, - DeclarationFragments SubHeading, - SymbolReference UnderlyingType, bool IsFromSystemHeader) { - return addTopLevelRecord(USRBasedLookupTable, Typedefs, USR, Name, Loc, - std::move(Availability), Comment, Declaration, - SubHeading, UnderlyingType, IsFromSystemHeader); + Last->NextInContext = Record; + Last = Record; } APIRecord *APISet::findRecordForUSR(StringRef USR) const { if (USR.empty()) return nullptr; - return USRBasedLookupTable.lookup(USR); -} - -StringRef APISet::recordUSR(const Decl *D) { - SmallString<128> USR; - index::generateUSRForDecl(D, USR); - return copyString(USR); -} + auto FindIt = USRBasedLookupTable.find(USR); + if (FindIt != USRBasedLookupTable.end()) + return FindIt->getSecond().get(); -StringRef APISet::recordUSRForMacro(StringRef Name, SourceLocation SL, - const SourceManager &SM) { - SmallString<128> USR; - index::generateUSRForMacro(Name, SL, SM, USR); - return copyString(USR); + return nullptr; } StringRef APISet::copyString(StringRef String) { @@ -528,15 +81,22 @@ StringRef APISet::copyString(StringRef String) { return {}; // No need to allocate memory and copy if the string has already been stored. 
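The First/Last/NextInContext pointers manipulated by addToRecordChain above form an intrusive singly linked list of a context's children, so appending stays O(1) and iteration only needs the head pointer. A simplified, self-contained version of that structure (MiniRecord and MiniContext are illustrative names, and the const qualifier of the real member function is dropped here):

struct MiniRecord {
  MiniRecord *NextInContext = nullptr;
};

struct MiniContext {
  MiniRecord *First = nullptr;
  MiniRecord *Last = nullptr;

  void addToRecordChain(MiniRecord *Record) {
    if (!First) {
      // Empty chain: the new record is both head and tail.
      First = Record;
      Last = Record;
      return;
    }
    Last->NextInContext = Record; // append in insertion order
    Last = Record;
  }
};

// Consumers only need the head pointer to walk the chain:
inline int countChildren(const MiniContext &Ctx) {
  int N = 0;
  for (const MiniRecord *R = Ctx.First; R; R = R->NextInContext)
    ++N;
  return N;
}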
- if (StringAllocator.identifyObject(String.data())) + if (Allocator.identifyObject(String.data())) return String; - void *Ptr = StringAllocator.Allocate(String.size(), 1); + void *Ptr = Allocator.Allocate(String.size(), 1); memcpy(Ptr, String.data(), String.size()); return StringRef(reinterpret_cast(Ptr), String.size()); } +SymbolReference APISet::createSymbolReference(StringRef Name, StringRef USR, + StringRef Source) { + return SymbolReference(copyString(Name), copyString(USR), copyString(Source)); +} + APIRecord::~APIRecord() {} +RecordRecord::~RecordRecord() {} +RecordFieldRecord::~RecordFieldRecord() {} ObjCContainerRecord::~ObjCContainerRecord() {} ObjCMethodRecord::~ObjCMethodRecord() {} ObjCPropertyRecord::~ObjCPropertyRecord() {} @@ -546,8 +106,10 @@ void GlobalFunctionRecord::anchor() {} void GlobalVariableRecord::anchor() {} void EnumConstantRecord::anchor() {} void EnumRecord::anchor() {} -void RecordFieldRecord::anchor() {} -void RecordRecord::anchor() {} +void StructFieldRecord::anchor() {} +void StructRecord::anchor() {} +void UnionFieldRecord::anchor() {} +void UnionRecord::anchor() {} void CXXFieldRecord::anchor() {} void CXXClassRecord::anchor() {} void CXXConstructorRecord::anchor() {} diff --git a/clang/lib/ExtractAPI/DeclarationFragments.cpp b/clang/lib/ExtractAPI/DeclarationFragments.cpp index 22b98e0..0a24312 100644 --- a/clang/lib/ExtractAPI/DeclarationFragments.cpp +++ b/clang/lib/ExtractAPI/DeclarationFragments.cpp @@ -57,23 +57,44 @@ void findTypeLocForBlockDecl(const clang::TypeSourceInfo *TSInfo, } // namespace -DeclarationFragments &DeclarationFragments::appendSpace() { +DeclarationFragments & +DeclarationFragments::appendUnduplicatedTextCharacter(char Character) { if (!Fragments.empty()) { Fragment &Last = Fragments.back(); if (Last.Kind == FragmentKind::Text) { // Merge the extra space into the last fragment if the last fragment is // also text. - if (Last.Spelling.back() != ' ') { // avoid extra trailing spaces. 
- Last.Spelling.push_back(' '); + if (Last.Spelling.back() != Character) { // avoid duplicates at end + Last.Spelling.push_back(Character); } } else { - append(" ", FragmentKind::Text); + append("", FragmentKind::Text); + Fragments.back().Spelling.push_back(Character); } } return *this; } +DeclarationFragments &DeclarationFragments::appendSpace() { + return appendUnduplicatedTextCharacter(' '); +} + +DeclarationFragments &DeclarationFragments::appendSemicolon() { + return appendUnduplicatedTextCharacter(';'); +} + +DeclarationFragments &DeclarationFragments::removeTrailingSemicolon() { + if (Fragments.empty()) + return *this; + + Fragment &Last = Fragments.back(); + if (Last.Kind == FragmentKind::Text && Last.Spelling.back() == ';') + Last.Spelling.pop_back(); + + return *this; +} + StringRef DeclarationFragments::getFragmentKindString( DeclarationFragments::FragmentKind Kind) { switch (Kind) { @@ -466,7 +487,7 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForNamespace( if (!Decl->isAnonymousNamespace()) Fragments.appendSpace().append( Decl->getName(), DeclarationFragments::FragmentKind::Identifier); - return Fragments.append(";", DeclarationFragments::FragmentKind::Text); + return Fragments.appendSemicolon(); } DeclarationFragments @@ -508,7 +529,7 @@ DeclarationFragmentsBuilder::getFragmentsForVar(const VarDecl *Var) { return Fragments .append(Var->getName(), DeclarationFragments::FragmentKind::Identifier) .append(std::move(After)) - .append(";", DeclarationFragments::FragmentKind::Text); + .appendSemicolon(); } DeclarationFragments @@ -538,7 +559,7 @@ DeclarationFragmentsBuilder::getFragmentsForVarTemplate(const VarDecl *Var) { Fragments.append(std::move(ArgumentFragment)) .appendSpace() .append(Var->getName(), DeclarationFragments::FragmentKind::Identifier) - .append(";", DeclarationFragments::FragmentKind::Text); + .appendSemicolon(); return Fragments; } @@ -698,7 +719,7 @@ DeclarationFragmentsBuilder::getFragmentsForFunction(const FunctionDecl *Func) { Fragments.append(DeclarationFragments::getExceptionSpecificationString( Func->getExceptionSpecType())); - return Fragments.append(";", DeclarationFragments::FragmentKind::Text); + return Fragments.appendSemicolon(); } DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForEnumConstant( @@ -727,7 +748,7 @@ DeclarationFragmentsBuilder::getFragmentsForEnum(const EnumDecl *EnumDecl) { getFragmentsForType(IntegerType, EnumDecl->getASTContext(), After)) .append(std::move(After)); - return Fragments.append(";", DeclarationFragments::FragmentKind::Text); + return Fragments.appendSemicolon(); } DeclarationFragments @@ -743,7 +764,7 @@ DeclarationFragmentsBuilder::getFragmentsForField(const FieldDecl *Field) { .appendSpace() .append(Field->getName(), DeclarationFragments::FragmentKind::Identifier) .append(std::move(After)) - .append(";", DeclarationFragments::FragmentKind::Text); + .appendSemicolon(); } DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForRecordDecl( @@ -761,7 +782,7 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForRecordDecl( Fragments.appendSpace().append( Record->getName(), DeclarationFragments::FragmentKind::Identifier); - return Fragments.append(";", DeclarationFragments::FragmentKind::Text); + return Fragments.appendSemicolon(); } DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForCXXClass( @@ -776,7 +797,7 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForCXXClass( Fragments.appendSpace().append( Record->getName(), 
DeclarationFragments::FragmentKind::Identifier); - return Fragments.append(";", DeclarationFragments::FragmentKind::Text); + return Fragments.appendSemicolon(); } DeclarationFragments @@ -806,7 +827,7 @@ DeclarationFragmentsBuilder::getFragmentsForSpecialCXXMethod( Fragments.append(DeclarationFragments::getExceptionSpecificationString( Method->getExceptionSpecType())); - return Fragments.append(";", DeclarationFragments::FragmentKind::Text); + return Fragments.appendSemicolon(); } DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForCXXMethod( @@ -846,7 +867,7 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForCXXMethod( Fragments.append(DeclarationFragments::getExceptionSpecificationString( Method->getExceptionSpecType())); - return Fragments.append(";", DeclarationFragments::FragmentKind::Text); + return Fragments.appendSemicolon(); } DeclarationFragments @@ -877,7 +898,7 @@ DeclarationFragmentsBuilder::getFragmentsForConversionFunction( Fragments.appendSpace().append("const", DeclarationFragments::FragmentKind::Keyword); - return Fragments.append(";", DeclarationFragments::FragmentKind::Text); + return Fragments.appendSemicolon(); } DeclarationFragments @@ -909,7 +930,7 @@ DeclarationFragmentsBuilder::getFragmentsForOverloadedOperator( Fragments.append(DeclarationFragments::getExceptionSpecificationString( Method->getExceptionSpecType())); - return Fragments.append(";", DeclarationFragments::FragmentKind::Text); + return Fragments.appendSemicolon(); } // Get fragments for template parameters, e.g. T in tempalte ... @@ -997,7 +1018,7 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForConcept( .appendSpace() .append(Concept->getName().str(), DeclarationFragments::FragmentKind::Identifier) - .append(";", DeclarationFragments::FragmentKind::Text); + .appendSemicolon(); } DeclarationFragments @@ -1038,7 +1059,7 @@ DeclarationFragmentsBuilder::getFragmentsForClassTemplateSpecialization( getFragmentsForTemplateArguments(Decl->getTemplateArgs().asArray(), Decl->getASTContext(), std::nullopt)) .append(">", DeclarationFragments::FragmentKind::Text) - .append(";", DeclarationFragments::FragmentKind::Text); + .appendSemicolon(); } DeclarationFragments @@ -1060,7 +1081,7 @@ DeclarationFragmentsBuilder::getFragmentsForClassTemplatePartialSpecialization( Decl->getTemplateArgs().asArray(), Decl->getASTContext(), Decl->getTemplateArgsAsWritten()->arguments())) .append(">", DeclarationFragments::FragmentKind::Text) - .append(";", DeclarationFragments::FragmentKind::Text); + .appendSemicolon(); } DeclarationFragments @@ -1079,7 +1100,7 @@ DeclarationFragmentsBuilder::getFragmentsForVarTemplateSpecialization( getFragmentsForTemplateArguments(Decl->getTemplateArgs().asArray(), Decl->getASTContext(), std::nullopt)) .append(">", DeclarationFragments::FragmentKind::Text) - .append(";", DeclarationFragments::FragmentKind::Text); + .appendSemicolon(); } DeclarationFragments @@ -1101,7 +1122,7 @@ DeclarationFragmentsBuilder::getFragmentsForVarTemplatePartialSpecialization( Decl->getTemplateArgs().asArray(), Decl->getASTContext(), Decl->getTemplateArgsAsWritten()->arguments())) .append(">", DeclarationFragments::FragmentKind::Text) - .append(";", DeclarationFragments::FragmentKind::Text); + .appendSemicolon(); } DeclarationFragments @@ -1172,7 +1193,7 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForObjCCategory( Fragments.append("@interface", DeclarationFragments::FragmentKind::Keyword) .appendSpace() - 
.append(Category->getClassInterface()->getName(), + .append(Interface->getName(), DeclarationFragments::FragmentKind::TypeIdentifier, InterfaceUSR, Interface) .append(" (", DeclarationFragments::FragmentKind::Text) @@ -1246,7 +1267,7 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForObjCMethod( Fragments.append(getFragmentsForParam(Param)); } - return Fragments.append(";", DeclarationFragments::FragmentKind::Text); + return Fragments.appendSemicolon(); } DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForObjCProperty( @@ -1347,7 +1368,7 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForObjCProperty( .append(Property->getName(), DeclarationFragments::FragmentKind::Identifier) .append(std::move(After)) - .append(";", DeclarationFragments::FragmentKind::Text); + .appendSemicolon(); } DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForObjCProtocol( @@ -1391,7 +1412,7 @@ DeclarationFragments DeclarationFragmentsBuilder::getFragmentsForTypedef( .appendSpace() .append(Decl->getName(), DeclarationFragments::FragmentKind::Identifier); - return Fragments.append(";", DeclarationFragments::FragmentKind::Text); + return Fragments.appendSemicolon(); } // Instantiate template for FunctionDecl. diff --git a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp index 275f49b..d633585 100644 --- a/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp +++ b/clang/lib/ExtractAPI/ExtractAPIConsumer.cpp @@ -30,6 +30,7 @@ #include "clang/Frontend/CompilerInstance.h" #include "clang/Frontend/FrontendOptions.h" #include "clang/Frontend/MultiplexConsumer.h" +#include "clang/Index/USRGeneration.h" #include "clang/InstallAPI/HeaderFile.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/PPCallbacks.h" @@ -39,6 +40,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Error.h" #include "llvm/Support/FileSystem.h" @@ -327,11 +329,12 @@ public: StringRef Name = PM.MacroNameToken.getIdentifierInfo()->getName(); PresumedLoc Loc = SM.getPresumedLoc(PM.MacroNameToken.getLocation()); - StringRef USR = - API.recordUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM); + SmallString<128> USR; + index::generateUSRForMacro(Name, PM.MacroNameToken.getLocation(), SM, + USR); - API.addMacroDefinition( - Name, USR, Loc, + API.createRecord( + USR, Name, SymbolReference(), Loc, DeclarationFragmentsBuilder::getFragmentsForMacro(Name, PM.MD), DeclarationFragmentsBuilder::getSubHeadingForMacro(Name), SM.isInSystemHeader(PM.MacroNameToken.getLocation())); @@ -372,40 +375,57 @@ private: LocationFileChecker &LCF; }; +std::unique_ptr +createAdditionalSymbolGraphFile(CompilerInstance &CI, Twine BaseName) { + auto OutputDirectory = CI.getFrontendOpts().SymbolGraphOutputDir; + + SmallString<256> FileName; + llvm::sys::path::append(FileName, OutputDirectory, + BaseName + ".symbols.json"); + return CI.createOutputFile( + FileName, /*Binary*/ false, /*RemoveFileOnSignal*/ false, + /*UseTemporary*/ true, /*CreateMissingDirectories*/ true); +} + } // namespace -void ExtractAPIActionBase::ImplEndSourceFileAction() { - if (!OS) - return; +void ExtractAPIActionBase::ImplEndSourceFileAction(CompilerInstance &CI) { + SymbolGraphSerializerOption SerializationOptions; + SerializationOptions.Compact = !CI.getFrontendOpts().EmitPrettySymbolGraphs; + SerializationOptions.EmitSymbolLabelsForTesting = + 
CI.getFrontendOpts().EmitSymbolGraphSymbolLabelsForTesting; + + if (CI.getFrontendOpts().EmitExtensionSymbolGraphs) { + auto ConstructOutputFile = [&CI](Twine BaseName) { + return createAdditionalSymbolGraphFile(CI, BaseName); + }; + + SymbolGraphSerializer::serializeWithExtensionGraphs( + *OS, *API, IgnoresList, ConstructOutputFile, SerializationOptions); + } else { + SymbolGraphSerializer::serializeMainSymbolGraph(*OS, *API, IgnoresList, + SerializationOptions); + } - // Setup a SymbolGraphSerializer to write out collected API information in - // the Symbol Graph format. - // FIXME: Make the kind of APISerializer configurable. - SymbolGraphSerializer SGSerializer(*API, IgnoresList); - SGSerializer.serialize(*OS); + // Flush the stream and close the main output stream. OS.reset(); } -std::unique_ptr -ExtractAPIAction::CreateOutputFile(CompilerInstance &CI, StringRef InFile) { - std::unique_ptr OS; - OS = CI.createDefaultOutputFile(/*Binary=*/false, InFile, - /*Extension=*/"json", - /*RemoveFileOnSignal=*/false); - if (!OS) - return nullptr; - return OS; -} - std::unique_ptr ExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, StringRef InFile) { - OS = CreateOutputFile(CI, InFile); + auto ProductName = CI.getFrontendOpts().ProductName; + + if (CI.getFrontendOpts().SymbolGraphOutputDir.empty()) + OS = CI.createDefaultOutputFile(/*Binary*/ false, InFile, + /*Extension*/ "symbols.json", + /*RemoveFileOnSignal*/ false, + /*CreateMissingDirectories*/ true); + else + OS = createAdditionalSymbolGraphFile(CI, ProductName); if (!OS) return nullptr; - auto ProductName = CI.getFrontendOpts().ProductName; - // Now that we have enough information about the language options and the // target triple, let's create the APISet before anyone uses it. API = std::make_unique( @@ -495,7 +515,9 @@ bool ExtractAPIAction::PrepareToExecuteAction(CompilerInstance &CI) { return true; } -void ExtractAPIAction::EndSourceFileAction() { ImplEndSourceFileAction(); } +void ExtractAPIAction::EndSourceFileAction() { + ImplEndSourceFileAction(getCompilerInstance()); +} std::unique_ptr WrappingExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, @@ -506,11 +528,9 @@ WrappingExtractAPIAction::CreateASTConsumer(CompilerInstance &CI, CreatedASTConsumer = true; - OS = CreateOutputFile(CI, InFile); - if (!OS) - return nullptr; - - auto ProductName = CI.getFrontendOpts().ProductName; + ProductName = CI.getFrontendOpts().ProductName; + auto InputFilename = llvm::sys::path::filename(InFile); + OS = createAdditionalSymbolGraphFile(CI, InputFilename); // Now that we have enough information about the language options and the // target triple, let's create the APISet before anyone uses it. @@ -552,32 +572,6 @@ void WrappingExtractAPIAction::EndSourceFileAction() { WrapperFrontendAction::EndSourceFileAction(); if (CreatedASTConsumer) { - ImplEndSourceFileAction(); + ImplEndSourceFileAction(getCompilerInstance()); } } - -std::unique_ptr -WrappingExtractAPIAction::CreateOutputFile(CompilerInstance &CI, - StringRef InFile) { - std::unique_ptr OS; - std::string OutputDir = CI.getFrontendOpts().SymbolGraphOutputDir; - - // The symbol graphs need to be generated as a side effect of regular - // compilation so the output should be dumped in the directory provided with - // the command line option. 
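createAdditionalSymbolGraphFile above always places extension graphs inside the -symbol-graph-dir directory and names them <base>.symbols.json. Below is a rough standalone sketch of just the path construction, with the CompilerInstance-managed stream replaced by a plain string result; symbolGraphPath is an illustrative helper, not part of the patch.

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/Path.h"
#include <string>

static std::string symbolGraphPath(llvm::StringRef OutputDir,
                                   llvm::Twine BaseName) {
  llvm::SmallString<256> FileName;
  // sys::path::append inserts the platform separator between components.
  llvm::sys::path::append(FileName, OutputDir, BaseName + ".symbols.json");
  return FileName.str().str();
}

// e.g. symbolGraphPath("/tmp/sgfs", "MyProduct")
//        == "/tmp/sgfs/MyProduct.symbols.json"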
- llvm::SmallString<128> OutFilePath(OutputDir); - auto Seperator = llvm::sys::path::get_separator(); - auto Infilename = llvm::sys::path::filename(InFile); - OutFilePath.append({Seperator, Infilename}); - llvm::sys::path::replace_extension(OutFilePath, "json"); - // StringRef outputFilePathref = *OutFilePath; - - // don't use the default output file - OS = CI.createOutputFile(/*OutputPath=*/OutFilePath, /*Binary=*/false, - /*RemoveFileOnSignal=*/true, - /*UseTemporary=*/true, - /*CreateMissingDirectories=*/true); - if (!OS) - return nullptr; - return OS; -} diff --git a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp index 545860a..57f966c 100644 --- a/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp +++ b/clang/lib/ExtractAPI/Serialization/SymbolGraphSerializer.cpp @@ -14,13 +14,17 @@ #include "clang/ExtractAPI/Serialization/SymbolGraphSerializer.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Version.h" +#include "clang/ExtractAPI/API.h" #include "clang/ExtractAPI/DeclarationFragments.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLFunctionalExtras.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Path.h" #include "llvm/Support/VersionTuple.h" +#include "llvm/Support/raw_ostream.h" +#include #include #include @@ -33,26 +37,27 @@ namespace { /// Helper function to inject a JSON object \p Obj into another object \p Paren /// at position \p Key. -void serializeObject(Object &Paren, StringRef Key, std::optional Obj) { +void serializeObject(Object &Paren, StringRef Key, + std::optional &&Obj) { if (Obj) Paren[Key] = std::move(*Obj); } -/// Helper function to inject a StringRef \p String into an object \p Paren at -/// position \p Key -void serializeString(Object &Paren, StringRef Key, - std::optional String) { - if (String) - Paren[Key] = std::move(*String); -} - /// Helper function to inject a JSON array \p Array into object \p Paren at /// position \p Key. -void serializeArray(Object &Paren, StringRef Key, std::optional Array) { +void serializeArray(Object &Paren, StringRef Key, + std::optional &&Array) { if (Array) Paren[Key] = std::move(*Array); } +/// Helper function to inject a JSON array composed of the values in \p C into +/// object \p Paren at position \p Key. +template +void serializeArray(Object &Paren, StringRef Key, ContainerTy &&C) { + Paren[Key] = Array(C); +} + /// Serialize a \c VersionTuple \p V with the Symbol Graph semantic version /// format. /// @@ -248,6 +253,7 @@ std::optional serializeDocComment(const DocComment &Comment) { return std::nullopt; Object DocComment; + Array LinesArray; for (const auto &CommentLine : Comment) { Object Line; @@ -256,7 +262,8 @@ std::optional serializeDocComment(const DocComment &Comment) { serializeSourceRange(CommentLine.Begin, CommentLine.End)); LinesArray.emplace_back(std::move(Line)); } - serializeArray(DocComment, "lines", LinesArray); + + serializeArray(DocComment, "lines", std::move(LinesArray)); return DocComment; } @@ -322,19 +329,14 @@ serializeDeclarationFragments(const DeclarationFragments &DF) { /// - \c subHeading : An array of declaration fragments that provides tags, /// and potentially more tokens (for example the \c +/- symbol for /// Objective-C methods). Can be used as sub-headings for documentation. 
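For reference, the "names" object that serializeNames builds now has roughly the shape shown below; after this change the title is always the record's own name (the Objective-C category special case was moved out of the helper). The fragment keys and values here are illustrative of the Symbol Graph format rather than copied from an actual output file.

#include "llvm/Support/JSON.h"

static llvm::json::Object exampleNames() {
  llvm::json::Object Names;
  Names["title"] = "MyClass";
  // A single identifier fragment used by navigation UIs.
  Names["navigator"] = llvm::json::Array{
      llvm::json::Object{{"kind", "identifier"}, {"spelling", "MyClass"}}};
  // Sub-heading fragments shown under documentation headings.
  Names["subHeading"] = llvm::json::Array{
      llvm::json::Object{{"kind", "identifier"}, {"spelling", "MyClass"}}};
  return Names;
}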
-Object serializeNames(const APIRecord &Record) { +Object serializeNames(const APIRecord *Record) { Object Names; - if (auto *CategoryRecord = - dyn_cast_or_null(&Record)) - Names["title"] = - (CategoryRecord->Interface.Name + " (" + Record.Name + ")").str(); - else - Names["title"] = Record.Name; + Names["title"] = Record->Name; serializeArray(Names, "subHeading", - serializeDeclarationFragments(Record.SubHeading)); + serializeDeclarationFragments(Record->SubHeading)); DeclarationFragments NavigatorFragments; - NavigatorFragments.append(Record.Name, + NavigatorFragments.append(Record->Name, DeclarationFragments::FragmentKind::Identifier, /*PreciseIdentifier*/ ""); serializeArray(Names, "navigator", @@ -351,7 +353,8 @@ Object serializeSymbolKind(APIRecord::RecordKind RK, Language Lang) { Object Kind; switch (RK) { case APIRecord::RK_Unknown: - llvm_unreachable("Records should have an explicit kind"); + Kind["identifier"] = AddLangPrefix("unknown"); + Kind["displayName"] = "Unknown"; break; case APIRecord::RK_Namespace: Kind["identifier"] = AddLangPrefix("namespace"); @@ -484,10 +487,6 @@ Object serializeSymbolKind(APIRecord::RecordKind RK, Language Lang) { Kind["identifier"] = AddLangPrefix("class.extension"); Kind["displayName"] = "Class Extension"; break; - case APIRecord::RK_ObjCCategoryModule: - Kind["identifier"] = AddLangPrefix("module.extension"); - Kind["displayName"] = "Module Extension"; - break; case APIRecord::RK_ObjCProtocol: Kind["identifier"] = AddLangPrefix("protocol"); Kind["displayName"] = "Protocol"; @@ -500,6 +499,8 @@ Object serializeSymbolKind(APIRecord::RecordKind RK, Language Lang) { Kind["identifier"] = AddLangPrefix("typealias"); Kind["displayName"] = "Type Alias"; break; + default: + llvm_unreachable("API Record with uninstantiable kind"); } return Kind; @@ -514,12 +515,18 @@ Object serializeSymbolKind(const APIRecord &Record, Language Lang) { return serializeSymbolKind(Record.getKind(), Lang); } +/// Serialize the function signature field, as specified by the +/// Symbol Graph format. +/// +/// The Symbol Graph function signature property contains two arrays. +/// - The \c returns array is the declaration fragments of the return type; +/// - The \c parameters array contains names and declaration fragments of the +/// parameters. template -std::optional -serializeFunctionSignatureMixinImpl(const RecordTy &Record, std::true_type) { +void serializeFunctionSignatureMixin(Object &Paren, const RecordTy &Record) { const auto &FS = Record.Signature; if (FS.empty()) - return std::nullopt; + return; Object Signature; serializeArray(Signature, "returns", @@ -537,63 +544,14 @@ serializeFunctionSignatureMixinImpl(const RecordTy &Record, std::true_type) { if (!Parameters.empty()) Signature["parameters"] = std::move(Parameters); - return Signature; + serializeObject(Paren, "functionSignature", std::move(Signature)); } template -std::optional -serializeFunctionSignatureMixinImpl(const RecordTy &Record, std::false_type) { - return std::nullopt; -} - -/// Serialize the function signature field, as specified by the -/// Symbol Graph format. -/// -/// The Symbol Graph function signature property contains two arrays. -/// - The \c returns array is the declaration fragments of the return type; -/// - The \c parameters array contains names and declaration fragments of the -/// parameters. -/// -/// \returns \c std::nullopt if \p FS is empty, or an \c Object containing the -/// formatted function signature. 
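And the "functionSignature" object the mixin attaches for a function such as `int add(int x)` looks roughly as follows; the key names inside each parameter entry ("name", "declarationFragments") are a reading of the format described in the comment above, not text taken from this hunk.

#include "llvm/Support/JSON.h"

static llvm::json::Object exampleFunctionSignature() {
  llvm::json::Object Signature;
  // Declaration fragments of the return type.
  Signature["returns"] = llvm::json::Array{
      llvm::json::Object{{"kind", "typeIdentifier"}, {"spelling", "int"}}};
  // One entry per parameter with its name and fragments.
  Signature["parameters"] = llvm::json::Array{llvm::json::Object{
      {"name", "x"},
      {"declarationFragments",
       llvm::json::Array{
           llvm::json::Object{{"kind", "typeIdentifier"},
                              {"spelling", "int"}},
           llvm::json::Object{{"kind", "internalParam"},
                              {"spelling", "x"}}}}}};
  return Signature;
}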
-template -void serializeFunctionSignatureMixin(Object &Paren, const RecordTy &Record) { - serializeObject(Paren, "functionSignature", - serializeFunctionSignatureMixinImpl( - Record, has_function_signature())); -} - -template -std::optional serializeAccessMixinImpl(const RecordTy &Record, - std::true_type) { - const auto &AccessControl = Record.Access; - std::string Access; - if (AccessControl.empty()) - return std::nullopt; - Access = AccessControl.getAccess(); - return Access; -} - -template -std::optional serializeAccessMixinImpl(const RecordTy &Record, - std::false_type) { - return std::nullopt; -} - -template -void serializeAccessMixin(Object &Paren, const RecordTy &Record) { - auto accessLevel = serializeAccessMixinImpl(Record, has_access()); - if (!accessLevel.has_value()) - accessLevel = "public"; - serializeString(Paren, "accessLevel", accessLevel); -} - -template -std::optional serializeTemplateMixinImpl(const RecordTy &Record, - std::true_type) { +void serializeTemplateMixin(Object &Paren, const RecordTy &Record) { const auto &Template = Record.Templ; if (Template.empty()) - return std::nullopt; + return; Object Generics; Array GenericParameters; @@ -619,97 +577,66 @@ std::optional serializeTemplateMixinImpl(const RecordTy &Record, if (!GenericConstraints.empty()) Generics["constraints"] = std::move(GenericConstraints); - return Generics; -} - -template -std::optional serializeTemplateMixinImpl(const RecordTy &Record, - std::false_type) { - return std::nullopt; + serializeObject(Paren, "swiftGenerics", Generics); } -template -void serializeTemplateMixin(Object &Paren, const RecordTy &Record) { - serializeObject(Paren, "swiftGenerics", - serializeTemplateMixinImpl(Record, has_template())); -} +Array generateParentContexts(const SmallVectorImpl &Parents, + Language Lang) { + Array ParentContexts; -struct PathComponent { - StringRef USR; - StringRef Name; - APIRecord::RecordKind Kind; + for (const auto &Parent : Parents) { + Object Elem; + Elem["usr"] = Parent.USR; + Elem["name"] = Parent.Name; + if (Parent.Record) + Elem["kind"] = + serializeSymbolKind(Parent.Record->getKind(), Lang)["identifier"]; + else + Elem["kind"] = + serializeSymbolKind(APIRecord::RK_Unknown, Lang)["identifier"]; + ParentContexts.emplace_back(std::move(Elem)); + } - PathComponent(StringRef USR, StringRef Name, APIRecord::RecordKind Kind) - : USR(USR), Name(Name), Kind(Kind) {} -}; + return ParentContexts; +} -template -bool generatePathComponents( - const RecordTy &Record, const APISet &API, - function_ref ComponentTransformer) { - SmallVector ReverseComponenents; - ReverseComponenents.emplace_back(Record.USR, Record.Name, Record.getKind()); - const auto *CurrentParent = &Record.ParentInformation; - bool FailedToFindParent = false; - while (CurrentParent && !CurrentParent->empty()) { - PathComponent CurrentParentComponent(CurrentParent->ParentUSR, - CurrentParent->ParentName, - CurrentParent->ParentKind); - - auto *ParentRecord = CurrentParent->ParentRecord; - // Slow path if we don't have a direct reference to the ParentRecord - if (!ParentRecord) - ParentRecord = API.findRecordForUSR(CurrentParent->ParentUSR); - - // If the parent is a category extended from internal module then we need to - // pretend this belongs to the associated interface. 
- if (auto *CategoryRecord = - dyn_cast_or_null(ParentRecord)) { - if (!CategoryRecord->IsFromExternalModule) { - ParentRecord = API.findRecordForUSR(CategoryRecord->Interface.USR); - CurrentParentComponent = PathComponent(CategoryRecord->Interface.USR, - CategoryRecord->Interface.Name, - APIRecord::RK_ObjCInterface); - } - } - - // The parent record doesn't exist which means the symbol shouldn't be - // treated as part of the current product. - if (!ParentRecord) { - FailedToFindParent = true; - break; - } - - ReverseComponenents.push_back(std::move(CurrentParentComponent)); - CurrentParent = &ParentRecord->ParentInformation; +/// Walk the records parent information in reverse to generate a hierarchy +/// suitable for serialization. +SmallVector +generateHierarchyFromRecord(const APIRecord *Record) { + SmallVector ReverseHierarchy; + for (const auto *Current = Record; Current != nullptr; + Current = Current->Parent.Record) + ReverseHierarchy.emplace_back(Current); + + return SmallVector( + std::make_move_iterator(ReverseHierarchy.rbegin()), + std::make_move_iterator(ReverseHierarchy.rend())); +} + +SymbolReference getHierarchyReference(const APIRecord *Record, + const APISet &API) { + // If the parent is a category extended from internal module then we need to + // pretend this belongs to the associated interface. + if (auto *CategoryRecord = dyn_cast_or_null(Record)) { + return CategoryRecord->Interface; + // FIXME: TODO generate path components correctly for categories extending + // an external module. } - for (const auto &PC : reverse(ReverseComponenents)) - ComponentTransformer(PC); - - return FailedToFindParent; + return SymbolReference(Record); } -Object serializeParentContext(const PathComponent &PC, Language Lang) { - Object ParentContextElem; - ParentContextElem["usr"] = PC.USR; - ParentContextElem["name"] = PC.Name; - ParentContextElem["kind"] = serializeSymbolKind(PC.Kind, Lang)["identifier"]; - return ParentContextElem; -} +} // namespace -template -Array generateParentContexts(const RecordTy &Record, const APISet &API, - Language Lang) { - Array ParentContexts; - generatePathComponents( - Record, API, [Lang, &ParentContexts](const PathComponent &PC) { - ParentContexts.push_back(serializeParentContext(PC, Lang)); - }); +Object *ExtendedModule::addSymbol(Object &&Symbol) { + Symbols.emplace_back(std::move(Symbol)); + return Symbols.back().getAsObject(); +} - return ParentContexts; +void ExtendedModule::addRelationship(Object &&Relationship) { + Relationships.emplace_back(std::move(Relationship)); } -} // namespace /// Defines the format version emitted by SymbolGraphSerializer. const VersionTuple SymbolGraphSerializer::FormatVersion{0, 5, 3}; @@ -722,84 +649,44 @@ Object SymbolGraphSerializer::serializeMetadata() const { return Metadata; } -Object SymbolGraphSerializer::serializeModule() const { +Object +SymbolGraphSerializer::serializeModuleObject(StringRef ModuleName) const { Object Module; - // The user is expected to always pass `--product-name=` on the command line - // to populate this field. - Module["name"] = API.ProductName; + Module["name"] = ModuleName; serializeObject(Module, "platform", serializePlatform(API.getTarget())); return Module; } -bool SymbolGraphSerializer::shouldSkip(const APIRecord &Record) const { - // Skip explicitly ignored symbols. 
- if (IgnoresList.shouldIgnore(Record.Name)) +bool SymbolGraphSerializer::shouldSkip(const APIRecord *Record) const { + if (!Record) return true; // Skip unconditionally unavailable symbols - if (Record.Availability.isUnconditionallyUnavailable()) + if (Record->Availability.isUnconditionallyUnavailable()) return true; // Filter out symbols prefixed with an underscored as they are understood to // be symbols clients should not use. - if (Record.Name.starts_with("_")) + if (Record->Name.starts_with("_")) + return true; + + // Skip explicitly ignored symbols. + if (IgnoresList.shouldIgnore(Record->Name)) return true; return false; } -template -std::optional -SymbolGraphSerializer::serializeAPIRecord(const RecordTy &Record) const { - if (shouldSkip(Record)) - return std::nullopt; - - Object Obj; - serializeObject(Obj, "identifier", - serializeIdentifier(Record, API.getLanguage())); - serializeObject(Obj, "kind", serializeSymbolKind(Record, API.getLanguage())); - serializeObject(Obj, "names", serializeNames(Record)); - serializeObject( - Obj, "location", - serializeSourceLocation(Record.Location, /*IncludeFileURI=*/true)); - serializeArray(Obj, "availability", - serializeAvailability(Record.Availability)); - serializeObject(Obj, "docComment", serializeDocComment(Record.Comment)); - serializeArray(Obj, "declarationFragments", - serializeDeclarationFragments(Record.Declaration)); - SmallVector PathComponentsNames; - // If this returns true it indicates that we couldn't find a symbol in the - // hierarchy. - if (generatePathComponents(Record, API, - [&PathComponentsNames](const PathComponent &PC) { - PathComponentsNames.push_back(PC.Name); - })) - return {}; - - serializeArray(Obj, "pathComponents", Array(PathComponentsNames)); +ExtendedModule &SymbolGraphSerializer::getModuleForCurrentSymbol() { + if (!ForceEmitToMainModule && ModuleForCurrentSymbol) + return *ModuleForCurrentSymbol; - serializeFunctionSignatureMixin(Obj, Record); - serializeAccessMixin(Obj, Record); - serializeTemplateMixin(Obj, Record); - - return Obj; + return MainModule; } -template -void SymbolGraphSerializer::serializeMembers( - const APIRecord &Record, - const SmallVector> &Members) { - // Members should not be serialized if we aren't recursing. 
- if (!ShouldRecurse) - return; - for (const auto &Member : Members) { - auto MemberRecord = serializeAPIRecord(*Member); - if (!MemberRecord) - continue; - - Symbols.emplace_back(std::move(*MemberRecord)); - serializeRelationship(RelationshipKind::MemberOf, *Member, Record); - } +Array SymbolGraphSerializer::serializePathComponents( + const APIRecord *Record) const { + return Array(map_range(Hierarchy, [](auto Elt) { return Elt.Name; })); } StringRef SymbolGraphSerializer::getRelationshipString(RelationshipKind Kind) { @@ -816,6 +703,33 @@ StringRef SymbolGraphSerializer::getRelationshipString(RelationshipKind Kind) { llvm_unreachable("Unhandled relationship kind"); } +void SymbolGraphSerializer::serializeRelationship(RelationshipKind Kind, + const SymbolReference &Source, + const SymbolReference &Target, + ExtendedModule &Into) { + Object Relationship; + SmallString<64> TestRelLabel; + if (EmitSymbolLabelsForTesting) { + llvm::raw_svector_ostream OS(TestRelLabel); + OS << SymbolGraphSerializer::getRelationshipString(Kind) << " $ " + << Source.USR << " $ "; + if (Target.USR.empty()) + OS << Target.Name; + else + OS << Target.USR; + Relationship["!testRelLabel"] = TestRelLabel; + } + Relationship["source"] = Source.USR; + Relationship["target"] = Target.USR; + Relationship["targetFallback"] = Target.Name; + Relationship["kind"] = SymbolGraphSerializer::getRelationshipString(Kind); + + if (ForceEmitToMainModule) + MainModule.addRelationship(std::move(Relationship)); + else + Into.addRelationship(std::move(Relationship)); +} + StringRef SymbolGraphSerializer::getConstraintString(ConstraintKind Kind) { switch (Kind) { case ConstraintKind::Conformance: @@ -826,430 +740,324 @@ StringRef SymbolGraphSerializer::getConstraintString(ConstraintKind Kind) { llvm_unreachable("Unhandled constraint kind"); } -void SymbolGraphSerializer::serializeRelationship(RelationshipKind Kind, - SymbolReference Source, - SymbolReference Target) { - Object Relationship; - Relationship["source"] = Source.USR; - Relationship["target"] = Target.USR; - Relationship["targetFallback"] = Target.Name; - Relationship["kind"] = getRelationshipString(Kind); - - Relationships.emplace_back(std::move(Relationship)); -} +void SymbolGraphSerializer::serializeAPIRecord(const APIRecord *Record) { + Object Obj; -void SymbolGraphSerializer::visitNamespaceRecord( - const NamespaceRecord &Record) { - auto Namespace = serializeAPIRecord(Record); - if (!Namespace) - return; - Symbols.emplace_back(std::move(*Namespace)); - if (!Record.ParentInformation.empty()) - serializeRelationship(RelationshipKind::MemberOf, Record, - Record.ParentInformation.ParentRecord); -} + // If we need symbol labels for testing emit the USR as the value and the key + // starts with '!'' to ensure it ends up at the top of the object. 
+ if (EmitSymbolLabelsForTesting) + Obj["!testLabel"] = Record->USR; -void SymbolGraphSerializer::visitGlobalFunctionRecord( - const GlobalFunctionRecord &Record) { - auto Obj = serializeAPIRecord(Record); - if (!Obj) - return; + serializeObject(Obj, "identifier", + serializeIdentifier(*Record, API.getLanguage())); + serializeObject(Obj, "kind", serializeSymbolKind(*Record, API.getLanguage())); + serializeObject(Obj, "names", serializeNames(Record)); + serializeObject( + Obj, "location", + serializeSourceLocation(Record->Location, /*IncludeFileURI=*/true)); + serializeArray(Obj, "availability", + serializeAvailability(Record->Availability)); + serializeObject(Obj, "docComment", serializeDocComment(Record->Comment)); + serializeArray(Obj, "declarationFragments", + serializeDeclarationFragments(Record->Declaration)); - Symbols.emplace_back(std::move(*Obj)); -} + Obj["pathComponents"] = serializePathComponents(Record); + Obj["accessLevel"] = Record->Access.getAccess(); -void SymbolGraphSerializer::visitGlobalVariableRecord( - const GlobalVariableRecord &Record) { - auto Obj = serializeAPIRecord(Record); - if (!Obj) - return; + ExtendedModule &Module = getModuleForCurrentSymbol(); + // If the hierarchy has at least one parent and child. + if (Hierarchy.size() >= 2) + serializeRelationship(MemberOf, Hierarchy.back(), + Hierarchy[Hierarchy.size() - 2], Module); - Symbols.emplace_back(std::move(*Obj)); + CurrentSymbol = Module.addSymbol(std::move(Obj)); } -void SymbolGraphSerializer::visitEnumRecord(const EnumRecord &Record) { - auto Enum = serializeAPIRecord(Record); - if (!Enum) - return; - - Symbols.emplace_back(std::move(*Enum)); - serializeMembers(Record, Record.Constants); +bool SymbolGraphSerializer::traverseAPIRecord(const APIRecord *Record) { + if (!Record) + return true; + if (shouldSkip(Record)) + return true; + Hierarchy.push_back(getHierarchyReference(Record, API)); + // Defer traversal mechanics to APISetVisitor base implementation + auto RetVal = Base::traverseAPIRecord(Record); + Hierarchy.pop_back(); + return RetVal; } -void SymbolGraphSerializer::visitRecordRecord(const RecordRecord &Record) { - auto SerializedRecord = serializeAPIRecord(Record); - if (!SerializedRecord) - return; - - Symbols.emplace_back(std::move(*SerializedRecord)); - serializeMembers(Record, Record.Fields); +bool SymbolGraphSerializer::visitAPIRecord(const APIRecord *Record) { + serializeAPIRecord(Record); + return true; } -void SymbolGraphSerializer::visitStaticFieldRecord( - const StaticFieldRecord &Record) { - auto StaticField = serializeAPIRecord(Record); - if (!StaticField) - return; - Symbols.emplace_back(std::move(*StaticField)); - serializeRelationship(RelationshipKind::MemberOf, Record, Record.Context); +bool SymbolGraphSerializer::visitGlobalFunctionRecord( + const GlobalFunctionRecord *Record) { + if (!CurrentSymbol) + return true; + + serializeFunctionSignatureMixin(*CurrentSymbol, *Record); + return true; } -void SymbolGraphSerializer::visitCXXClassRecord(const CXXClassRecord &Record) { - auto Class = serializeAPIRecord(Record); - if (!Class) - return; +bool SymbolGraphSerializer::visitCXXClassRecord(const CXXClassRecord *Record) { + if (!CurrentSymbol) + return true; - Symbols.emplace_back(std::move(*Class)); - for (const auto &Base : Record.Bases) - serializeRelationship(RelationshipKind::InheritsFrom, Record, Base); - if (!Record.ParentInformation.empty()) - serializeRelationship(RelationshipKind::MemberOf, Record, - Record.ParentInformation.ParentRecord); + for (const auto &Base : 
Record->Bases) + serializeRelationship(RelationshipKind::InheritsFrom, Record, Base, + getModuleForCurrentSymbol()); + return true; } -void SymbolGraphSerializer::visitClassTemplateRecord( - const ClassTemplateRecord &Record) { - auto Class = serializeAPIRecord(Record); - if (!Class) - return; +bool SymbolGraphSerializer::visitClassTemplateRecord( + const ClassTemplateRecord *Record) { + if (!CurrentSymbol) + return true; - Symbols.emplace_back(std::move(*Class)); - for (const auto &Base : Record.Bases) - serializeRelationship(RelationshipKind::InheritsFrom, Record, Base); - if (!Record.ParentInformation.empty()) - serializeRelationship(RelationshipKind::MemberOf, Record, - Record.ParentInformation.ParentRecord); + serializeTemplateMixin(*CurrentSymbol, *Record); + return true; } -void SymbolGraphSerializer::visitClassTemplateSpecializationRecord( - const ClassTemplateSpecializationRecord &Record) { - auto Class = serializeAPIRecord(Record); - if (!Class) - return; - - Symbols.emplace_back(std::move(*Class)); +bool SymbolGraphSerializer::visitClassTemplatePartialSpecializationRecord( + const ClassTemplatePartialSpecializationRecord *Record) { + if (!CurrentSymbol) + return true; - for (const auto &Base : Record.Bases) - serializeRelationship(RelationshipKind::InheritsFrom, Record, Base); - if (!Record.ParentInformation.empty()) - serializeRelationship(RelationshipKind::MemberOf, Record, - Record.ParentInformation.ParentRecord); + serializeTemplateMixin(*CurrentSymbol, *Record); + return true; } -void SymbolGraphSerializer::visitClassTemplatePartialSpecializationRecord( - const ClassTemplatePartialSpecializationRecord &Record) { - auto Class = serializeAPIRecord(Record); - if (!Class) - return; - - Symbols.emplace_back(std::move(*Class)); +bool SymbolGraphSerializer::visitCXXMethodRecord( + const CXXMethodRecord *Record) { + if (!CurrentSymbol) + return true; - for (const auto &Base : Record.Bases) - serializeRelationship(RelationshipKind::InheritsFrom, Record, Base); - if (!Record.ParentInformation.empty()) - serializeRelationship(RelationshipKind::MemberOf, Record, - Record.ParentInformation.ParentRecord); + serializeFunctionSignatureMixin(*CurrentSymbol, *Record); + return true; } -void SymbolGraphSerializer::visitCXXInstanceMethodRecord( - const CXXInstanceMethodRecord &Record) { - auto InstanceMethod = serializeAPIRecord(Record); - if (!InstanceMethod) - return; +bool SymbolGraphSerializer::visitCXXMethodTemplateRecord( + const CXXMethodTemplateRecord *Record) { + if (!CurrentSymbol) + return true; - Symbols.emplace_back(std::move(*InstanceMethod)); - serializeRelationship(RelationshipKind::MemberOf, Record, - Record.ParentInformation.ParentRecord); + serializeTemplateMixin(*CurrentSymbol, *Record); + return true; } -void SymbolGraphSerializer::visitCXXStaticMethodRecord( - const CXXStaticMethodRecord &Record) { - auto StaticMethod = serializeAPIRecord(Record); - if (!StaticMethod) - return; +bool SymbolGraphSerializer::visitCXXFieldTemplateRecord( + const CXXFieldTemplateRecord *Record) { + if (!CurrentSymbol) + return true; - Symbols.emplace_back(std::move(*StaticMethod)); - serializeRelationship(RelationshipKind::MemberOf, Record, - Record.ParentInformation.ParentRecord); + serializeTemplateMixin(*CurrentSymbol, *Record); + return true; } -void SymbolGraphSerializer::visitMethodTemplateRecord( - const CXXMethodTemplateRecord &Record) { - if (!ShouldRecurse) - // Ignore child symbols - return; - auto MethodTemplate = serializeAPIRecord(Record); - if (!MethodTemplate) - return; - 
Symbols.emplace_back(std::move(*MethodTemplate)); - serializeRelationship(RelationshipKind::MemberOf, Record, - Record.ParentInformation.ParentRecord); -} +bool SymbolGraphSerializer::visitConceptRecord(const ConceptRecord *Record) { + if (!CurrentSymbol) + return true; -void SymbolGraphSerializer::visitMethodTemplateSpecializationRecord( - const CXXMethodTemplateSpecializationRecord &Record) { - if (!ShouldRecurse) - // Ignore child symbols - return; - auto MethodTemplateSpecialization = serializeAPIRecord(Record); - if (!MethodTemplateSpecialization) - return; - Symbols.emplace_back(std::move(*MethodTemplateSpecialization)); - serializeRelationship(RelationshipKind::MemberOf, Record, - Record.ParentInformation.ParentRecord); + serializeTemplateMixin(*CurrentSymbol, *Record); + return true; } -void SymbolGraphSerializer::visitCXXFieldRecord(const CXXFieldRecord &Record) { - if (!ShouldRecurse) - return; - auto CXXField = serializeAPIRecord(Record); - if (!CXXField) - return; - Symbols.emplace_back(std::move(*CXXField)); - serializeRelationship(RelationshipKind::MemberOf, Record, - Record.ParentInformation.ParentRecord); -} +bool SymbolGraphSerializer::visitGlobalVariableTemplateRecord( + const GlobalVariableTemplateRecord *Record) { + if (!CurrentSymbol) + return true; -void SymbolGraphSerializer::visitCXXFieldTemplateRecord( - const CXXFieldTemplateRecord &Record) { - if (!ShouldRecurse) - // Ignore child symbols - return; - auto CXXFieldTemplate = serializeAPIRecord(Record); - if (!CXXFieldTemplate) - return; - Symbols.emplace_back(std::move(*CXXFieldTemplate)); - serializeRelationship(RelationshipKind::MemberOf, Record, - Record.ParentInformation.ParentRecord); + serializeTemplateMixin(*CurrentSymbol, *Record); + return true; } -void SymbolGraphSerializer::visitConceptRecord(const ConceptRecord &Record) { - auto Concept = serializeAPIRecord(Record); - if (!Concept) - return; +bool SymbolGraphSerializer:: + visitGlobalVariableTemplatePartialSpecializationRecord( + const GlobalVariableTemplatePartialSpecializationRecord *Record) { + if (!CurrentSymbol) + return true; - Symbols.emplace_back(std::move(*Concept)); + serializeTemplateMixin(*CurrentSymbol, *Record); + return true; } -void SymbolGraphSerializer::visitGlobalVariableTemplateRecord( - const GlobalVariableTemplateRecord &Record) { - auto GlobalVariableTemplate = serializeAPIRecord(Record); - if (!GlobalVariableTemplate) - return; - Symbols.emplace_back(std::move(*GlobalVariableTemplate)); -} +bool SymbolGraphSerializer::visitGlobalFunctionTemplateRecord( + const GlobalFunctionTemplateRecord *Record) { + if (!CurrentSymbol) + return true; -void SymbolGraphSerializer::visitGlobalVariableTemplateSpecializationRecord( - const GlobalVariableTemplateSpecializationRecord &Record) { - auto GlobalVariableTemplateSpecialization = serializeAPIRecord(Record); - if (!GlobalVariableTemplateSpecialization) - return; - Symbols.emplace_back(std::move(*GlobalVariableTemplateSpecialization)); + serializeTemplateMixin(*CurrentSymbol, *Record); + return true; } -void SymbolGraphSerializer:: - visitGlobalVariableTemplatePartialSpecializationRecord( - const GlobalVariableTemplatePartialSpecializationRecord &Record) { - auto GlobalVariableTemplatePartialSpecialization = serializeAPIRecord(Record); - if (!GlobalVariableTemplatePartialSpecialization) - return; - Symbols.emplace_back(std::move(*GlobalVariableTemplatePartialSpecialization)); -} +bool SymbolGraphSerializer::visitObjCContainerRecord( + const ObjCContainerRecord *Record) { + if (!CurrentSymbol) 
+ return true; -void SymbolGraphSerializer::visitGlobalFunctionTemplateRecord( - const GlobalFunctionTemplateRecord &Record) { - auto GlobalFunctionTemplate = serializeAPIRecord(Record); - if (!GlobalFunctionTemplate) - return; - Symbols.emplace_back(std::move(*GlobalFunctionTemplate)); -} + for (const auto &Protocol : Record->Protocols) + serializeRelationship(ConformsTo, Record, Protocol, + getModuleForCurrentSymbol()); -void SymbolGraphSerializer::visitGlobalFunctionTemplateSpecializationRecord( - const GlobalFunctionTemplateSpecializationRecord &Record) { - auto GlobalFunctionTemplateSpecialization = serializeAPIRecord(Record); - if (!GlobalFunctionTemplateSpecialization) - return; - Symbols.emplace_back(std::move(*GlobalFunctionTemplateSpecialization)); + return true; } -void SymbolGraphSerializer::visitObjCContainerRecord( - const ObjCContainerRecord &Record) { - auto ObjCContainer = serializeAPIRecord(Record); - if (!ObjCContainer) - return; +bool SymbolGraphSerializer::visitObjCInterfaceRecord( + const ObjCInterfaceRecord *Record) { + if (!CurrentSymbol) + return true; - Symbols.emplace_back(std::move(*ObjCContainer)); - - serializeMembers(Record, Record.Ivars); - serializeMembers(Record, Record.Methods); - serializeMembers(Record, Record.Properties); - - for (const auto &Protocol : Record.Protocols) - // Record that Record conforms to Protocol. - serializeRelationship(RelationshipKind::ConformsTo, Record, Protocol); - - if (auto *ObjCInterface = dyn_cast(&Record)) { - if (!ObjCInterface->SuperClass.empty()) - // If Record is an Objective-C interface record and it has a super class, - // record that Record is inherited from SuperClass. - serializeRelationship(RelationshipKind::InheritsFrom, Record, - ObjCInterface->SuperClass); - - // Members of categories extending an interface are serialized as members of - // the interface. - for (const auto *Category : ObjCInterface->Categories) { - serializeMembers(Record, Category->Ivars); - serializeMembers(Record, Category->Methods); - serializeMembers(Record, Category->Properties); - - // Surface the protocols of the category to the interface. - for (const auto &Protocol : Category->Protocols) - serializeRelationship(RelationshipKind::ConformsTo, Record, Protocol); - } - } + if (!Record->SuperClass.empty()) + serializeRelationship(InheritsFrom, Record, Record->SuperClass, + getModuleForCurrentSymbol()); + return true; } -void SymbolGraphSerializer::visitObjCCategoryRecord( - const ObjCCategoryRecord &Record) { - if (!Record.IsFromExternalModule) - return; - - // Check if the current Category' parent has been visited before, if so skip. 
- if (!visitedCategories.contains(Record.Interface.Name)) { - visitedCategories.insert(Record.Interface.Name); - Object Obj; - serializeObject(Obj, "identifier", - serializeIdentifier(Record, API.getLanguage())); - serializeObject(Obj, "kind", - serializeSymbolKind(APIRecord::RK_ObjCCategoryModule, - API.getLanguage())); - Obj["accessLevel"] = "public"; - Symbols.emplace_back(std::move(Obj)); - } +bool SymbolGraphSerializer::traverseObjCCategoryRecord( + const ObjCCategoryRecord *Record) { + auto *CurrentModule = ModuleForCurrentSymbol; + if (Record->isExtendingExternalModule()) + ModuleForCurrentSymbol = &ExtendedModules[Record->Interface.Source]; - Object Relationship; - Relationship["source"] = Record.USR; - Relationship["target"] = Record.Interface.USR; - Relationship["targetFallback"] = Record.Interface.Name; - Relationship["kind"] = getRelationshipString(RelationshipKind::ExtensionTo); - Relationships.emplace_back(std::move(Relationship)); + if (!walkUpFromObjCCategoryRecord(Record)) + return false; - auto ObjCCategory = serializeAPIRecord(Record); + bool RetVal = traverseRecordContext(Record); + ModuleForCurrentSymbol = CurrentModule; + return RetVal; +} - if (!ObjCCategory) - return; +bool SymbolGraphSerializer::walkUpFromObjCCategoryRecord( + const ObjCCategoryRecord *Record) { + return visitObjCCategoryRecord(Record); +} - Symbols.emplace_back(std::move(*ObjCCategory)); - serializeMembers(Record, Record.Methods); - serializeMembers(Record, Record.Properties); +bool SymbolGraphSerializer::visitObjCCategoryRecord( + const ObjCCategoryRecord *Record) { + // If we need to create a record for the category in the future do so here, + // otherwise everything is set up to pretend that the category is in fact the + // interface it extends. + for (const auto &Protocol : Record->Protocols) + serializeRelationship(ConformsTo, Record->Interface, Protocol, + getModuleForCurrentSymbol()); - // Surface the protocols of the category to the interface. - for (const auto &Protocol : Record.Protocols) - serializeRelationship(RelationshipKind::ConformsTo, Record, Protocol); + return true; } -void SymbolGraphSerializer::visitMacroDefinitionRecord( - const MacroDefinitionRecord &Record) { - auto Macro = serializeAPIRecord(Record); +bool SymbolGraphSerializer::visitObjCMethodRecord( + const ObjCMethodRecord *Record) { + if (!CurrentSymbol) + return true; - if (!Macro) - return; + serializeFunctionSignatureMixin(*CurrentSymbol, *Record); + return true; +} - Symbols.emplace_back(std::move(*Macro)); +bool SymbolGraphSerializer::visitObjCInstanceVariableRecord( + const ObjCInstanceVariableRecord *Record) { + // FIXME: serialize ivar access control here. 
+ return true; } -void SymbolGraphSerializer::serializeSingleRecord(const APIRecord *Record) { - switch (Record->getKind()) { - case APIRecord::RK_Unknown: - llvm_unreachable("Records should have a known kind!"); - case APIRecord::RK_GlobalFunction: - visitGlobalFunctionRecord(*cast(Record)); - break; - case APIRecord::RK_GlobalVariable: - visitGlobalVariableRecord(*cast(Record)); - break; - case APIRecord::RK_Enum: - visitEnumRecord(*cast(Record)); - break; - case APIRecord::RK_Struct: - LLVM_FALLTHROUGH; - case APIRecord::RK_Union: - visitRecordRecord(*cast(Record)); - break; - case APIRecord::RK_StaticField: - visitStaticFieldRecord(*cast(Record)); - break; - case APIRecord::RK_CXXClass: - visitCXXClassRecord(*cast(Record)); - break; - case APIRecord::RK_ObjCInterface: - visitObjCContainerRecord(*cast(Record)); - break; - case APIRecord::RK_ObjCProtocol: - visitObjCContainerRecord(*cast(Record)); - break; - case APIRecord::RK_ObjCCategory: - visitObjCCategoryRecord(*cast(Record)); - break; - case APIRecord::RK_MacroDefinition: - visitMacroDefinitionRecord(*cast(Record)); - break; - case APIRecord::RK_Typedef: - visitTypedefRecord(*cast(Record)); - break; - default: - if (auto Obj = serializeAPIRecord(*Record)) { - Symbols.emplace_back(std::move(*Obj)); - auto &ParentInformation = Record->ParentInformation; - if (!ParentInformation.empty()) - serializeRelationship(RelationshipKind::MemberOf, *Record, - *ParentInformation.ParentRecord); - } - break; - } +bool SymbolGraphSerializer::walkUpFromTypedefRecord( + const TypedefRecord *Record) { + // Short-circuit walking up the class hierarchy and handle creating typedef + // symbol objects manually as there are additional symbol dropping rules to + // respect. + return visitTypedefRecord(Record); } -void SymbolGraphSerializer::visitTypedefRecord(const TypedefRecord &Record) { +bool SymbolGraphSerializer::visitTypedefRecord(const TypedefRecord *Record) { // Typedefs of anonymous types have their entries unified with the underlying // type. - bool ShouldDrop = Record.UnderlyingType.Name.empty(); + bool ShouldDrop = Record->UnderlyingType.Name.empty(); // enums declared with `NS_OPTION` have a named enum and a named typedef, with // the same name - ShouldDrop |= (Record.UnderlyingType.Name == Record.Name); + ShouldDrop |= (Record->UnderlyingType.Name == Record->Name); if (ShouldDrop) - return; + return true; - auto Typedef = serializeAPIRecord(Record); - if (!Typedef) - return; + // Create the symbol record if the other symbol droppping rules permit it. + serializeAPIRecord(Record); + if (!CurrentSymbol) + return true; - (*Typedef)["type"] = Record.UnderlyingType.USR; + (*CurrentSymbol)["type"] = Record->UnderlyingType.USR; - Symbols.emplace_back(std::move(*Typedef)); + return true; } -Object SymbolGraphSerializer::serialize() { - traverseAPISet(); - return serializeCurrentGraph(); +void SymbolGraphSerializer::serializeSingleRecord(const APIRecord *Record) { + switch (Record->getKind()) { + // dispatch to the relevant walkUpFromMethod +#define CONCRETE_RECORD(CLASS, BASE, KIND) \ + case APIRecord::KIND: { \ + walkUpFrom##CLASS(static_cast(Record)); \ + break; \ + } +#include "clang/ExtractAPI/APIRecords.inc" + // otherwise fallback on the only behavior we can implement safely. 
+ case APIRecord::RK_Unknown: + visitAPIRecord(Record); + break; + default: + llvm_unreachable("API Record with uninstantiable kind"); + } } -Object SymbolGraphSerializer::serializeCurrentGraph() { +Object SymbolGraphSerializer::serializeGraph(StringRef ModuleName, + ExtendedModule &&EM) { Object Root; serializeObject(Root, "metadata", serializeMetadata()); - serializeObject(Root, "module", serializeModule()); + serializeObject(Root, "module", serializeModuleObject(ModuleName)); - Root["symbols"] = std::move(Symbols); - Root["relationships"] = std::move(Relationships); + Root["symbols"] = std::move(EM.Symbols); + Root["relationships"] = std::move(EM.Relationships); return Root; } -void SymbolGraphSerializer::serialize(raw_ostream &os) { - Object root = serialize(); +void SymbolGraphSerializer::serializeGraphToStream( + raw_ostream &OS, SymbolGraphSerializerOption Options, StringRef ModuleName, + ExtendedModule &&EM) { + Object Root = serializeGraph(ModuleName, std::move(EM)); if (Options.Compact) - os << formatv("{0}", Value(std::move(root))) << "\n"; + OS << formatv("{0}", Value(std::move(Root))) << "\n"; else - os << formatv("{0:2}", Value(std::move(root))) << "\n"; + OS << formatv("{0:2}", Value(std::move(Root))) << "\n"; +} + +void SymbolGraphSerializer::serializeMainSymbolGraph( + raw_ostream &OS, const APISet &API, const APIIgnoresList &IgnoresList, + SymbolGraphSerializerOption Options) { + SymbolGraphSerializer Serializer(API, IgnoresList, + Options.EmitSymbolLabelsForTesting); + Serializer.traverseAPISet(); + Serializer.serializeGraphToStream(OS, Options, API.ProductName, + std::move(Serializer.MainModule)); + // FIXME: TODO handle extended modules here +} + +void SymbolGraphSerializer::serializeWithExtensionGraphs( + raw_ostream &MainOutput, const APISet &API, + const APIIgnoresList &IgnoresList, + llvm::function_ref(Twine BaseName)> + CreateOutputStream, + SymbolGraphSerializerOption Options) { + SymbolGraphSerializer Serializer(API, IgnoresList, + Options.EmitSymbolLabelsForTesting); + Serializer.traverseAPISet(); + + Serializer.serializeGraphToStream(MainOutput, Options, API.ProductName, + std::move(Serializer.MainModule)); + + for (auto &ExtensionSGF : Serializer.ExtendedModules) { + if (auto ExtensionOS = + CreateOutputStream(ExtensionSGF.getKey() + "@" + API.ProductName)) + Serializer.serializeGraphToStream(*ExtensionOS, Options, + ExtensionSGF.getKey(), + std::move(ExtensionSGF.getValue())); + } } std::optional @@ -1262,14 +1070,20 @@ SymbolGraphSerializer::serializeSingleSymbolSGF(StringRef USR, Object Root; APIIgnoresList EmptyIgnores; SymbolGraphSerializer Serializer(API, EmptyIgnores, - /*Options.Compact*/ {true}, - /*ShouldRecurse*/ false); + /*EmitSymbolLabelsForTesting*/ false, + /*ForceEmitToMainModule*/ true); + + // Set up serializer parent chain + Serializer.Hierarchy = generateHierarchyFromRecord(Record); + Serializer.serializeSingleRecord(Record); - serializeObject(Root, "symbolGraph", Serializer.serializeCurrentGraph()); + serializeObject(Root, "symbolGraph", + Serializer.serializeGraph(API.ProductName, + std::move(Serializer.MainModule))); Language Lang = API.getLanguage(); serializeArray(Root, "parentContexts", - generateParentContexts(*Record, API, Lang)); + generateParentContexts(Serializer.Hierarchy, Lang)); Array RelatedSymbols; @@ -1287,14 +1101,15 @@ SymbolGraphSerializer::serializeSingleSymbolSGF(StringRef USR, Object RelatedSymbol; RelatedSymbol["usr"] = RelatedRecord->USR; RelatedSymbol["declarationLanguage"] = getLanguageName(Lang); - // TODO: 
once we record this properly let's serialize it right.
-  RelatedSymbol["accessLevel"] = "public";
+  RelatedSymbol["accessLevel"] = RelatedRecord->Access.getAccess();
   RelatedSymbol["filePath"] = RelatedRecord->Location.getFilename();
   RelatedSymbol["moduleName"] = API.ProductName;
   RelatedSymbol["isSystem"] = RelatedRecord->IsFromSystemHeader;
 
   serializeArray(RelatedSymbol, "parentContexts",
-                 generateParentContexts(*RelatedRecord, API, Lang));
+                 generateParentContexts(
+                     generateHierarchyFromRecord(RelatedRecord), Lang));
+
   RelatedSymbols.push_back(std::move(RelatedSymbol));
 }
 
diff --git a/clang/lib/ExtractAPI/TypedefUnderlyingTypeResolver.cpp b/clang/lib/ExtractAPI/TypedefUnderlyingTypeResolver.cpp
index 3a5f62c..41e4e0c 100644
--- a/clang/lib/ExtractAPI/TypedefUnderlyingTypeResolver.cpp
+++ b/clang/lib/ExtractAPI/TypedefUnderlyingTypeResolver.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/ExtractAPI/TypedefUnderlyingTypeResolver.h"
+#include "clang/Basic/Module.h"
 #include "clang/Index/USRGeneration.h"
 
 using namespace clang;
@@ -50,17 +51,20 @@ TypedefUnderlyingTypeResolver::getSymbolReferenceForType(QualType Type,
   SmallString<128> TypeUSR;
   const NamedDecl *TypeDecl = getUnderlyingTypeDecl(Type);
   const TypedefType *TypedefTy = Type->getAs();
+  StringRef OwningModuleName;
 
   if (TypeDecl) {
     if (!TypedefTy)
       TypeName = TypeDecl->getName().str();
 
     clang::index::generateUSRForDecl(TypeDecl, TypeUSR);
+    if (auto *OwningModule = TypeDecl->getImportedOwningModule())
+      OwningModuleName = OwningModule->Name;
   } else {
     clang::index::generateUSRForType(Type, Context, TypeUSR);
   }
 
-  return {API.copyString(TypeName), API.copyString(TypeUSR)};
+  return API.createSymbolReference(TypeName, TypeUSR, OwningModuleName);
 }
 
 std::string TypedefUnderlyingTypeResolver::getUSRForType(QualType Type) const {
diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
index 2446aee..f85f036 100644
--- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
+++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp
@@ -181,9 +181,13 @@ CreateFrontendAction(CompilerInstance &CI) {
 #endif
 
   // Wrap the base FE action in an extract api action to generate
-  // symbol graph as a biproduct of compilation ( enabled with
-  // --emit-symbol-graph option )
-  if (!FEOpts.SymbolGraphOutputDir.empty()) {
+  // symbol graph as a byproduct of compilation (enabled with
+  // --emit-symbol-graph option)
+  if (FEOpts.EmitSymbolGraph) {
+    if (FEOpts.SymbolGraphOutputDir.empty()) {
+      CI.getDiagnostics().Report(diag::warn_missing_symbol_graph_dir);
+      CI.getFrontendOpts().SymbolGraphOutputDir = ".";
+    }
     CI.getCodeGenOpts().ClearASTBeforeBackend = false;
     Act = std::make_unique(std::move(Act));
   }
diff --git a/clang/test/ExtractAPI/anonymous_record_no_typedef.c b/clang/test/ExtractAPI/anonymous_record_no_typedef.c
index 0e50f4a..049e8b1 100644
--- a/clang/test/ExtractAPI/anonymous_record_no_typedef.c
+++ b/clang/test/ExtractAPI/anonymous_record_no_typedef.c
@@ -1,8 +1,9 @@
+// XFAIL: *
 // RUN: rm -rf %t
 // RUN: split-file %s %t
 // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \
 // RUN: %t/reference.output.json.in >> %t/reference.output.json
-// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \
+// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \
 // RUN:   -x c-header %t/input.h -o %t/output.json -verify
 
 // Generator version is not consistent across test runs, normalize it.
diff --git a/clang/test/ExtractAPI/availability.c b/clang/test/ExtractAPI/availability.c index 3c1ef5c..12ac73f 100644 --- a/clang/test/ExtractAPI/availability.c +++ b/clang/test/ExtractAPI/availability.c @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api --product-name=Availability -triple arm64-apple-macosx -x c-header %t/input.h -o %t/output.json -verify +// RUN: %clang_cc1 -extract-api --pretty-sgf --product-name=Availability -triple arm64-apple-macosx -x c-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. // RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ diff --git a/clang/test/ExtractAPI/bool.c b/clang/test/ExtractAPI/bool.c index f4082ed..efab6df 100644 --- a/clang/test/ExtractAPI/bool.c +++ b/clang/test/ExtractAPI/bool.c @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api -target arm64-apple-macosx \ +// RUN: %clang -extract-api --pretty-sgf -target arm64-apple-macosx \ // RUN: %t/input.h -o %t/output.json // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/bool.cpp b/clang/test/ExtractAPI/bool.cpp index 1b445e2..f7d10c6 100644 --- a/clang/test/ExtractAPI/bool.cpp +++ b/clang/test/ExtractAPI/bool.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/class.cpp b/clang/test/ExtractAPI/class.cpp index 21cac43..0c5db8e 100644 --- a/clang/test/ExtractAPI/class.cpp +++ b/clang/test/ExtractAPI/class.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/class_template.cpp b/clang/test/ExtractAPI/class_template.cpp index b04dca6..4f2670d 100644 --- a/clang/test/ExtractAPI/class_template.cpp +++ b/clang/test/ExtractAPI/class_template.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. 
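[Editor's note] Most of the test churn from here on just adds --pretty-sgf to the RUN lines, which, as far as this patch shows, maps onto the Options.Compact switch in SymbolGraphSerializer::serializeGraphToStream earlier in the diff. The standalone sketch below is illustrative only and is not part of the patch.

// Illustrative sketch, not patch content: mirrors the Options.Compact branch
// in serializeGraphToStream. --pretty-sgf selects the indented "{0:2}" form;
// the default stays with the compact "{0}" form. The PrettyPrint parameter
// name is this sketch's own, not the patch's.
#include "llvm/Support/FormatVariadic.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/raw_ostream.h"

static void emitSymbolGraph(llvm::raw_ostream &OS, llvm::json::Object &&Root,
                            bool PrettyPrint) {
  llvm::json::Value V(std::move(Root));
  if (PrettyPrint)
    OS << llvm::formatv("{0:2}", V) << "\n"; // 2-space indented JSON
  else
    OS << llvm::formatv("{0}", V) << "\n"; // single-line compact JSON
}
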
diff --git a/clang/test/ExtractAPI/class_template_param_inheritance.cpp b/clang/test/ExtractAPI/class_template_param_inheritance.cpp index 0d38fd1..3d7b09f 100644 --- a/clang/test/ExtractAPI/class_template_param_inheritance.cpp +++ b/clang/test/ExtractAPI/class_template_param_inheritance.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/class_template_partial_spec.cpp b/clang/test/ExtractAPI/class_template_partial_spec.cpp index eba0693..c8d9cc7 100644 --- a/clang/test/ExtractAPI/class_template_partial_spec.cpp +++ b/clang/test/ExtractAPI/class_template_partial_spec.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. @@ -15,7 +15,7 @@ template class Foo {}; template class Foo {}; -/// expected-no-diagnostics +// expected-no-diagnostics //--- reference.output.json.in { diff --git a/clang/test/ExtractAPI/class_template_spec.cpp b/clang/test/ExtractAPI/class_template_spec.cpp index 4b183cb..06a9531 100644 --- a/clang/test/ExtractAPI/class_template_spec.cpp +++ b/clang/test/ExtractAPI/class_template_spec.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/concept.cpp b/clang/test/ExtractAPI/concept.cpp index ff4e710..443eac2 100644 --- a/clang/test/ExtractAPI/concept.cpp +++ b/clang/test/ExtractAPI/concept.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -std=c++20 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -std=c++20 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. 
diff --git a/clang/test/ExtractAPI/constructor_destructor.cpp b/clang/test/ExtractAPI/constructor_destructor.cpp index 9742d4b..27112c9 100644 --- a/clang/test/ExtractAPI/constructor_destructor.cpp +++ b/clang/test/ExtractAPI/constructor_destructor.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. @@ -137,7 +137,7 @@ class Foo { "precise": "c:@S@Foo@F@Foo#" }, "kind": { - "displayName": "Instance Method", + "displayName": "Constructor", "identifier": "c++.method" }, "location": { @@ -193,7 +193,7 @@ class Foo { "precise": "c:@S@Foo@F@~Foo#" }, "kind": { - "displayName": "Instance Method", + "displayName": "Destructor", "identifier": "c++.method" }, "location": { diff --git a/clang/test/ExtractAPI/conversions.cpp b/clang/test/ExtractAPI/conversions.cpp index fc8d067..07688ff 100644 --- a/clang/test/ExtractAPI/conversions.cpp +++ b/clang/test/ExtractAPI/conversions.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/emit-symbol-graph/multi_file.c b/clang/test/ExtractAPI/emit-symbol-graph/multi_file.c index e6b72d5..e668f69 100644 --- a/clang/test/ExtractAPI/emit-symbol-graph/multi_file.c +++ b/clang/test/ExtractAPI/emit-symbol-graph/multi_file.c @@ -5,18 +5,19 @@ // RUN: %t/reference.main.json.in >> %t/reference.main.json // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.test.json.in >> %t/reference.test.json -// RUN: %clang_cc1 %t/test.c %t/main.c --emit-symbol-graph=%t/SymbolGraphs --product-name=multifile_test -triple=x86_64-apple-macosx12.0.0 +// RUN: %clang_cc1 %t/test.c %t/main.c -emit-symbol-graph --pretty-sgf \ +// RUN: --symbol-graph-dir=%t/SymbolGraphs --product-name=multifile_test -triple=x86_64-apple-macosx12.0.0 // Test main.json // Generator version is not consistent across test runs, normalize it. // RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/SymbolGraphs/main.json > %t/output-normalized.json +// RUN: %t/SymbolGraphs/main.c.symbols.json > %t/output-normalized.json // RUN: diff %t/reference.main.json %t/output-normalized.json // Test test.json // Generator version is not consistent across test runs, normalize it. 
// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/SymbolGraphs/test.json > %t/output-normalized.json +// RUN: %t/SymbolGraphs/test.c.symbols.json > %t/output-normalized.json // RUN: diff %t/reference.test.json %t/output-normalized.json // CHECK-NOT: error: diff --git a/clang/test/ExtractAPI/emit-symbol-graph/single_file.c b/clang/test/ExtractAPI/emit-symbol-graph/single_file.c index 8599e82..b00b5f5 100644 --- a/clang/test/ExtractAPI/emit-symbol-graph/single_file.c +++ b/clang/test/ExtractAPI/emit-symbol-graph/single_file.c @@ -3,11 +3,12 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 %t/main.c --emit-symbol-graph=%t/SymbolGraphs --product-name=basicfile -triple=x86_64-apple-macosx12.0.0 +// RUN: %clang_cc1 %t/main.c -emit-symbol-graph --pretty-sgf \ +// RUN: --symbol-graph-dir=%t/SymbolGraphs --product-name=basicfile -triple=x86_64-apple-macosx12.0.0 // Generator version is not consistent across test runs, normalize it. // RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/SymbolGraphs/main.json >> %t/output-normalized.json +// RUN: %t/SymbolGraphs/main.c.symbols.json >> %t/output-normalized.json // RUN: diff %t/reference.output.json %t/output-normalized.json // CHECK-NOT: error: diff --git a/clang/test/ExtractAPI/enum.c b/clang/test/ExtractAPI/enum.c index 94499d9..1cdf45c 100644 --- a/clang/test/ExtractAPI/enum.c +++ b/clang/test/ExtractAPI/enum.c @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/field_template.cpp b/clang/test/ExtractAPI/field_template.cpp index f05e826..2058ed0 100644 --- a/clang/test/ExtractAPI/field_template.cpp +++ b/clang/test/ExtractAPI/field_template.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/function_noexcepts.cpp b/clang/test/ExtractAPI/function_noexcepts.cpp index 3fc7263..d95eaaa 100644 --- a/clang/test/ExtractAPI/function_noexcepts.cpp +++ b/clang/test/ExtractAPI/function_noexcepts.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. 
diff --git a/clang/test/ExtractAPI/global_func_template.cpp b/clang/test/ExtractAPI/global_func_template.cpp index 8def974..f43a618 100644 --- a/clang/test/ExtractAPI/global_func_template.cpp +++ b/clang/test/ExtractAPI/global_func_template.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/global_func_template_spec.cpp b/clang/test/ExtractAPI/global_func_template_spec.cpp index a24263d..fe046e9 100644 --- a/clang/test/ExtractAPI/global_func_template_spec.cpp +++ b/clang/test/ExtractAPI/global_func_template_spec.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/global_record.c b/clang/test/ExtractAPI/global_record.c index 623032b..a08d51d 100644 --- a/clang/test/ExtractAPI/global_record.c +++ b/clang/test/ExtractAPI/global_record.c @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api --product-name=GlobalRecord -target arm64-apple-macosx \ +// RUN: %clang -extract-api --pretty-sgf --product-name=GlobalRecord -target arm64-apple-macosx \ // RUN: %t/input.h -o %t/output.json | FileCheck -allow-empty %s // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/global_record_multifile.c b/clang/test/ExtractAPI/global_record_multifile.c index f9d3889..ffdfbcb 100644 --- a/clang/test/ExtractAPI/global_record_multifile.c +++ b/clang/test/ExtractAPI/global_record_multifile.c @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api --product-name=GlobalRecord -target arm64-apple-macosx \ +// RUN: %clang -extract-api --pretty-sgf --product-name=GlobalRecord -target arm64-apple-macosx \ // RUN: %t/input1.h %t/input2.h %t/input3.h -o %t/output.json | FileCheck -allow-empty %s // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/global_var_template.cpp b/clang/test/ExtractAPI/global_var_template.cpp index bee2ea6..94f3713 100644 --- a/clang/test/ExtractAPI/global_var_template.cpp +++ b/clang/test/ExtractAPI/global_var_template.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. 
diff --git a/clang/test/ExtractAPI/global_var_template_partial_spec.cpp b/clang/test/ExtractAPI/global_var_template_partial_spec.cpp index e98076c..91084f25 100644 --- a/clang/test/ExtractAPI/global_var_template_partial_spec.cpp +++ b/clang/test/ExtractAPI/global_var_template_partial_spec.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/global_var_template_spec.cpp b/clang/test/ExtractAPI/global_var_template_spec.cpp index cca2ab3..ff4d8d1 100644 --- a/clang/test/ExtractAPI/global_var_template_spec.cpp +++ b/clang/test/ExtractAPI/global_var_template_spec.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/known_files_only.c b/clang/test/ExtractAPI/known_files_only.c index 68881aa..de1e786 100644 --- a/clang/test/ExtractAPI/known_files_only.c +++ b/clang/test/ExtractAPI/known_files_only.c @@ -1,17 +1,7 @@ // RUN: rm -rf %t // RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api --product-name=GlobalRecord -target arm64-apple-macosx \ -// RUN: %t/input1.h -o %t/output.json | FileCheck -allow-empty %s - -// Generator version is not consistent across test runs, normalize it. -// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -// CHECK-NOT: error: -// CHECK-NOT: warning: +// RUN: %clang_cc1 -extract-api --pretty-sgf --product-name=GlobalRecord -triple arm64-apple-macosx \ +// RUN: %t/input1.h -verify -o - | FileCheck %s //--- input1.h int num; @@ -24,87 +14,6 @@ char not_emitted; void foo(int); struct Foo { int a; }; -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" 
- }, - "module": { - "name": "GlobalRecord", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "num" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@num" - }, - "kind": { - "displayName": "Global Variable", - "identifier": "c.var" - }, - "location": { - "position": { - "character": 4, - "line": 0 - }, - "uri": "file://INPUT_DIR/input1.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "num" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "num" - } - ], - "title": "num" - }, - "pathComponents": [ - "num" - ] - } - ] -} +// CHECK-NOT: input2.h + +// expected-no-diagnostics diff --git a/clang/test/ExtractAPI/language.c b/clang/test/ExtractAPI/language.c index fe98626..90832fd 100644 --- a/clang/test/ExtractAPI/language.c +++ b/clang/test/ExtractAPI/language.c @@ -7,11 +7,11 @@ // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/objcpp.reference.output.json.in >> %t/objcpp.reference.output.json -// RUN: %clang_cc1 -extract-api -x c-header -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -x c-header -triple arm64-apple-macosx \ // RUN: %t/c.h -o %t/c.output.json | FileCheck -allow-empty %s -// RUN: %clang_cc1 -extract-api -x objective-c-header -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -x objective-c-header -triple arm64-apple-macosx \ // RUN: %t/objc.h -o %t/objc.output.json | FileCheck -allow-empty %s -// RUN: %clang_cc1 -extract-api -x objective-c++-header -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -x objective-c++-header -triple arm64-apple-macosx \ // RUN: %t/objcpp.h -o %t/objcpp.output.json | FileCheck -allow-empty %s // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/macro_undefined.c b/clang/test/ExtractAPI/macro_undefined.c index 1a4ed20..ec60f95 100644 --- a/clang/test/ExtractAPI/macro_undefined.c +++ b/clang/test/ExtractAPI/macro_undefined.c @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api --product-name=Macros -target arm64-apple-macosx \ +// RUN: %clang -extract-api --pretty-sgf --product-name=Macros -target arm64-apple-macosx \ // RUN: -x objective-c-header %t/input.h -o %t/output.json | FileCheck -allow-empty %s // Generator version is not consistent across test runs, normalize it. 
diff --git a/clang/test/ExtractAPI/macros.c b/clang/test/ExtractAPI/macros.c index d5807f6..10003fe 100644 --- a/clang/test/ExtractAPI/macros.c +++ b/clang/test/ExtractAPI/macros.c @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api --product-name=Macros -target arm64-apple-macosx \ +// RUN: %clang -extract-api --pretty-sgf --product-name=Macros -target arm64-apple-macosx \ // RUN: -x objective-c-header %t/input.h -o %t/output.json | FileCheck -allow-empty %s // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/metadata_and_module.c b/clang/test/ExtractAPI/metadata_and_module.c new file mode 100644 index 0000000..79574a2 --- /dev/null +++ b/clang/test/ExtractAPI/metadata_and_module.c @@ -0,0 +1,32 @@ +// RUN: rm -rf %t +// RUN: %clang_cc1 -extract-api --pretty-sgf --product-name=module -triple arm64-apple-macosx -x c-header %s -o %t/module.symbols.json -verify + +// RUN: FileCheck %s --input-file %t/module.symbols.json --check-prefix METADATA +// RUN: FileCheck %s --input-file %t/module.symbols.json --check-prefix MOD + +// expected-no-diagnostics + +// METADATA: "metadata": { +// METADATA-NEXT: "formatVersion": { +// METADATA-NEXT: "major": +// METADATA-NEXT: "minor": +// METADATA-NEXT: "patch": +// METADATA-NEXT: }, +// METADATA-NEXT: "generator": +// METADATA-NEXT: } + +// MOD: "module": { +// MOD-NEXT: "name": "module", +// MOD-NEXT: "platform": { +// MOD-NEXT: "architecture": "arm64", +// MOD-NEXT: "operatingSystem": { +// MOD-NEXT: "minimumVersion": { +// MOD-NEXT: "major": +// MOD-NEXT: "minor": +// MOD-NEXT: "patch": +// MOD-NEXT: }, +// MOD-NEXT: "name": "macosx" +// MOD-NEXT: }, +// MOD-NEXT: "vendor": "apple" +// MOD-NEXT: } +// MOD-NEXT: } diff --git a/clang/test/ExtractAPI/method_template.cpp b/clang/test/ExtractAPI/method_template.cpp index 8d83233..714f9ca 100644 --- a/clang/test/ExtractAPI/method_template.cpp +++ b/clang/test/ExtractAPI/method_template.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/method_template_spec.cpp b/clang/test/ExtractAPI/method_template_spec.cpp index 706d99d..8eaffde 100644 --- a/clang/test/ExtractAPI/method_template_spec.cpp +++ b/clang/test/ExtractAPI/method_template_spec.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. 
diff --git a/clang/test/ExtractAPI/methods.cpp b/clang/test/ExtractAPI/methods.cpp index 8b024a8..412c0bb 100644 --- a/clang/test/ExtractAPI/methods.cpp +++ b/clang/test/ExtractAPI/methods.cpp @@ -1,467 +1,221 @@ // RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ -// RUN: -x c++-header %t/input.h -o %t/output.json -verify +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \ +// RUN: -triple arm64-apple-macosx -x c++-header %s -o %t/output.symbols.json -verify -// Generator version is not consistent across test runs, normalize it. -// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -//--- input.h class Foo { + // RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix GETCOUNT int getCount(); + // GETCOUNT: "!testRelLabel": "memberOf $ c:@S@Foo@F@getCount# $ c:@S@Foo" + // GETCOUNT-LABEL: "!testLabel": "c:@S@Foo@F@getCount#" + // GETCOUNT: "accessLevel": "private", + // GETCOUNT: "declarationFragments": [ + // GETCOUNT-NEXT: { + // GETCOUNT-NEXT: "kind": "typeIdentifier", + // GETCOUNT-NEXT: "preciseIdentifier": "c:I", + // GETCOUNT-NEXT: "spelling": "int" + // GETCOUNT-NEXT: }, + // GETCOUNT-NEXT: { + // GETCOUNT-NEXT: "kind": "text", + // GETCOUNT-NEXT: "spelling": " " + // GETCOUNT-NEXT: }, + // GETCOUNT-NEXT: { + // GETCOUNT-NEXT: "kind": "identifier", + // GETCOUNT-NEXT: "spelling": "getCount" + // GETCOUNT-NEXT: }, + // GETCOUNT-NEXT: { + // GETCOUNT-NEXT: "kind": "text", + // GETCOUNT-NEXT: "spelling": "();" + // GETCOUNT-NEXT: } + // GETCOUNT-NEXT: ], + // GETCOUNT: "functionSignature": { + // GETCOUNT-NEXT: "returns": [ + // GETCOUNT-NEXT: { + // GETCOUNT-NEXT: "kind": "typeIdentifier", + // GETCOUNT-NEXT: "preciseIdentifier": "c:I", + // GETCOUNT-NEXT: "spelling": "int" + // GETCOUNT-NEXT: } + // GETCOUNT-NEXT: ] + // GETCOUNT-NEXT: }, + // GETCOUNT: "displayName": "Instance Method", + // GETCOUNT-NEXT: "identifier": "c++.method" + // GETCOUNT: "title": "getCount" + // GETCOUNT: "pathComponents": [ + // GETCOUNT-NEXT: "Foo", + // GETCOUNT-NEXT: "getCount" + // GETCOUNT-NEXT: ] + // RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix SETL void setLength(int length) noexcept; + // SETL: "!testRelLabel": "memberOf $ c:@S@Foo@F@setLength#I# $ c:@S@Foo" + // SETL-LABEL: "!testLabel": "c:@S@Foo@F@setLength#I#" + // SETL: "declarationFragments": [ + // SETL-NEXT: { + // SETL-NEXT: "kind": "typeIdentifier", + // SETL-NEXT: "preciseIdentifier": "c:v", + // SETL-NEXT: "spelling": "void" + // SETL-NEXT: }, + // SETL-NEXT: { + // SETL-NEXT: "kind": "text", + // SETL-NEXT: "spelling": " " + // SETL-NEXT: }, + // SETL-NEXT: { + // SETL-NEXT: "kind": "identifier", + // SETL-NEXT: "spelling": "setLength" + // SETL-NEXT: }, + // SETL-NEXT: { + // SETL-NEXT: "kind": "text", + // SETL-NEXT: "spelling": "(" + // SETL-NEXT: }, + // SETL-NEXT: { + // SETL-NEXT: "kind": "typeIdentifier", + // SETL-NEXT: "preciseIdentifier": "c:I", + // SETL-NEXT: "spelling": "int" + // SETL-NEXT: }, + // SETL-NEXT: { + // SETL-NEXT: "kind": "text", + // SETL-NEXT: "spelling": " " + // SETL-NEXT: }, + // SETL-NEXT: { + // SETL-NEXT: "kind": "internalParam", + // SETL-NEXT: "spelling": "length" + // SETL-NEXT: }, + // SETL-NEXT: { + // SETL-NEXT: "kind": 
"text", + // SETL-NEXT: "spelling": ")" + // SETL-NEXT: }, + // SETL-NEXT: { + // SETL-NEXT: "kind": "text", + // SETL-NEXT: "spelling": " " + // SETL-NEXT: }, + // SETL-NEXT: { + // SETL-NEXT: "kind": "keyword", + // SETL-NEXT: "spelling": "noexcept" + // SETL-NEXT: }, + // SETL-NEXT: { + // SETL-NEXT: "kind": "text", + // SETL-NEXT: "spelling": ";" + // SETL-NEXT: } + // SETL-NEXT: ], + // SETL: "functionSignature": { + // SETL-NEXT: "parameters": [ + // SETL-NEXT: { + // SETL-NEXT: "declarationFragments": [ + // SETL-NEXT: { + // SETL-NEXT: "kind": "typeIdentifier", + // SETL-NEXT: "preciseIdentifier": "c:I", + // SETL-NEXT: "spelling": "int" + // SETL-NEXT: }, + // SETL-NEXT: { + // SETL-NEXT: "kind": "text", + // SETL-NEXT: "spelling": " " + // SETL-NEXT: }, + // SETL-NEXT: { + // SETL-NEXT: "kind": "internalParam", + // SETL-NEXT: "spelling": "length" + // SETL-NEXT: } + // SETL-NEXT: ], + // SETL-NEXT: "name": "length" + // SETL-NEXT: } + // SETL-NEXT: ], + // SETL-NEXT: "returns": [ + // SETL-NEXT: { + // SETL-NEXT: "kind": "typeIdentifier", + // SETL-NEXT: "preciseIdentifier": "c:v", + // SETL-NEXT: "spelling": "void" + // SETL-NEXT: } + // SETL-NEXT: ] + // SETL-NEXT: }, public: + // RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix GETFOO static double getFoo(); + // GETFOO: "!testRelLabel": "memberOf $ c:@S@Foo@F@getFoo#S $ c:@S@Foo" + + // GETFOO-LABEL: "!testLabel": "c:@S@Foo@F@getFoo#S" + // GETFOO: "accessLevel": "public", + // GETFOO: "declarationFragments": [ + // GETFOO-NEXT: { + // GETFOO-NEXT: "kind": "keyword", + // GETFOO-NEXT: "spelling": "static" + // GETFOO-NEXT: }, + // GETFOO-NEXT: { + // GETFOO-NEXT: "kind": "text", + // GETFOO-NEXT: "spelling": " " + // GETFOO-NEXT: }, + // GETFOO-NEXT: { + // GETFOO-NEXT: "kind": "typeIdentifier", + // GETFOO-NEXT: "preciseIdentifier": "c:d", + // GETFOO-NEXT: "spelling": "double" + // GETFOO-NEXT: }, + // GETFOO-NEXT: { + // GETFOO-NEXT: "kind": "text", + // GETFOO-NEXT: "spelling": " " + // GETFOO-NEXT: }, + // GETFOO-NEXT: { + // GETFOO-NEXT: "kind": "identifier", + // GETFOO-NEXT: "spelling": "getFoo" + // GETFOO-NEXT: }, + // GETFOO-NEXT: { + // GETFOO-NEXT: "kind": "text", + // GETFOO-NEXT: "spelling": "();" + // GETFOO-NEXT: } + // GETFOO-NEXT: ], + // GETFOO: "functionSignature": { + // GETFOO-NEXT: "returns": [ + // GETFOO-NEXT: { + // GETFOO-NEXT: "kind": "typeIdentifier", + // GETFOO-NEXT: "preciseIdentifier": "c:d", + // GETFOO-NEXT: "spelling": "double" + // GETFOO-NEXT: } + // GETFOO-NEXT: ] + // GETFOO-NEXT: }, + // GETFOO: "kind": { + // GETFOO-NEXT: "displayName": "Static Method", + // GETFOO-NEXT: "identifier": "c++.type.method" + // GETFOO-NEXT: }, protected: + // RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix GETBAR constexpr int getBar() const; + // GETBAR: "!testRelLabel": "memberOf $ c:@S@Foo@F@getBar#1 $ c:@S@Foo" + + // GETBAR-LABEL: "!testLabel": "c:@S@Foo@F@getBar#1" + // GETBAR: "accessLevel": "protected" + // GETBAR: "declarationFragments": [ + // GETBAR-NEXT: { + // GETBAR-NEXT: "kind": "keyword", + // GETBAR-NEXT: "spelling": "constexpr" + // GETBAR-NEXT: }, + // GETBAR-NEXT: { + // GETBAR-NEXT: "kind": "text", + // GETBAR-NEXT: "spelling": " " + // GETBAR-NEXT: }, + // GETBAR-NEXT: { + // GETBAR-NEXT: "kind": "typeIdentifier", + // GETBAR-NEXT: "preciseIdentifier": "c:I", + // GETBAR-NEXT: "spelling": "int" + // GETBAR-NEXT: }, + // GETBAR-NEXT: { + // GETBAR-NEXT: "kind": "text", + // GETBAR-NEXT: "spelling": " " + // GETBAR-NEXT: }, + // GETBAR-NEXT: { 
+ // GETBAR-NEXT: "kind": "identifier", + // GETBAR-NEXT: "spelling": "getBar" + // GETBAR-NEXT: }, + // GETBAR-NEXT: { + // GETBAR-NEXT: "kind": "text", + // GETBAR-NEXT: "spelling": "() " + // GETBAR-NEXT: }, + // GETBAR-NEXT: { + // GETBAR-NEXT: "kind": "keyword", + // GETBAR-NEXT: "spelling": "const" + // GETBAR-NEXT: }, + // GETBAR-NEXT: { + // GETBAR-NEXT: "kind": "text", + // GETBAR-NEXT: "spelling": ";" + // GETBAR-NEXT: } + // GETBAR-NEXT: ], }; -/// expected-no-diagnostics -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" - }, - "module": { - "name": "", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [ - { - "kind": "memberOf", - "source": "c:@S@Foo@F@getCount#", - "target": "c:@S@Foo", - "targetFallback": "Foo" - }, - { - "kind": "memberOf", - "source": "c:@S@Foo@F@setLength#I#", - "target": "c:@S@Foo", - "targetFallback": "Foo" - }, - { - "kind": "memberOf", - "source": "c:@S@Foo@F@getBar#1", - "target": "c:@S@Foo", - "targetFallback": "Foo" - }, - { - "kind": "memberOf", - "source": "c:@S@Foo@F@getFoo#S", - "target": "c:@S@Foo", - "targetFallback": "Foo" - } - ], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "class" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Foo" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c++", - "precise": "c:@S@Foo" - }, - "kind": { - "displayName": "Class", - "identifier": "c++.class" - }, - "location": { - "position": { - "character": 6, - "line": 0 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Foo" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Foo" - } - ], - "title": "Foo" - }, - "pathComponents": [ - "Foo" - ] - }, - { - "accessLevel": "private", - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "getCount" - }, - { - "kind": "text", - "spelling": "();" - } - ], - "functionSignature": { - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - } - ] - }, - "identifier": { - "interfaceLanguage": "c++", - "precise": "c:@S@Foo@F@getCount#" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "c++.method" - }, - "location": { - "position": { - "character": 6, - "line": 1 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "getCount" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "getCount" - } - ], - "title": "getCount" - }, - "pathComponents": [ - "Foo", - "getCount" - ] - }, - { - "accessLevel": "private", - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "setLength" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "length" - }, - { - 
"kind": "text", - "spelling": ")" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "keyword", - "spelling": "noexcept" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "parameters": [ - { - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "length" - } - ], - "name": "length" - } - ], - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "c++", - "precise": "c:@S@Foo@F@setLength#I#" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "c++.method" - }, - "location": { - "position": { - "character": 7, - "line": 3 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "setLength" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "setLength" - } - ], - "title": "setLength" - }, - "pathComponents": [ - "Foo", - "setLength" - ] - }, - { - "accessLevel": "protected", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "constexpr" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "getBar" - }, - { - "kind": "text", - "spelling": "() " - }, - { - "kind": "keyword", - "spelling": "const" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - } - ] - }, - "identifier": { - "interfaceLanguage": "c++", - "precise": "c:@S@Foo@F@getBar#1" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "c++.method" - }, - "location": { - "position": { - "character": 16, - "line": 9 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "getBar" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "getBar" - } - ], - "title": "getBar" - }, - "pathComponents": [ - "Foo", - "getBar" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "static" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:d", - "spelling": "double" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "getFoo" - }, - { - "kind": "text", - "spelling": "();" - } - ], - "functionSignature": { - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:d", - "spelling": "double" - } - ] - }, - "identifier": { - "interfaceLanguage": "c++", - "precise": "c:@S@Foo@F@getFoo#S" - }, - "kind": { - "displayName": "Static Method", - "identifier": "c++.type.method" - }, - "location": { - "position": { - "character": 16, - "line": 6 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "getFoo" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "getFoo" - } - ], - "title": "getFoo" - }, - "pathComponents": [ - "Foo", - "getFoo" - ] - } - ] -} +// expected-no-diagnostics diff --git a/clang/test/ExtractAPI/multiple_inheritance.cpp b/clang/test/ExtractAPI/multiple_inheritance.cpp index a1f069b..7d49cf4 100644 --- 
a/clang/test/ExtractAPI/multiple_inheritance.cpp +++ b/clang/test/ExtractAPI/multiple_inheritance.cpp @@ -3,7 +3,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/namespace.cpp b/clang/test/ExtractAPI/namespace.cpp index e0c36dd..73e0728 100644 --- a/clang/test/ExtractAPI/namespace.cpp +++ b/clang/test/ExtractAPI/namespace.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -std=c++20 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/nested_namespaces.cpp b/clang/test/ExtractAPI/nested_namespaces.cpp index bd13ef9..c6912cf 100644 --- a/clang/test/ExtractAPI/nested_namespaces.cpp +++ b/clang/test/ExtractAPI/nested_namespaces.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -std=c++20 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/objc_block.m b/clang/test/ExtractAPI/objc_block.m index a7a4f56..4a4335e 100644 --- a/clang/test/ExtractAPI/objc_block.m +++ b/clang/test/ExtractAPI/objc_block.m @@ -1,965 +1,630 @@ // RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -fblocks -triple arm64-apple-macosx \ -// RUN: -x objective-c-header %t/input.h -o %t/output.json -verify +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \ +// RUN: -fblocks -triple arm64-apple-macosx -x objective-c-header %s -o %t/output.symbols.json -verify -// Generator version is not consistent across test runs, normalize it. 
-// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -//--- input.h @interface Foo +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix NOPARAM -(void)methodBlockNoParam:(void (^)())block; +// NOPARAM-LABEL: "!testLabel": "c:objc(cs)Foo(im)methodBlockNoParam:" +// NOPARAM: "declarationFragments": [ +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "text", +// NOPARAM-NEXT: "spelling": "- (" +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "typeIdentifier", +// NOPARAM-NEXT: "preciseIdentifier": "c:v", +// NOPARAM-NEXT: "spelling": "void" +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "text", +// NOPARAM-NEXT: "spelling": ") " +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "identifier", +// NOPARAM-NEXT: "spelling": "methodBlockNoParam:" +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "text", +// NOPARAM-NEXT: "spelling": "(" +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "typeIdentifier", +// NOPARAM-NEXT: "preciseIdentifier": "c:v", +// NOPARAM-NEXT: "spelling": "void" +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "text", +// NOPARAM-NEXT: "spelling": " (^" +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "text", +// NOPARAM-NEXT: "spelling": ")()) " +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "internalParam", +// NOPARAM-NEXT: "spelling": "block" +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "text", +// NOPARAM-NEXT: "spelling": ";" +// NOPARAM-NEXT: } +// NOPARAM-NEXT: ], +// NOPARAM: "functionSignature": { +// NOPARAM-NEXT: "parameters": [ +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "declarationFragments": [ +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "text", +// NOPARAM-NEXT: "spelling": "(" +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "typeIdentifier", +// NOPARAM-NEXT: "preciseIdentifier": "c:v", +// NOPARAM-NEXT: "spelling": "void" +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "text", +// NOPARAM-NEXT: "spelling": " (^" +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "text", +// NOPARAM-NEXT: "spelling": ")()) " +// NOPARAM-NEXT: }, +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "internalParam", +// NOPARAM-NEXT: "spelling": "block" +// NOPARAM-NEXT: } +// NOPARAM-NEXT: ], +// NOPARAM-NEXT: "name": "block" +// NOPARAM-NEXT: } +// NOPARAM-NEXT: ], +// NOPARAM-NEXT: "returns": [ +// NOPARAM-NEXT: { +// NOPARAM-NEXT: "kind": "typeIdentifier", +// NOPARAM-NEXT: "preciseIdentifier": "c:v", +// NOPARAM-NEXT: "spelling": "void" +// NOPARAM-NEXT: } +// NOPARAM-NEXT: ] +// NOPARAM-NEXT: } + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix PARAM -(void)methodBlockWithParam:(int (^)(int foo))block; +// PARAM-LABEL: "!testLabel": "c:objc(cs)Foo(im)methodBlockWithParam:" +// PARAM: "declarationFragments": [ +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": "- (" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "typeIdentifier", +// PARAM-NEXT: "preciseIdentifier": "c:v", +// PARAM-NEXT: "spelling": "void" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": ") " +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "identifier", +// PARAM-NEXT: "spelling": "methodBlockWithParam:" +// PARAM-NEXT: }, +// 
PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": "(" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "typeIdentifier", +// PARAM-NEXT: "preciseIdentifier": "c:I", +// PARAM-NEXT: "spelling": "int" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": " (^" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": ")(" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "typeIdentifier", +// PARAM-NEXT: "preciseIdentifier": "c:I", +// PARAM-NEXT: "spelling": "int" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": " " +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "internalParam", +// PARAM-NEXT: "spelling": "foo" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": ")) " +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "internalParam", +// PARAM-NEXT: "spelling": "block" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": ";" +// PARAM-NEXT: } +// PARAM-NEXT: ], +// PARAM: "functionSignature": { +// PARAM-NEXT: "parameters": [ +// PARAM-NEXT: { +// PARAM-NEXT: "declarationFragments": [ +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": "(" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "typeIdentifier", +// PARAM-NEXT: "preciseIdentifier": "c:I", +// PARAM-NEXT: "spelling": "int" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": " (^" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": ")(" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "typeIdentifier", +// PARAM-NEXT: "preciseIdentifier": "c:I", +// PARAM-NEXT: "spelling": "int" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": " " +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "internalParam", +// PARAM-NEXT: "spelling": "foo" +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "text", +// PARAM-NEXT: "spelling": ")) " +// PARAM-NEXT: }, +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "internalParam", +// PARAM-NEXT: "spelling": "block" +// PARAM-NEXT: } +// PARAM-NEXT: ], +// PARAM-NEXT: "name": "block" +// PARAM-NEXT: } +// PARAM-NEXT: ], +// PARAM-NEXT: "returns": [ +// PARAM-NEXT: { +// PARAM-NEXT: "kind": "typeIdentifier", +// PARAM-NEXT: "preciseIdentifier": "c:v", +// PARAM-NEXT: "spelling": "void" +// PARAM-NEXT: } +// PARAM-NEXT: ] +// PARAM-NEXT: } + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix MULTIPARAM -(void)methodBlockWithMultipleParam:(int (^)(int foo, unsigned baz))block; +// MULTIPARAM-LABEL: "!testLabel": "c:objc(cs)Foo(im)methodBlockWithMultipleParam:" +// MULTIPARAM: "declarationFragments": [ +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": "- (" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "typeIdentifier", +// MULTIPARAM-NEXT: "preciseIdentifier": "c:v", +// MULTIPARAM-NEXT: "spelling": "void" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": ") " +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "identifier", +// MULTIPARAM-NEXT: "spelling": "methodBlockWithMultipleParam:" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// 
MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": "(" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "typeIdentifier", +// MULTIPARAM-NEXT: "preciseIdentifier": "c:I", +// MULTIPARAM-NEXT: "spelling": "int" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": " (^" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": ")(" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "typeIdentifier", +// MULTIPARAM-NEXT: "preciseIdentifier": "c:I", +// MULTIPARAM-NEXT: "spelling": "int" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": " " +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "internalParam", +// MULTIPARAM-NEXT: "spelling": "foo" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": ", " +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "typeIdentifier", +// MULTIPARAM-NEXT: "preciseIdentifier": "c:i", +// MULTIPARAM-NEXT: "spelling": "unsigned int" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": " " +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "internalParam", +// MULTIPARAM-NEXT: "spelling": "baz" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": ")) " +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "internalParam", +// MULTIPARAM-NEXT: "spelling": "block" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": ";" +// MULTIPARAM-NEXT: } +// MULTIPARAM-NEXT: ], +// MULTIPARAM: "functionSignature": { +// MULTIPARAM-NEXT: "parameters": [ +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "declarationFragments": [ +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": "(" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "typeIdentifier", +// MULTIPARAM-NEXT: "preciseIdentifier": "c:I", +// MULTIPARAM-NEXT: "spelling": "int" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": " (^" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": ")(" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "typeIdentifier", +// MULTIPARAM-NEXT: "preciseIdentifier": "c:I", +// MULTIPARAM-NEXT: "spelling": "int" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": " " +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "internalParam", +// MULTIPARAM-NEXT: "spelling": "foo" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": ", " +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "typeIdentifier", +// MULTIPARAM-NEXT: "preciseIdentifier": "c:i", +// MULTIPARAM-NEXT: "spelling": "unsigned int" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": " " +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "internalParam", 
+// MULTIPARAM-NEXT: "spelling": "baz" +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "text", +// MULTIPARAM-NEXT: "spelling": ")) " +// MULTIPARAM-NEXT: }, +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "internalParam", +// MULTIPARAM-NEXT: "spelling": "block" +// MULTIPARAM-NEXT: } +// MULTIPARAM-NEXT: ], +// MULTIPARAM-NEXT: "name": "block" +// MULTIPARAM-NEXT: } +// MULTIPARAM-NEXT: ], +// MULTIPARAM-NEXT: "returns": [ +// MULTIPARAM-NEXT: { +// MULTIPARAM-NEXT: "kind": "typeIdentifier", +// MULTIPARAM-NEXT: "preciseIdentifier": "c:v", +// MULTIPARAM-NEXT: "spelling": "void" +// MULTIPARAM-NEXT: } +// MULTIPARAM-NEXT: ] +// MULTIPARAM-NEXT: }, + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix VARIADIC -(void)methodBlockVariadic:(int (^)(int foo, ...))block; +// VARIADIC-LABEL: "!testLabel": "c:objc(cs)Foo(im)methodBlockVariadic:" +// VARIADIC: "declarationFragments": [ +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": "- (" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "typeIdentifier", +// VARIADIC-NEXT: "preciseIdentifier": "c:v", +// VARIADIC-NEXT: "spelling": "void" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": ") " +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "identifier", +// VARIADIC-NEXT: "spelling": "methodBlockVariadic:" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": "(" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "typeIdentifier", +// VARIADIC-NEXT: "preciseIdentifier": "c:I", +// VARIADIC-NEXT: "spelling": "int" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": " (^" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": ")(" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "typeIdentifier", +// VARIADIC-NEXT: "preciseIdentifier": "c:I", +// VARIADIC-NEXT: "spelling": "int" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": " " +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "internalParam", +// VARIADIC-NEXT: "spelling": "foo" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": ", ...)) " +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "internalParam", +// VARIADIC-NEXT: "spelling": "block" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": ";" +// VARIADIC-NEXT: } +// VARIADIC-NEXT: ], +// VARIADIC: "functionSignature": { +// VARIADIC-NEXT: "parameters": [ +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "declarationFragments": [ +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": "(" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "typeIdentifier", +// VARIADIC-NEXT: "preciseIdentifier": "c:I", +// VARIADIC-NEXT: "spelling": "int" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": " (^" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": ")(" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "typeIdentifier", +// VARIADIC-NEXT: "preciseIdentifier": "c:I", 
+// VARIADIC-NEXT: "spelling": "int" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": " " +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "internalParam", +// VARIADIC-NEXT: "spelling": "foo" +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "text", +// VARIADIC-NEXT: "spelling": ", ...)) " +// VARIADIC-NEXT: }, +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "internalParam", +// VARIADIC-NEXT: "spelling": "block" +// VARIADIC-NEXT: } +// VARIADIC-NEXT: ], +// VARIADIC-NEXT: "name": "block" +// VARIADIC-NEXT: } +// VARIADIC-NEXT: ], +// VARIADIC-NEXT: "returns": [ +// VARIADIC-NEXT: { +// VARIADIC-NEXT: "kind": "typeIdentifier", +// VARIADIC-NEXT: "preciseIdentifier": "c:v", +// VARIADIC-NEXT: "spelling": "void" +// VARIADIC-NEXT: } +// VARIADIC-NEXT: ] +// VARIADIC-NEXT: }, @end +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix FUNC void func(int (^arg)(int foo)); +// FUNC-LABEL: "!testLabel": "c:@F@func" +// FUNC: "declarationFragments": [ +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "typeIdentifier", +// FUNC-NEXT: "preciseIdentifier": "c:v", +// FUNC-NEXT: "spelling": "void" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "text", +// FUNC-NEXT: "spelling": " " +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "identifier", +// FUNC-NEXT: "spelling": "func" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "text", +// FUNC-NEXT: "spelling": "(" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "typeIdentifier", +// FUNC-NEXT: "preciseIdentifier": "c:I", +// FUNC-NEXT: "spelling": "int" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "text", +// FUNC-NEXT: "spelling": " (^" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "internalParam", +// FUNC-NEXT: "spelling": "arg" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "text", +// FUNC-NEXT: "spelling": ")(" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "typeIdentifier", +// FUNC-NEXT: "preciseIdentifier": "c:I", +// FUNC-NEXT: "spelling": "int" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "text", +// FUNC-NEXT: "spelling": " " +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "internalParam", +// FUNC-NEXT: "spelling": "foo" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "text", +// FUNC-NEXT: "spelling": "));" +// FUNC-NEXT: } +// FUNC-NEXT: ], +// FUNC: "functionSignature": { +// FUNC-NEXT: "parameters": [ +// FUNC-NEXT: { +// FUNC-NEXT: "declarationFragments": [ +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "typeIdentifier", +// FUNC-NEXT: "preciseIdentifier": "c:I", +// FUNC-NEXT: "spelling": "int" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "text", +// FUNC-NEXT: "spelling": " (^" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "internalParam", +// FUNC-NEXT: "spelling": "arg" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "text", +// FUNC-NEXT: "spelling": ")(" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "typeIdentifier", +// FUNC-NEXT: "preciseIdentifier": "c:I", +// FUNC-NEXT: "spelling": "int" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "text", +// FUNC-NEXT: "spelling": " " +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "internalParam", +// FUNC-NEXT: "spelling": "foo" +// FUNC-NEXT: }, +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "text", +// FUNC-NEXT: "spelling": ")" +// FUNC-NEXT: } +// FUNC-NEXT: ], +// FUNC-NEXT: "name": "arg" +// 
FUNC-NEXT: } +// FUNC-NEXT: ], +// FUNC-NEXT: "returns": [ +// FUNC-NEXT: { +// FUNC-NEXT: "kind": "typeIdentifier", +// FUNC-NEXT: "preciseIdentifier": "c:v", +// FUNC-NEXT: "spelling": "void" +// FUNC-NEXT: } +// FUNC-NEXT: ] +// FUNC-NEXT: }, +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix GLOBAL int (^global)(int foo); +// GLOBAL-LABEL: "!testLabel": "c:@global" +// GLOBAL: "declarationFragments": [ +// GLOBAL-NEXT: { +// GLOBAL-NEXT: "kind": "typeIdentifier", +// GLOBAL-NEXT: "preciseIdentifier": "c:I", +// GLOBAL-NEXT: "spelling": "int" +// GLOBAL-NEXT: }, +// GLOBAL-NEXT: { +// GLOBAL-NEXT: "kind": "text", +// GLOBAL-NEXT: "spelling": " (^" +// GLOBAL-NEXT: }, +// GLOBAL-NEXT: { +// GLOBAL-NEXT: "kind": "identifier", +// GLOBAL-NEXT: "spelling": "global" +// GLOBAL-NEXT: }, +// GLOBAL-NEXT: { +// GLOBAL-NEXT: "kind": "text", +// GLOBAL-NEXT: "spelling": ")(" +// GLOBAL-NEXT: }, +// GLOBAL-NEXT: { +// GLOBAL-NEXT: "kind": "typeIdentifier", +// GLOBAL-NEXT: "preciseIdentifier": "c:I", +// GLOBAL-NEXT: "spelling": "int" +// GLOBAL-NEXT: }, +// GLOBAL-NEXT: { +// GLOBAL-NEXT: "kind": "text", +// GLOBAL-NEXT: "spelling": " " +// GLOBAL-NEXT: }, +// GLOBAL-NEXT: { +// GLOBAL-NEXT: "kind": "internalParam", +// GLOBAL-NEXT: "spelling": "foo" +// GLOBAL-NEXT: }, +// GLOBAL-NEXT: { +// GLOBAL-NEXT: "kind": "text", +// GLOBAL-NEXT: "spelling": ");" +// GLOBAL-NEXT: } +// GLOBAL-NEXT: ], ///expected-no-diagnostics - -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" - }, - "module": { - "name": "", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [ - { - "kind": "memberOf", - "source": "c:objc(cs)Foo(im)methodBlockNoParam:", - "target": "c:objc(cs)Foo", - "targetFallback": "Foo" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)Foo(im)methodBlockWithParam:", - "target": "c:objc(cs)Foo", - "targetFallback": "Foo" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)Foo(im)methodBlockWithMultipleParam:", - "target": "c:objc(cs)Foo", - "targetFallback": "Foo" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)Foo(im)methodBlockVariadic:", - "target": "c:objc(cs)Foo", - "targetFallback": "Foo" - } - ], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " (^" - }, - { - "kind": "identifier", - "spelling": "global" - }, - { - "kind": "text", - "spelling": ")(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "foo" - }, - { - "kind": "text", - "spelling": ");" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:@global" - }, - "kind": { - "displayName": "Global Variable", - "identifier": "objective-c.var" - }, - "location": { - "position": { - "character": 6, - "line": 9 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "global" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "global" - } - ], - "title": "global" - }, - "pathComponents": [ - "global" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": 
"typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "func" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " (^" - }, - { - "kind": "internalParam", - "spelling": "arg" - }, - { - "kind": "text", - "spelling": ")(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "foo" - }, - { - "kind": "text", - "spelling": "));" - } - ], - "functionSignature": { - "parameters": [ - { - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " (^" - }, - { - "kind": "internalParam", - "spelling": "arg" - }, - { - "kind": "text", - "spelling": ")(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "foo" - }, - { - "kind": "text", - "spelling": ")" - } - ], - "name": "arg" - } - ], - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:@F@func" - }, - "kind": { - "displayName": "Function", - "identifier": "objective-c.func" - }, - "location": { - "position": { - "character": 5, - "line": 7 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "func" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "func" - } - ], - "title": "func" - }, - "pathComponents": [ - "func" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@interface" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Foo" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Foo" - }, - "kind": { - "displayName": "Class", - "identifier": "objective-c.class" - }, - "location": { - "position": { - "character": 11, - "line": 0 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Foo" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Foo" - } - ], - "title": "Foo" - }, - "pathComponents": [ - "Foo" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "- (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "methodBlockNoParam:" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": " (^" - }, - { - "kind": "text", - "spelling": ")()) " - }, - { - "kind": "internalParam", - "spelling": "block" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "parameters": [ - { - "declarationFragments": [ - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": " (^" - }, - { - "kind": "text", - "spelling": ")()) " - }, - { - 
"kind": "internalParam", - "spelling": "block" - } - ], - "name": "block" - } - ], - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Foo(im)methodBlockNoParam:" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "objective-c.method" - }, - "location": { - "position": { - "character": 0, - "line": 1 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "methodBlockNoParam:" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "- " - }, - { - "kind": "identifier", - "spelling": "methodBlockNoParam:" - } - ], - "title": "methodBlockNoParam:" - }, - "pathComponents": [ - "Foo", - "methodBlockNoParam:" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "- (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "methodBlockWithParam:" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " (^" - }, - { - "kind": "text", - "spelling": ")(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "foo" - }, - { - "kind": "text", - "spelling": ")) " - }, - { - "kind": "internalParam", - "spelling": "block" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "parameters": [ - { - "declarationFragments": [ - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " (^" - }, - { - "kind": "text", - "spelling": ")(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "foo" - }, - { - "kind": "text", - "spelling": ")) " - }, - { - "kind": "internalParam", - "spelling": "block" - } - ], - "name": "block" - } - ], - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Foo(im)methodBlockWithParam:" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "objective-c.method" - }, - "location": { - "position": { - "character": 0, - "line": 2 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "methodBlockWithParam:" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "- " - }, - { - "kind": "identifier", - "spelling": "methodBlockWithParam:" - } - ], - "title": "methodBlockWithParam:" - }, - "pathComponents": [ - "Foo", - "methodBlockWithParam:" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "- (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "methodBlockWithMultipleParam:" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - 
"spelling": "int" - }, - { - "kind": "text", - "spelling": " (^" - }, - { - "kind": "text", - "spelling": ")(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "foo" - }, - { - "kind": "text", - "spelling": ", " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:i", - "spelling": "unsigned int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "baz" - }, - { - "kind": "text", - "spelling": ")) " - }, - { - "kind": "internalParam", - "spelling": "block" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "parameters": [ - { - "declarationFragments": [ - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " (^" - }, - { - "kind": "text", - "spelling": ")(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "foo" - }, - { - "kind": "text", - "spelling": ", " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:i", - "spelling": "unsigned int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "baz" - }, - { - "kind": "text", - "spelling": ")) " - }, - { - "kind": "internalParam", - "spelling": "block" - } - ], - "name": "block" - } - ], - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Foo(im)methodBlockWithMultipleParam:" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "objective-c.method" - }, - "location": { - "position": { - "character": 0, - "line": 3 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "methodBlockWithMultipleParam:" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "- " - }, - { - "kind": "identifier", - "spelling": "methodBlockWithMultipleParam:" - } - ], - "title": "methodBlockWithMultipleParam:" - }, - "pathComponents": [ - "Foo", - "methodBlockWithMultipleParam:" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "- (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "methodBlockVariadic:" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " (^" - }, - { - "kind": "text", - "spelling": ")(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "foo" - }, - { - "kind": "text", - "spelling": ", ...)) " - }, - { - "kind": "internalParam", - "spelling": "block" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "parameters": [ - { - "declarationFragments": [ - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " (^" - }, - { - "kind": "text", - "spelling": 
")(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "foo" - }, - { - "kind": "text", - "spelling": ", ...)) " - }, - { - "kind": "internalParam", - "spelling": "block" - } - ], - "name": "block" - } - ], - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Foo(im)methodBlockVariadic:" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "objective-c.method" - }, - "location": { - "position": { - "character": 0, - "line": 4 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "methodBlockVariadic:" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "- " - }, - { - "kind": "identifier", - "spelling": "methodBlockVariadic:" - } - ], - "title": "methodBlockVariadic:" - }, - "pathComponents": [ - "Foo", - "methodBlockVariadic:" - ] - } - ] -} diff --git a/clang/test/ExtractAPI/objc_category.m b/clang/test/ExtractAPI/objc_category.m index 34b0a9e..9177d40 100644 --- a/clang/test/ExtractAPI/objc_category.m +++ b/clang/test/ExtractAPI/objc_category.m @@ -1,341 +1,21 @@ // RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api -x objective-c-header -target arm64-apple-macosx \ -// RUN: %t/input.h -o %t/output.json | FileCheck -allow-empty %s +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \ +// RUN: -triple arm64-apple-macosx -x objective-c-header %s -o - -verify | FileCheck %s -// Generator version is not consistent across test runs, normalize it. -// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -// CHECK-NOT: error: -// CHECK-NOT: warning: - -//--- input.h -@protocol Protocol; +@protocol Protocol +@end @interface Interface @end @interface Interface (Category) +// CHECK-DAG: "!testRelLabel": "conformsTo $ c:objc(cs)Interface $ c:objc(pl)Protocol" @property int Property; +// CHECK-DAG: "!testRelLabel": "memberOf $ c:objc(cs)Interface(py)Property $ c:objc(cs)Interface" - (void)InstanceMethod; +// CHECK-DAG: "!testRelLabel": "memberOf $ c:objc(cs)Interface(im)InstanceMethod $ c:objc(cs)Interface" + (void)ClassMethod; +// CHECK-DAG: "!testRelLabel": "memberOf $ c:objc(cs)Interface(cm)ClassMethod $ c:objc(cs)Interface" @end -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" 
- }, - "module": { - "name": "", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [ - { - "kind": "memberOf", - "source": "c:objc(cs)Interface(im)InstanceMethod", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)Interface(cm)ClassMethod", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)Interface(py)Property", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" - }, - { - "kind": "conformsTo", - "source": "c:objc(cs)Interface", - "target": "c:objc(pl)Protocol", - "targetFallback": "Protocol" - } - ], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@interface" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Interface" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Interface" - }, - "kind": { - "displayName": "Class", - "identifier": "objective-c.class" - }, - "location": { - "position": { - "character": 11, - "line": 2 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Interface" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Interface" - } - ], - "title": "Interface" - }, - "pathComponents": [ - "Interface" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "- (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "InstanceMethod" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Interface(im)InstanceMethod" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "objective-c.method" - }, - "location": { - "position": { - "character": 0, - "line": 7 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "InstanceMethod" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "- " - }, - { - "kind": "identifier", - "spelling": "InstanceMethod" - } - ], - "title": "InstanceMethod" - }, - "pathComponents": [ - "Interface", - "InstanceMethod" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "+ (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "ClassMethod" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Interface(cm)ClassMethod" - }, - "kind": { - "displayName": "Type Method", - "identifier": "objective-c.type.method" - }, - "location": { - "position": { - "character": 0, - "line": 8 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - 
"kind": "identifier", - "spelling": "ClassMethod" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "+ " - }, - { - "kind": "identifier", - "spelling": "ClassMethod" - } - ], - "title": "ClassMethod" - }, - "pathComponents": [ - "Interface", - "ClassMethod" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@property" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Property" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Interface(py)Property" - }, - "kind": { - "displayName": "Instance Property", - "identifier": "objective-c.property" - }, - "location": { - "position": { - "character": 14, - "line": 6 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Property" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Property" - } - ], - "title": "Property" - }, - "pathComponents": [ - "Interface", - "Property" - ] - } - ] -} +// expected-no-diagnostics diff --git a/clang/test/ExtractAPI/objc_external_category.m b/clang/test/ExtractAPI/objc_external_category.m new file mode 100644 index 0000000..47e699c --- /dev/null +++ b/clang/test/ExtractAPI/objc_external_category.m @@ -0,0 +1,49 @@ +// RUN: rm -rf %t +// RUN: split-file %s %t +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \ +// RUN: --emit-extension-symbol-graphs --symbol-graph-dir=%t/symbols \ +// RUN: --product-name=Module -fmodules -fimplicit-module-maps -fmodules-cache-path=%t/modules-cache \ +// RUN: -triple arm64-apple-macosx -x objective-c-header %t/input.h -verify + +//--- input.h +#include "ExternalModule.h" + +@interface ExtInterface (Category) +@property int Property; +- (void)InstanceMethod; ++ (void)ClassMethod; +@end + +@interface ModInterface +@end + +// expected-no-diagnostics + +//--- ExternalModule.h +@interface ExtInterface +@end + +//--- module.modulemap +module ExternalModule { + header "ExternalModule.h" +} + +// RUN: FileCheck %s --input-file %t/symbols/Module.symbols.json --check-prefix MOD +// MOD-NOT: "!testRelLabel": "memberOf $ c:objc(cs)ExtInterface(py)Property $ c:objc(cs)ExtInterface" +// MOD-NOT: "!testRelLabel": "memberOf $ c:objc(cs)ExtInterface(im)InstanceMethod $ c:objc(cs)ExtInterface" +// MOD-NOT: "!testRelLabel": "memberOf $ c:objc(cs)ExtInterface(cm)ClassMethod $ c:objc(cs)ExtInterface" +// MOD-NOT: "!testLabel": "c:objc(cs)ExtInterface(py)Property" +// MOD-NOT: "!testLabel": "c:objc(cs)ExtInterface(im)InstanceMethod" +// MOD-NOT: "!testLabel": "c:objc(cs)ExtInterface(cm)ClassMethod" +// MOD-NOT: "!testLabel": "c:objc(cs)ExtInterface" +// MOD-DAG: "!testLabel": "c:objc(cs)ModInterface" + +// RUN: FileCheck %s --input-file %t/symbols/ExternalModule@Module.symbols.json --check-prefix EXT +// EXT-DAG: "!testRelLabel": "memberOf $ c:objc(cs)ExtInterface(py)Property $ c:objc(cs)ExtInterface" +// EXT-DAG: "!testRelLabel": "memberOf $ c:objc(cs)ExtInterface(im)InstanceMethod $ c:objc(cs)ExtInterface" +// EXT-DAG: "!testRelLabel": "memberOf $ c:objc(cs)ExtInterface(cm)ClassMethod $ c:objc(cs)ExtInterface" +// EXT-DAG: "!testLabel": "c:objc(cs)ExtInterface(py)Property" +// EXT-DAG: "!testLabel": "c:objc(cs)ExtInterface(im)InstanceMethod" +// EXT-DAG: 
"!testLabel": "c:objc(cs)ExtInterface(cm)ClassMethod" +// EXT-NOT: "!testLabel": "c:objc(cs)ExtInterface" +// EXT-NOT: "!testLabel": "c:objc(cs)ModInterface" diff --git a/clang/test/ExtractAPI/objc_id_protocol.m b/clang/test/ExtractAPI/objc_id_protocol.m index 0b0f1b3..f2a03a9 100644 --- a/clang/test/ExtractAPI/objc_id_protocol.m +++ b/clang/test/ExtractAPI/objc_id_protocol.m @@ -1,317 +1,56 @@ // RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api -x objective-c-header -target arm64-apple-macosx \ -// RUN: %t/input.h -o %t/output.json | FileCheck -allow-empty %s +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \ +// RUN: -x objective-c-header -triple arm64-apple-macosx %s -o - -verify | FileCheck %s -// Generator version is not consistent across test runs, normalize it. -// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -// CHECK-NOT: error: -// CHECK-NOT: warning: - -//--- input.h @protocol MyProtocol @end @interface MyInterface @property(copy, readwrite) id obj1; -@property(readwrite) id *obj2; +// CHECK-LABEL: "!testLabel": "c:objc(cs)MyInterface(py)obj1" +// CHECK: "declarationFragments": [ +// CHECK-NEXT: { +// CHECK-NEXT: "kind": "keyword", +// CHECK-NEXT: "spelling": "@property" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "kind": "text", +// CHECK-NEXT: "spelling": " (" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "kind": "keyword", +// CHECK-NEXT: "spelling": "copy" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "kind": "text", +// CHECK-NEXT: "spelling": ", " +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "kind": "keyword", +// CHECK-NEXT: "spelling": "readwrite" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "kind": "text", +// CHECK-NEXT: "spelling": ") " +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "kind": "typeIdentifier", +// CHECK-NEXT: "preciseIdentifier": "c:Qoobjc(pl)MyProtocol", +// CHECK-NEXT: "spelling": "id" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "kind": "text", +// CHECK-NEXT: "spelling": " " +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "kind": "identifier", +// CHECK-NEXT: "spelling": "obj1" +// CHECK-NEXT: }, +// CHECK-NEXT: { +// CHECK-NEXT: "kind": "text", +// CHECK-NEXT: "spelling": ";" +// CHECK-NEXT: } +// CHECK-NEXT: ], @end -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" 
- }, - "module": { - "name": "", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [ - { - "kind": "memberOf", - "source": "c:objc(cs)MyInterface(py)obj1", - "target": "c:objc(cs)MyInterface", - "targetFallback": "MyInterface" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)MyInterface(py)obj2", - "target": "c:objc(cs)MyInterface", - "targetFallback": "MyInterface" - } - ], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@interface" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "MyInterface" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)MyInterface" - }, - "kind": { - "displayName": "Class", - "identifier": "objective-c.class" - }, - "location": { - "position": { - "character": 11, - "line": 3 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "MyInterface" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "MyInterface" - } - ], - "title": "MyInterface" - }, - "pathComponents": [ - "MyInterface" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@property" - }, - { - "kind": "text", - "spelling": " (" - }, - { - "kind": "keyword", - "spelling": "copy" - }, - { - "kind": "text", - "spelling": ", " - }, - { - "kind": "keyword", - "spelling": "readwrite" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:Qoobjc(pl)MyProtocol", - "spelling": "id" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "obj1" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)MyInterface(py)obj1" - }, - "kind": { - "displayName": "Instance Property", - "identifier": "objective-c.property" - }, - "location": { - "position": { - "character": 42, - "line": 4 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "obj1" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "obj1" - } - ], - "title": "obj1" - }, - "pathComponents": [ - "MyInterface", - "obj1" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@property" - }, - { - "kind": "text", - "spelling": " (" - }, - { - "kind": "keyword", - "spelling": "readwrite" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:Qoobjc(pl)MyProtocol", - "spelling": "id" - }, - { - "kind": "text", - "spelling": " * " - }, - { - "kind": "identifier", - "spelling": "obj2" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)MyInterface(py)obj2" - }, - "kind": { - "displayName": "Instance Property", - "identifier": "objective-c.property" - }, - "location": { - "position": { - "character": 37, - "line": 5 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "obj2" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "obj2" - } - ], - "title": "obj2" - }, - "pathComponents": [ - "MyInterface", - "obj2" - ] 
- }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@protocol" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "MyProtocol" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(pl)MyProtocol" - }, - "kind": { - "displayName": "Protocol", - "identifier": "objective-c.protocol" - }, - "location": { - "position": { - "character": 10, - "line": 0 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "MyProtocol" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "MyProtocol" - } - ], - "title": "MyProtocol" - }, - "pathComponents": [ - "MyProtocol" - ] - } - ] -} + +// expected-no-diagnostics diff --git a/clang/test/ExtractAPI/objc_instancetype.m b/clang/test/ExtractAPI/objc_instancetype.m index d9d259f..071ebe4 100644 --- a/clang/test/ExtractAPI/objc_instancetype.m +++ b/clang/test/ExtractAPI/objc_instancetype.m @@ -1,8 +1,8 @@ // RUN: rm -rf %t // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ - // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx -x objective-c-header %t/input.h -o %t/output.json -verify +// RUN: %t/reference.output.json.in >> %t/reference.output.json +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx -x objective-c-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. // RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ diff --git a/clang/test/ExtractAPI/objc_interface.m b/clang/test/ExtractAPI/objc_interface.m index ab1772a..4abccdd 100644 --- a/clang/test/ExtractAPI/objc_interface.m +++ b/clang/test/ExtractAPI/objc_interface.m @@ -1,701 +1,360 @@ // RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api -x objective-c-header -target arm64-apple-macosx \ -// RUN: %t/input.h -o %t/output.json | FileCheck -allow-empty %s +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \ +// RUN: -x objective-c-header -triple arm64-apple-macosx %s -o %t/output.symbols.json -verify -// Generator version is not consistent across test runs, normalize it. 
-// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -// CHECK-NOT: error: -// CHECK-NOT: warning: - -//--- input.h -@protocol Protocol; +@protocol Protocol +@end +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix SUPER @interface Super +// SUPER: "!testRelLabel": "conformsTo $ c:objc(cs)Super $ c:objc(pl)Protocol" +// SUPER-LABEL: "!testLabel": "c:objc(cs)Super" +// SUPER: "accessLevel": "public", +// SUPER: "declarationFragments": [ +// SUPER-NEXT: { +// SUPER-NEXT: "kind": "keyword", +// SUPER-NEXT: "spelling": "@interface" +// SUPER-NEXT: }, +// SUPER-NEXT: { +// SUPER-NEXT: "kind": "text", +// SUPER-NEXT: "spelling": " " +// SUPER-NEXT: }, +// SUPER-NEXT: { +// SUPER-NEXT: "kind": "identifier", +// SUPER-NEXT: "spelling": "Super" +// SUPER-NEXT: } +// SUPER-NEXT: ], +// SUPER: "kind": { +// SUPER-NEXT: "displayName": "Class", +// SUPER-NEXT: "identifier": "objective-c.class" +// SUPER-NEXT: }, +// SUPER: "title": "Super" +// SUPER: "pathComponents": [ +// SUPER-NEXT: "Super" +// SUPER-NEXT: ] + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix PROP @property(readonly, getter=getProperty) unsigned Property; +// PROP: "!testRelLabel": "memberOf $ c:objc(cs)Super(py)Property $ c:objc(cs)Super" +// PROP: "!testLabel": "c:objc(cs)Super(py)Property" +// PROP: "accessLevel": "public", +// PROP: "declarationFragments": [ +// PROP-NEXT: { +// PROP-NEXT: "kind": "keyword", +// PROP-NEXT: "spelling": "@property" +// PROP-NEXT: }, +// PROP-NEXT: { +// PROP-NEXT: "kind": "text", +// PROP-NEXT: "spelling": " (" +// PROP-NEXT: }, +// PROP-NEXT: { +// PROP-NEXT: "kind": "keyword", +// PROP-NEXT: "spelling": "readonly" +// PROP-NEXT: }, +// PROP-NEXT: { +// PROP-NEXT: "kind": "text", +// PROP-NEXT: "spelling": ", " +// PROP-NEXT: }, +// PROP-NEXT: { +// PROP-NEXT: "kind": "keyword", +// PROP-NEXT: "spelling": "getter" +// PROP-NEXT: }, +// PROP-NEXT: { +// PROP-NEXT: "kind": "text", +// PROP-NEXT: "spelling": "=" +// PROP-NEXT: }, +// PROP-NEXT: { +// PROP-NEXT: "kind": "identifier", +// PROP-NEXT: "spelling": "getProperty" +// PROP-NEXT: }, +// PROP-NEXT: { +// PROP-NEXT: "kind": "text", +// PROP-NEXT: "spelling": ") " +// PROP-NEXT: }, +// PROP-NEXT: { +// PROP-NEXT: "kind": "typeIdentifier", +// PROP-NEXT: "preciseIdentifier": "c:i", +// PROP-NEXT: "spelling": "unsigned int" +// PROP-NEXT: }, +// PROP-NEXT: { +// PROP-NEXT: "kind": "text", +// PROP-NEXT: "spelling": " " +// PROP-NEXT: }, +// PROP-NEXT: { +// PROP-NEXT: "kind": "identifier", +// PROP-NEXT: "spelling": "Property" +// PROP-NEXT: }, +// PROP-NEXT: { +// PROP-NEXT: "kind": "text", +// PROP-NEXT: "spelling": ";" +// PROP-NEXT: } +// PROP-NEXT: ], +// PROP: "kind": { +// PROP-NEXT: "displayName": "Instance Property", +// PROP-NEXT: "identifier": "objective-c.property" +// PROP-NEXT: }, +// PROP: "title": "Property" +// PROP: "pathComponents": [ +// PROP-NEXT: "Super", +// PROP-NEXT: "Property" +// PROP-NEXT: ] + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix GET + (id)getWithProperty:(unsigned) Property; +// GET: "!testRelLabel": "memberOf $ c:objc(cs)Super(cm)getWithProperty: $ c:objc(cs)Super" +// GET-LABEL: "!testLabel": "c:objc(cs)Super(cm)getWithProperty:" +// GET: "accessLevel": "public", +// GET: "declarationFragments": [ +// GET-NEXT: { +// GET-NEXT: "kind": "text", +// GET-NEXT: "spelling": "+ (" +// GET-NEXT: }, +// GET-NEXT: { 
+// GET-NEXT: "kind": "keyword", +// GET-NEXT: "spelling": "id" +// GET-NEXT: }, +// GET-NEXT: { +// GET-NEXT: "kind": "text", +// GET-NEXT: "spelling": ") " +// GET-NEXT: }, +// GET-NEXT: { +// GET-NEXT: "kind": "identifier", +// GET-NEXT: "spelling": "getWithProperty:" +// GET-NEXT: }, +// GET-NEXT: { +// GET-NEXT: "kind": "text", +// GET-NEXT: "spelling": "(" +// GET-NEXT: }, +// GET-NEXT: { +// GET-NEXT: "kind": "typeIdentifier", +// GET-NEXT: "preciseIdentifier": "c:i", +// GET-NEXT: "spelling": "unsigned int" +// GET-NEXT: }, +// GET-NEXT: { +// GET-NEXT: "kind": "text", +// GET-NEXT: "spelling": ") " +// GET-NEXT: }, +// GET-NEXT: { +// GET-NEXT: "kind": "internalParam", +// GET-NEXT: "spelling": "Property" +// GET-NEXT: }, +// GET-NEXT: { +// GET-NEXT: "kind": "text", +// GET-NEXT: "spelling": ";" +// GET-NEXT: } +// GET-NEXT: ], +// GET: "functionSignature": { +// GET-NEXT: "parameters": [ +// GET-NEXT: { +// GET-NEXT: "declarationFragments": [ +// GET-NEXT: { +// GET-NEXT: "kind": "text", +// GET-NEXT: "spelling": "(" +// GET-NEXT: }, +// GET-NEXT: { +// GET-NEXT: "kind": "typeIdentifier", +// GET-NEXT: "preciseIdentifier": "c:i", +// GET-NEXT: "spelling": "unsigned int" +// GET-NEXT: }, +// GET-NEXT: { +// GET-NEXT: "kind": "text", +// GET-NEXT: "spelling": ") " +// GET-NEXT: }, +// GET-NEXT: { +// GET-NEXT: "kind": "internalParam", +// GET-NEXT: "spelling": "Property" +// GET-NEXT: } +// GET-NEXT: ], +// GET-NEXT: "name": "Property" +// GET-NEXT: } +// GET-NEXT: ], +// GET-NEXT: "returns": [ +// GET-NEXT: { +// GET-NEXT: "kind": "keyword", +// GET-NEXT: "spelling": "id" +// GET-NEXT: } +// GET-NEXT: ] +// GET-NEXT: }, +// GET: "kind": { +// GET-NEXT: "displayName": "Type Method", +// GET-NEXT: "identifier": "objective-c.type.method" +// GET-NEXT: }, +// GET: "title": "getWithProperty:" +// GET: "pathComponents": [ +// GET-NEXT: "Super", +// GET-NEXT: "getWithProperty:" +// GET-NEXT: ] + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix SET - (void)setProperty:(unsigned) Property andOtherThing: (unsigned) Thing; +// SET: "!testRelLabel": "memberOf $ c:objc(cs)Super(im)setProperty:andOtherThing: $ c:objc(cs)Super" +// SET-LABEL: "!testLabel": "c:objc(cs)Super(im)setProperty:andOtherThing:" +// SET: "accessLevel": "public", +// SET: "declarationFragments": [ +// SET-NEXT: { +// SET-NEXT: "kind": "text", +// SET-NEXT: "spelling": "- (" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "typeIdentifier", +// SET-NEXT: "preciseIdentifier": "c:v", +// SET-NEXT: "spelling": "void" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "text", +// SET-NEXT: "spelling": ") " +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "identifier", +// SET-NEXT: "spelling": "setProperty:" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "text", +// SET-NEXT: "spelling": "(" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "typeIdentifier", +// SET-NEXT: "preciseIdentifier": "c:i", +// SET-NEXT: "spelling": "unsigned int" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "text", +// SET-NEXT: "spelling": ") " +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "internalParam", +// SET-NEXT: "spelling": "Property" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "text", +// SET-NEXT: "spelling": " " +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "identifier", +// SET-NEXT: "spelling": "andOtherThing:" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "text", +// SET-NEXT: "spelling": "(" +// SET-NEXT: }, +// SET-NEXT: { +// 
SET-NEXT: "kind": "typeIdentifier", +// SET-NEXT: "preciseIdentifier": "c:i", +// SET-NEXT: "spelling": "unsigned int" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "text", +// SET-NEXT: "spelling": ") " +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "internalParam", +// SET-NEXT: "spelling": "Thing" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "text", +// SET-NEXT: "spelling": ";" +// SET-NEXT: } +// SET-NEXT: ], +// SET: "functionSignature": { +// SET-NEXT: "parameters": [ +// SET-NEXT: { +// SET-NEXT: "declarationFragments": [ +// SET-NEXT: { +// SET-NEXT: "kind": "text", +// SET-NEXT: "spelling": "(" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "typeIdentifier", +// SET-NEXT: "preciseIdentifier": "c:i", +// SET-NEXT: "spelling": "unsigned int" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "text", +// SET-NEXT: "spelling": ") " +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "internalParam", +// SET-NEXT: "spelling": "Property" +// SET-NEXT: } +// SET-NEXT: ], +// SET-NEXT: "name": "Property" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "declarationFragments": [ +// SET-NEXT: { +// SET-NEXT: "kind": "text", +// SET-NEXT: "spelling": "(" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "typeIdentifier", +// SET-NEXT: "preciseIdentifier": "c:i", +// SET-NEXT: "spelling": "unsigned int" +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "text", +// SET-NEXT: "spelling": ") " +// SET-NEXT: }, +// SET-NEXT: { +// SET-NEXT: "kind": "internalParam", +// SET-NEXT: "spelling": "Thing" +// SET-NEXT: } +// SET-NEXT: ], +// SET-NEXT: "name": "Thing" +// SET-NEXT: } +// SET-NEXT: ], +// SET-NEXT: "returns": [ +// SET-NEXT: { +// SET-NEXT: "kind": "typeIdentifier", +// SET-NEXT: "preciseIdentifier": "c:v", +// SET-NEXT: "spelling": "void" +// SET-NEXT: } +// SET-NEXT: ] +// SET-NEXT: }, +// SET: "kind": { +// SET-NEXT: "displayName": "Instance Method", +// SET-NEXT: "identifier": "objective-c.method" +// SET-NEXT: }, +// SET: "title": "setProperty:andOtherThing:" @end +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix DERIVED @interface Derived : Super { +// DERIVED: "!testRelLabel": "inheritsFrom $ c:objc(cs)Derived $ c:objc(cs)Super" + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix IVAR char Ivar; +// IVAR: "!testRelLabel": "memberOf $ c:objc(cs)Derived@Ivar $ c:objc(cs)Derived" +// IVAR-LABEL: "!testLabel": "c:objc(cs)Derived@Ivar" +// IVAR: "accessLevel": "public", +// IVAR: "declarationFragments": [ +// IVAR-NEXT: { +// IVAR-NEXT: "kind": "typeIdentifier", +// IVAR-NEXT: "preciseIdentifier": "c:C", +// IVAR-NEXT: "spelling": "char" +// IVAR-NEXT: }, +// IVAR-NEXT: { +// IVAR-NEXT: "kind": "text", +// IVAR-NEXT: "spelling": " " +// IVAR-NEXT: }, +// IVAR-NEXT: { +// IVAR-NEXT: "kind": "identifier", +// IVAR-NEXT: "spelling": "Ivar" +// IVAR-NEXT: }, +// IVAR-NEXT: { +// IVAR-NEXT: "kind": "text", +// IVAR-NEXT: "spelling": ";" +// IVAR-NEXT: } +// IVAR-NEXT: ], +// IVAR: "kind": { +// IVAR-NEXT: "displayName": "Instance Variable", +// IVAR-NEXT: "identifier": "objective-c.ivar" +// IVAR-NEXT: }, +// IVAR: "title": "Ivar" +// IVAR: "pathComponents": [ +// IVAR-NEXT: "Derived", +// IVAR-NEXT: "Ivar" +// IVAR-NEXT: ] } -- (char)getIvar; @end -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" 
- }, - "module": { - "name": "", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [ - { - "kind": "memberOf", - "source": "c:objc(cs)Super(cm)getWithProperty:", - "target": "c:objc(cs)Super", - "targetFallback": "Super" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)Super(im)setProperty:andOtherThing:", - "target": "c:objc(cs)Super", - "targetFallback": "Super" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)Super(py)Property", - "target": "c:objc(cs)Super", - "targetFallback": "Super" - }, - { - "kind": "conformsTo", - "source": "c:objc(cs)Super", - "target": "c:objc(pl)Protocol", - "targetFallback": "Protocol" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)Derived@Ivar", - "target": "c:objc(cs)Derived", - "targetFallback": "Derived" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)Derived(im)getIvar", - "target": "c:objc(cs)Derived", - "targetFallback": "Derived" - }, - { - "kind": "inheritsFrom", - "source": "c:objc(cs)Derived", - "target": "c:objc(cs)Super", - "targetFallback": "Super" - } - ], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@interface" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Super" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Super" - }, - "kind": { - "displayName": "Class", - "identifier": "objective-c.class" - }, - "location": { - "position": { - "character": 11, - "line": 2 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Super" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Super" - } - ], - "title": "Super" - }, - "pathComponents": [ - "Super" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "+ (" - }, - { - "kind": "keyword", - "spelling": "id" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "getWithProperty:" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:i", - "spelling": "unsigned int" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "internalParam", - "spelling": "Property" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "parameters": [ - { - "declarationFragments": [ - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:i", - "spelling": "unsigned int" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "internalParam", - "spelling": "Property" - } - ], - "name": "Property" - } - ], - "returns": [ - { - "kind": "keyword", - "spelling": "id" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Super(cm)getWithProperty:" - }, - "kind": { - "displayName": "Type Method", - "identifier": "objective-c.type.method" - }, - "location": { - "position": { - "character": 0, - "line": 4 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "getWithProperty:" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "+ " - }, - { - "kind": "identifier", - "spelling": "getWithProperty:" - } - ], - "title": "getWithProperty:" - }, - "pathComponents": [ - "Super", - 
"getWithProperty:" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "- (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "setProperty:" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:i", - "spelling": "unsigned int" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "internalParam", - "spelling": "Property" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "andOtherThing:" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:i", - "spelling": "unsigned int" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "internalParam", - "spelling": "Thing" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "parameters": [ - { - "declarationFragments": [ - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:i", - "spelling": "unsigned int" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "internalParam", - "spelling": "Property" - } - ], - "name": "Property" - }, - { - "declarationFragments": [ - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:i", - "spelling": "unsigned int" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "internalParam", - "spelling": "Thing" - } - ], - "name": "Thing" - } - ], - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Super(im)setProperty:andOtherThing:" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "objective-c.method" - }, - "location": { - "position": { - "character": 0, - "line": 5 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "setProperty:andOtherThing:" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "- " - }, - { - "kind": "identifier", - "spelling": "setProperty:andOtherThing:" - } - ], - "title": "setProperty:andOtherThing:" - }, - "pathComponents": [ - "Super", - "setProperty:andOtherThing:" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@property" - }, - { - "kind": "text", - "spelling": " (" - }, - { - "kind": "keyword", - "spelling": "readonly" - }, - { - "kind": "text", - "spelling": ", " - }, - { - "kind": "keyword", - "spelling": "getter" - }, - { - "kind": "text", - "spelling": "=" - }, - { - "kind": "identifier", - "spelling": "getProperty" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:i", - "spelling": "unsigned int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Property" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Super(py)Property" - }, - "kind": { - "displayName": "Instance Property", - "identifier": "objective-c.property" - }, - "location": { - "position": { - "character": 49, - "line": 3 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Property" - } 
- ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Property" - } - ], - "title": "Property" - }, - "pathComponents": [ - "Super", - "Property" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@interface" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Derived" - }, - { - "kind": "text", - "spelling": " : " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:objc(cs)Super", - "spelling": "Super" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Derived" - }, - "kind": { - "displayName": "Class", - "identifier": "objective-c.class" - }, - "location": { - "position": { - "character": 11, - "line": 8 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Derived" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Derived" - } - ], - "title": "Derived" - }, - "pathComponents": [ - "Derived" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:C", - "spelling": "char" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Ivar" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Derived@Ivar" - }, - "kind": { - "displayName": "Instance Variable", - "identifier": "objective-c.ivar" - }, - "location": { - "position": { - "character": 7, - "line": 9 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Ivar" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Ivar" - } - ], - "title": "Ivar" - }, - "pathComponents": [ - "Derived", - "Ivar" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "- (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:C", - "spelling": "char" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "getIvar" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:C", - "spelling": "char" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Derived(im)getIvar" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "objective-c.method" - }, - "location": { - "position": { - "character": 0, - "line": 11 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "getIvar" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "- " - }, - { - "kind": "identifier", - "spelling": "getIvar" - } - ], - "title": "getIvar" - }, - "pathComponents": [ - "Derived", - "getIvar" - ] - } - ] -} +// expected-no-diagnostics diff --git a/clang/test/ExtractAPI/objc_module_category.m b/clang/test/ExtractAPI/objc_module_category.m deleted file mode 100644 index 708ed10..0000000 --- a/clang/test/ExtractAPI/objc_module_category.m +++ /dev/null @@ -1,404 +0,0 @@ -// RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api -x objective-c-header \ -// RUN: -target arm64-apple-macosx \ -// RUN: %t/input.h -o %t/output.json | FileCheck 
-allow-empty %s - -// Generator version is not consistent across test runs, normalize it. -// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -// CHECK-NOT: error: -// CHECK-NOT: warning: - -//--- input.h -#import "Foundation.h" - -/// Doc comment 1 -@interface NSString (Category1) --(void)method1; -@end - -/// Doc comment 2 -@interface NSString (Category2) --(void)method2; -@end - -//--- Foundation.h -@interface NSString -@end - -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" - }, - "module": { - "name": "", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [ - { - "kind": "extensionTo", - "source": "c:objc(cy)NSString@Category1", - "target": "c:objc(cs)NSString", - "targetFallback": "NSString" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)NSString(im)method1", - "target": "c:objc(cy)NSString@Category1", - "targetFallback": "Category1" - }, - { - "kind": "extensionTo", - "source": "c:objc(cy)NSString@Category2", - "target": "c:objc(cs)NSString", - "targetFallback": "NSString" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)NSString(im)method2", - "target": "c:objc(cy)NSString@Category2", - "targetFallback": "Category2" - } - ], - "symbols": [ - { - "accessLevel": "public", - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cy)NSString@Category1" - }, - "kind": { - "displayName": "Module Extension", - "identifier": "objective-c.module.extension" - } - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@interface" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:objc(cs)NSString", - "spelling": "NSString" - }, - { - "kind": "text", - "spelling": " (" - }, - { - "kind": "identifier", - "spelling": "Category1" - }, - { - "kind": "text", - "spelling": ")" - } - ], - "docComment": { - "lines": [ - { - "range": { - "end": { - "character": 17, - "line": 2 - }, - "start": { - "character": 4, - "line": 2 - } - }, - "text": "Doc comment 1" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cy)NSString@Category1" - }, - "kind": { - "displayName": "Class Extension", - "identifier": "objective-c.class.extension" - }, - "location": { - "position": { - "character": 11, - "line": 3 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Category1" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Category1" - } - ], - "title": "NSString (Category1)" - }, - "pathComponents": [ - "Category1" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "- (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "method1" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": 
"c:objc(cs)NSString(im)method1" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "objective-c.method" - }, - "location": { - "position": { - "character": 0, - "line": 4 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "method1" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "- " - }, - { - "kind": "identifier", - "spelling": "method1" - } - ], - "title": "method1" - }, - "pathComponents": [ - "Category1", - "method1" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@interface" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:objc(cs)NSString", - "spelling": "NSString" - }, - { - "kind": "text", - "spelling": " (" - }, - { - "kind": "identifier", - "spelling": "Category2" - }, - { - "kind": "text", - "spelling": ")" - } - ], - "docComment": { - "lines": [ - { - "range": { - "end": { - "character": 17, - "line": 7 - }, - "start": { - "character": 4, - "line": 7 - } - }, - "text": "Doc comment 2" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cy)NSString@Category2" - }, - "kind": { - "displayName": "Class Extension", - "identifier": "objective-c.class.extension" - }, - "location": { - "position": { - "character": 11, - "line": 8 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Category2" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Category2" - } - ], - "title": "NSString (Category2)" - }, - "pathComponents": [ - "Category2" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "- (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "method2" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)NSString(im)method2" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "objective-c.method" - }, - "location": { - "position": { - "character": 0, - "line": 9 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "method2" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "- " - }, - { - "kind": "identifier", - "spelling": "method2" - } - ], - "title": "method2" - }, - "pathComponents": [ - "Category2", - "method2" - ] - } - ] -} diff --git a/clang/test/ExtractAPI/objc_property.m b/clang/test/ExtractAPI/objc_property.m index 5712abc..f05584c 100644 --- a/clang/test/ExtractAPI/objc_property.m +++ b/clang/test/ExtractAPI/objc_property.m @@ -1,608 +1,26 @@ // RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx -x objective-c-header %t/input.h -o %t/output.json -verify +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \ +// RUN: -triple arm64-apple-macosx -x objective-c-header %s -o - -verify | FileCheck %s -// Generator version is not consistent across test runs, 
normalize it. -// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -//--- input.h @protocol Protocol @property(class) int myProtocolTypeProp; +// CHECK-DAG: "!testRelLabel": "memberOf $ c:objc(pl)Protocol(cpy)myProtocolTypeProp $ c:objc(pl)Protocol" @property int myProtocolInstanceProp; +// CHECK-DAG: "!testRelLabel": "memberOf $ c:objc(pl)Protocol(py)myProtocolInstanceProp $ c:objc(pl)Protocol" @end @interface Interface @property(class) int myInterfaceTypeProp; +// CHECk-DAG: "!testRelLabel": "memberOf $ c:objc(cs)Interface(cpy)myInterfaceTypeProp $ c:objc(cs)Interface" @property int myInterfaceInstanceProp; +// CHECK-DAG: "!testRelLabel": "memberOf $ c:objc(cs)Interface(py)myInterfaceInstanceProp $ c:objc(cs)Interface" @end @interface Interface (Category) @property(class) int myCategoryTypeProp; +// CHECK-DAG: "!testRelLabel": "memberOf $ c:objc(cs)Interface(cpy)myCategoryTypeProp $ c:objc(cs)Interface" @property int myCategoryInstanceProp; +// CHECK-DAG "!testRelLabel": "memberOf $ c:objc(cs)Interface(py)myCategoryInstanceProp $ c:objc(cs)Interface" @end -// expected-no-diagnostics -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" - }, - "module": { - "name": "", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [ - { - "kind": "memberOf", - "source": "c:objc(cs)Interface(cpy)myInterfaceTypeProp", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)Interface(py)myInterfaceInstanceProp", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)Interface(cpy)myCategoryTypeProp", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)Interface(py)myCategoryInstanceProp", - "target": "c:objc(cs)Interface", - "targetFallback": "Interface" - }, - { - "kind": "conformsTo", - "source": "c:objc(cs)Interface", - "target": "c:objc(pl)Protocol", - "targetFallback": "Protocol" - }, - { - "kind": "memberOf", - "source": "c:objc(pl)Protocol(cpy)myProtocolTypeProp", - "target": "c:objc(pl)Protocol", - "targetFallback": "Protocol" - }, - { - "kind": "memberOf", - "source": "c:objc(pl)Protocol(py)myProtocolInstanceProp", - "target": "c:objc(pl)Protocol", - "targetFallback": "Protocol" - } - ], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@interface" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Interface" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Interface" - }, - "kind": { - "displayName": "Class", - "identifier": "objective-c.class" - }, - "location": { - "position": { - "character": 11, - "line": 5 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Interface" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Interface" - } - ], - "title": "Interface" - }, - "pathComponents": [ - "Interface" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - 
"spelling": "@property" - }, - { - "kind": "text", - "spelling": " (" - }, - { - "kind": "keyword", - "spelling": "class" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "myInterfaceTypeProp" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Interface(cpy)myInterfaceTypeProp" - }, - "kind": { - "displayName": "Type Property", - "identifier": "objective-c.type.property" - }, - "location": { - "position": { - "character": 21, - "line": 6 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "myInterfaceTypeProp" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "myInterfaceTypeProp" - } - ], - "title": "myInterfaceTypeProp" - }, - "pathComponents": [ - "Interface", - "myInterfaceTypeProp" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@property" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "myInterfaceInstanceProp" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Interface(py)myInterfaceInstanceProp" - }, - "kind": { - "displayName": "Instance Property", - "identifier": "objective-c.property" - }, - "location": { - "position": { - "character": 14, - "line": 7 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "myInterfaceInstanceProp" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "myInterfaceInstanceProp" - } - ], - "title": "myInterfaceInstanceProp" - }, - "pathComponents": [ - "Interface", - "myInterfaceInstanceProp" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@property" - }, - { - "kind": "text", - "spelling": " (" - }, - { - "kind": "keyword", - "spelling": "class" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "myCategoryTypeProp" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Interface(cpy)myCategoryTypeProp" - }, - "kind": { - "displayName": "Type Property", - "identifier": "objective-c.type.property" - }, - "location": { - "position": { - "character": 21, - "line": 11 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "myCategoryTypeProp" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "myCategoryTypeProp" - } - ], - "title": "myCategoryTypeProp" - }, - "pathComponents": [ - "Interface", - "myCategoryTypeProp" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@property" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": 
"identifier", - "spelling": "myCategoryInstanceProp" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)Interface(py)myCategoryInstanceProp" - }, - "kind": { - "displayName": "Instance Property", - "identifier": "objective-c.property" - }, - "location": { - "position": { - "character": 14, - "line": 12 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "myCategoryInstanceProp" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "myCategoryInstanceProp" - } - ], - "title": "myCategoryInstanceProp" - }, - "pathComponents": [ - "Interface", - "myCategoryInstanceProp" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@protocol" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Protocol" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(pl)Protocol" - }, - "kind": { - "displayName": "Protocol", - "identifier": "objective-c.protocol" - }, - "location": { - "position": { - "character": 10, - "line": 0 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Protocol" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Protocol" - } - ], - "title": "Protocol" - }, - "pathComponents": [ - "Protocol" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@property" - }, - { - "kind": "text", - "spelling": " (" - }, - { - "kind": "keyword", - "spelling": "class" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "myProtocolTypeProp" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(pl)Protocol(cpy)myProtocolTypeProp" - }, - "kind": { - "displayName": "Type Property", - "identifier": "objective-c.type.property" - }, - "location": { - "position": { - "character": 21, - "line": 1 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "myProtocolTypeProp" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "myProtocolTypeProp" - } - ], - "title": "myProtocolTypeProp" - }, - "pathComponents": [ - "Protocol", - "myProtocolTypeProp" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@property" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "myProtocolInstanceProp" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(pl)Protocol(py)myProtocolInstanceProp" - }, - "kind": { - "displayName": "Instance Property", - "identifier": "objective-c.property" - }, - "location": { - "position": { - "character": 14, - "line": 2 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "myProtocolInstanceProp" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": 
"myProtocolInstanceProp" - } - ], - "title": "myProtocolInstanceProp" - }, - "pathComponents": [ - "Protocol", - "myProtocolInstanceProp" - ] - } - ] -} +// expected-no-diagnostics diff --git a/clang/test/ExtractAPI/objc_protocol.m b/clang/test/ExtractAPI/objc_protocol.m index a04936f..06f7ee3 100644 --- a/clang/test/ExtractAPI/objc_protocol.m +++ b/clang/test/ExtractAPI/objc_protocol.m @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api -x objective-c-header -target arm64-apple-macosx \ +// RUN: %clang -extract-api --pretty-sgf -x objective-c-header -target arm64-apple-macosx \ // RUN: %t/input.h -o %t/output.json | FileCheck -allow-empty %s // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/objc_various_categories.m b/clang/test/ExtractAPI/objc_various_categories.m deleted file mode 100644 index adaef5a..0000000 --- a/clang/test/ExtractAPI/objc_various_categories.m +++ /dev/null @@ -1,507 +0,0 @@ -// RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api -x objective-c-header \ -// RUN: -target arm64-apple-macosx \ -// RUN: %t/myclass_1.h \ -// RUN: %t/input.h -o %t/output.json | FileCheck -allow-empty %s - -// Generator version is not consistent across test runs, normalize it. -// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -// CHECK-NOT: error: -// CHECK-NOT: warning: - -//--- input.h -#import "myclass_1.h" -#import "Foundation.h" - -@interface MyClass1 (MyCategory1) -- (int) SomeMethod; -@end - -@interface NSString (Category1) --(void) StringMethod; -@end - -@interface NSString (Category2) --(void) StringMethod2; -@end - -//--- myclass_1.h -@interface MyClass1 -@end - -//--- Foundation.h -@interface NSString -@end - -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" 
- }, - "module": { - "name": "", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [ - { - "kind": "memberOf", - "source": "c:objc(cs)MyClass1(im)SomeMethod", - "target": "c:objc(cs)MyClass1", - "targetFallback": "MyClass1" - }, - { - "kind": "extensionTo", - "source": "c:objc(cy)NSString@Category1", - "target": "c:objc(cs)NSString", - "targetFallback": "NSString" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)NSString(im)StringMethod", - "target": "c:objc(cy)NSString@Category1", - "targetFallback": "Category1" - }, - { - "kind": "extensionTo", - "source": "c:objc(cy)NSString@Category2", - "target": "c:objc(cs)NSString", - "targetFallback": "NSString" - }, - { - "kind": "memberOf", - "source": "c:objc(cs)NSString(im)StringMethod2", - "target": "c:objc(cy)NSString@Category2", - "targetFallback": "Category2" - } - ], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@interface" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "MyClass1" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)MyClass1" - }, - "kind": { - "displayName": "Class", - "identifier": "objective-c.class" - }, - "location": { - "position": { - "character": 11, - "line": 0 - }, - "uri": "file://INPUT_DIR/myclass_1.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "MyClass1" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "MyClass1" - } - ], - "title": "MyClass1" - }, - "pathComponents": [ - "MyClass1" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "- (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "SomeMethod" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)MyClass1(im)SomeMethod" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "objective-c.method" - }, - "location": { - "position": { - "character": 0, - "line": 4 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "SomeMethod" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "- " - }, - { - "kind": "identifier", - "spelling": "SomeMethod" - } - ], - "title": "SomeMethod" - }, - "pathComponents": [ - "MyClass1", - "SomeMethod" - ] - }, - { - "accessLevel": "public", - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cy)NSString@Category1" - }, - "kind": { - "displayName": "Module Extension", - "identifier": "objective-c.module.extension" - } - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@interface" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:objc(cs)NSString", - "spelling": "NSString" - }, - { - "kind": "text", - "spelling": " (" - }, - { - "kind": "identifier", - "spelling": "Category1" - }, - { - "kind": "text", - "spelling": ")" - } - ], - "identifier": { - 
"interfaceLanguage": "objective-c", - "precise": "c:objc(cy)NSString@Category1" - }, - "kind": { - "displayName": "Class Extension", - "identifier": "objective-c.class.extension" - }, - "location": { - "position": { - "character": 11, - "line": 7 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Category1" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Category1" - } - ], - "title": "NSString (Category1)" - }, - "pathComponents": [ - "Category1" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "- (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "StringMethod" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)NSString(im)StringMethod" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "objective-c.method" - }, - "location": { - "position": { - "character": 0, - "line": 8 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "StringMethod" - } - ], - "subHeading": [ - { - "kind": "text", - "spelling": "- " - }, - { - "kind": "identifier", - "spelling": "StringMethod" - } - ], - "title": "StringMethod" - }, - "pathComponents": [ - "Category1", - "StringMethod" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "@interface" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:objc(cs)NSString", - "spelling": "NSString" - }, - { - "kind": "text", - "spelling": " (" - }, - { - "kind": "identifier", - "spelling": "Category2" - }, - { - "kind": "text", - "spelling": ")" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cy)NSString@Category2" - }, - "kind": { - "displayName": "Class Extension", - "identifier": "objective-c.class.extension" - }, - "location": { - "position": { - "character": 11, - "line": 11 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Category2" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Category2" - } - ], - "title": "NSString (Category2)" - }, - "pathComponents": [ - "Category2" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "text", - "spelling": "- (" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": ") " - }, - { - "kind": "identifier", - "spelling": "StringMethod2" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "functionSignature": { - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:objc(cs)NSString(im)StringMethod2" - }, - "kind": { - "displayName": "Instance Method", - "identifier": "objective-c.method" - }, - "location": { - "position": { - "character": 0, - "line": 12 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "StringMethod2" - } - 
], - "subHeading": [ - { - "kind": "text", - "spelling": "- " - }, - { - "kind": "identifier", - "spelling": "StringMethod2" - } - ], - "title": "StringMethod2" - }, - "pathComponents": [ - "Category2", - "StringMethod2" - ] - } - ] -} diff --git a/clang/test/ExtractAPI/operator_overload.cpp b/clang/test/ExtractAPI/operator_overload.cpp index 511a5a7..9430c58 100644 --- a/clang/test/ExtractAPI/operator_overload.cpp +++ b/clang/test/ExtractAPI/operator_overload.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/relative_include.m b/clang/test/ExtractAPI/relative_include.m index 46cbdaee..e5a0268 100644 --- a/clang/test/ExtractAPI/relative_include.m +++ b/clang/test/ExtractAPI/relative_include.m @@ -15,7 +15,7 @@ // RUN: %hmaptool write %t/headermap.hmap.json %t/headermap.hmap // Input headers use paths to the framework root/DSTROOT -// RUN: %clang_cc1 -extract-api -v --product-name=MyFramework \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -v --product-name=MyFramework \ // RUN: -triple arm64-apple-macosx \ // RUN: -iquote%t -I%t/headermap.hmap -F%t/Frameworks \ // RUN: -x objective-c-header \ diff --git a/clang/test/ExtractAPI/simple_inheritance.cpp b/clang/test/ExtractAPI/simple_inheritance.cpp index 5fe99af..58c3c4e 100644 --- a/clang/test/ExtractAPI/simple_inheritance.cpp +++ b/clang/test/ExtractAPI/simple_inheritance.cpp @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx \ // RUN: -x c++-header %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/struct.c b/clang/test/ExtractAPI/struct.c index 4284b73..1995a6ae 100644 --- a/clang/test/ExtractAPI/struct.c +++ b/clang/test/ExtractAPI/struct.c @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api -target arm64-apple-macosx \ +// RUN: %clang -extract-api --pretty-sgf -target arm64-apple-macosx \ // RUN: %t/input.h -o %t/output.json | FileCheck -allow-empty %s // Generator version is not consistent across test runs, normalize it. 
diff --git a/clang/test/ExtractAPI/typedef.c b/clang/test/ExtractAPI/typedef.c index c30e655..a4c3619 100644 --- a/clang/test/ExtractAPI/typedef.c +++ b/clang/test/ExtractAPI/typedef.c @@ -1,391 +1,93 @@ // RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api --product-name=Typedef -target arm64-apple-macosx \ -// RUN: -x objective-c-header %t/input.h -o %t/output.json | FileCheck -allow-empty %s +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \ +// RUN: -triple arm64-apple-macosx -x objective-c-header %s -o %t/output.symbols.json -verify -// Generator version is not consistent across test runs, normalize it. -// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -// CHECK-NOT: error: -// CHECK-NOT: warning: - -//--- input.h +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix MYINT typedef int MyInt; +// MYINT-LABEL: "!testLabel": "c:typedef.c@T@MyInt" +// MYINT: "accessLevel": "public", +// MYINT: "declarationFragments": [ +// MYINT-NEXT: { +// MYINT-NEXT: "kind": "keyword", +// MYINT-NEXT: "spelling": "typedef" +// MYINT-NEXT: }, +// MYINT-NEXT: { +// MYINT-NEXT: "kind": "text", +// MYINT-NEXT: "spelling": " " +// MYINT-NEXT: }, +// MYINT-NEXT: { +// MYINT-NEXT: "kind": "typeIdentifier", +// MYINT-NEXT: "preciseIdentifier": "c:I", +// MYINT-NEXT: "spelling": "int" +// MYINT-NEXT: }, +// MYINT-NEXT: { +// MYINT-NEXT: "kind": "text", +// MYINT-NEXT: "spelling": " " +// MYINT-NEXT: }, +// MYINT-NEXT: { +// MYINT-NEXT: "kind": "identifier", +// MYINT-NEXT: "spelling": "MyInt" +// MYINT-NEXT: }, +// MYINT-NEXT: { +// MYINT-NEXT: "kind": "text", +// MYINT-NEXT: "spelling": ";" +// MYINT-NEXT: } +// MYINT-NEXT: ], +// MYINT: "kind": { +// MYINT-NEXT: "displayName": "Type Alias", +// MYINT-NEXT: "identifier": "objective-c.typealias" +// MYINT-NEXT: }, +// MYINT: "title": "MyInt" +// MYINT: "pathComponents": [ +// MYINT-NEXT: "MyInt" +// MYINT-NEXT: ], +// MYINT: "type": "c:I" +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix BARPTR typedef struct Bar *BarPtr; +// BARPTR-LABEL: "!testLabel": "c:typedef.c@T@BarPtr" +// BARPTR: "accessLevel": "public", +// BARPTR: "declarationFragments": [ +// BARPTR-NEXT: { +// BARPTR-NEXT: "kind": "keyword", +// BARPTR-NEXT: "spelling": "typedef" +// BARPTR-NEXT: }, +// BARPTR-NEXT: { +// BARPTR-NEXT: "kind": "text", +// BARPTR-NEXT: "spelling": " " +// BARPTR-NEXT: }, +// BARPTR-NEXT: { +// BARPTR-NEXT: "kind": "keyword", +// BARPTR-NEXT: "spelling": "struct" +// BARPTR-NEXT: }, +// BARPTR-NEXT: { +// BARPTR-NEXT: "kind": "text", +// BARPTR-NEXT: "spelling": " " +// BARPTR-NEXT: }, +// BARPTR-NEXT: { +// BARPTR-NEXT: "kind": "typeIdentifier", +// BARPTR-NEXT: "preciseIdentifier": "c:@S@Bar", +// BARPTR-NEXT: "spelling": "Bar" +// BARPTR-NEXT: }, +// BARPTR-NEXT: { +// BARPTR-NEXT: "kind": "text", +// BARPTR-NEXT: "spelling": " * " +// BARPTR-NEXT: }, +// BARPTR-NEXT: { +// BARPTR-NEXT: "kind": "identifier", +// BARPTR-NEXT: "spelling": "BarPtr" +// BARPTR-NEXT: }, +// BARPTR-NEXT: { +// BARPTR-NEXT: "kind": "text", +// BARPTR-NEXT: "spelling": ";" +// BARPTR-NEXT: } +// BARPTR-NEXT: ], +// BARPTR: "type": "c:*$@S@Bar" +// RUN: FileCheck %s --input-file %t/output.symbols.json void foo(BarPtr value); void baz(BarPtr 
*value); +// CHECK-NOT: struct Bar * -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" - }, - "module": { - "name": "Typedef", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "foo" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:input.h@T@BarPtr", - "spelling": "BarPtr" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "value" - }, - { - "kind": "text", - "spelling": ");" - } - ], - "functionSignature": { - "parameters": [ - { - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:input.h@T@BarPtr", - "spelling": "BarPtr" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "internalParam", - "spelling": "value" - } - ], - "name": "value" - } - ], - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:@F@foo" - }, - "kind": { - "displayName": "Function", - "identifier": "objective-c.func" - }, - "location": { - "position": { - "character": 5, - "line": 4 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "foo" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "foo" - } - ], - "title": "foo" - }, - "pathComponents": [ - "foo" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "baz" - }, - { - "kind": "text", - "spelling": "(" - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:input.h@T@BarPtr", - "spelling": "BarPtr" - }, - { - "kind": "text", - "spelling": " * " - }, - { - "kind": "internalParam", - "spelling": "value" - }, - { - "kind": "text", - "spelling": ");" - } - ], - "functionSignature": { - "parameters": [ - { - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:input.h@T@BarPtr", - "spelling": "BarPtr" - }, - { - "kind": "text", - "spelling": " * " - }, - { - "kind": "internalParam", - "spelling": "value" - } - ], - "name": "value" - } - ], - "returns": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:v", - "spelling": "void" - } - ] - }, - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:@F@baz" - }, - "kind": { - "displayName": "Function", - "identifier": "objective-c.func" - }, - "location": { - "position": { - "character": 5, - "line": 6 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "baz" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "baz" - } - ], - "title": "baz" - }, - "pathComponents": [ - "baz" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - 
"kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "MyInt" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:input.h@T@MyInt" - }, - "kind": { - "displayName": "Type Alias", - "identifier": "objective-c.typealias" - }, - "location": { - "position": { - "character": 12, - "line": 0 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "MyInt" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "MyInt" - } - ], - "title": "MyInt" - }, - "pathComponents": [ - "MyInt" - ], - "type": "c:I" - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "keyword", - "spelling": "struct" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:@S@Bar", - "spelling": "Bar" - }, - { - "kind": "text", - "spelling": " * " - }, - { - "kind": "identifier", - "spelling": "BarPtr" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "objective-c", - "precise": "c:input.h@T@BarPtr" - }, - "kind": { - "displayName": "Type Alias", - "identifier": "objective-c.typealias" - }, - "location": { - "position": { - "character": 20, - "line": 2 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "BarPtr" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "BarPtr" - } - ], - "title": "BarPtr" - }, - "pathComponents": [ - "BarPtr" - ], - "type": "c:*$@S@Bar" - } - ] -} +// expected-no-diagnostics diff --git a/clang/test/ExtractAPI/typedef_anonymous_record.c b/clang/test/ExtractAPI/typedef_anonymous_record.c index 3e4c3e1..9e00ff7 100644 --- a/clang/test/ExtractAPI/typedef_anonymous_record.c +++ b/clang/test/ExtractAPI/typedef_anonymous_record.c @@ -1,468 +1,158 @@ // RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api --product-name=TypedefChain -triple arm64-apple-macosx \ -// RUN: -x c-header %t/input.h -o %t/output.json -verify +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \ +// RUN: --product-name=TypedefChain -triple arm64-apple-macosx -x c-header %s -o %t/typedefchain.symbols.json -verify -// Generator version is not consistent across test runs, normalize it. 
-// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -//--- input.h +// RUN: FileCheck %s --input-file %t/typedefchain.symbols.json --check-prefix MYSTRUCT typedef struct { } MyStruct; +// MYSTRUCT-LABEL: "!testLabel": "c:@SA@MyStruct" +// MYSTRUCT: "accessLevel": "public", +// MYSTRUCT: "declarationFragments": [ +// MYSTRUCT-NEXT: { +// MYSTRUCT-NEXT: "kind": "keyword", +// MYSTRUCT-NEXT: "spelling": "typedef" +// MYSTRUCT-NEXT: }, +// MYSTRUCT-NEXT: { +// MYSTRUCT-NEXT: "kind": "text", +// MYSTRUCT-NEXT: "spelling": " " +// MYSTRUCT-NEXT: }, +// MYSTRUCT-NEXT: { +// MYSTRUCT-NEXT: "kind": "keyword", +// MYSTRUCT-NEXT: "spelling": "struct" +// MYSTRUCT-NEXT: }, +// MYSTRUCT-NEXT: { +// MYSTRUCT-NEXT: "kind": "text", +// MYSTRUCT-NEXT: "spelling": " " +// MYSTRUCT-NEXT: }, +// MYSTRUCT-NEXT: { +// MYSTRUCT-NEXT: "kind": "identifier", +// MYSTRUCT-NEXT: "spelling": "MyStruct" +// MYSTRUCT-NEXT: }, +// MYSTRUCT-NEXT: { +// MYSTRUCT-NEXT: "kind": "text", +// MYSTRUCT-NEXT: "spelling": ";" +// MYSTRUCT-NEXT: } +// MYSTRUCT-NEXT: ] +// MYSTRUCT: "kind": { +// MYSTRUCT-NEXT: "displayName": "Structure", +// MYSTRUCT-NEXT: "identifier": "c.struct" +// MYSTRUCT: "title": "MyStruct" +// MYSTRUCT: "pathComponents": [ +// MYSTRUCT-NEXT: "MyStruct" +// MYSTRUCT-NEXT: ] + +// RUN: FileCheck %s --input-file %t/typedefchain.symbols.json --check-prefix MYSTRUCTSTRUCT typedef MyStruct MyStructStruct; -typedef MyStructStruct MyStructStructStruct; +// MYSTRUCTSTRUCT-LABEL: "!testLabel": "c:typedef_anonymous_record.c@T@MyStructStruct" +// MYSTRUCTSTRUCT: "accessLevel": "public", +// MYSTRUCTSTRUCT: "declarationFragments": [ +// MYSTRUCTSTRUCT-NEXT: { +// MYSTRUCTSTRUCT-NEXT: "kind": "keyword", +// MYSTRUCTSTRUCT-NEXT: "spelling": "typedef" +// MYSTRUCTSTRUCT-NEXT: }, +// MYSTRUCTSTRUCT-NEXT: { +// MYSTRUCTSTRUCT-NEXT: "kind": "text", +// MYSTRUCTSTRUCT-NEXT: "spelling": " " +// MYSTRUCTSTRUCT-NEXT: }, +// MYSTRUCTSTRUCT-NEXT: { +// MYSTRUCTSTRUCT-NEXT: "kind": "typeIdentifier", +// MYSTRUCTSTRUCT-NEXT: "preciseIdentifier": "c:@SA@MyStruct", +// MYSTRUCTSTRUCT-NEXT: "spelling": "MyStruct" +// MYSTRUCTSTRUCT-NEXT: }, +// MYSTRUCTSTRUCT-NEXT: { +// MYSTRUCTSTRUCT-NEXT: "kind": "text", +// MYSTRUCTSTRUCT-NEXT: "spelling": " " +// MYSTRUCTSTRUCT-NEXT: }, +// MYSTRUCTSTRUCT-NEXT: { +// MYSTRUCTSTRUCT-NEXT: "kind": "identifier", +// MYSTRUCTSTRUCT-NEXT: "spelling": "MyStructStruct" +// MYSTRUCTSTRUCT-NEXT: }, +// MYSTRUCTSTRUCT-NEXT: { +// MYSTRUCTSTRUCT-NEXT: "kind": "text", +// MYSTRUCTSTRUCT-NEXT: "spelling": ";" +// MYSTRUCTSTRUCT-NEXT: } +// MYSTRUCTSTRUCT-NEXT:], +// MYSTRUCTSTRUCT: "kind": { +// MYSTRUCTSTRUCT-NEXT: "displayName": "Type Alias", +// MYSTRUCTSTRUCT-NEXT: "identifier": "c.typealias" + +// RUN: FileCheck %s --input-file %t/typedefchain.symbols.json --check-prefix MYENUM +// RUN: FileCheck %s --input-file %t/typedefchain.symbols.json --check-prefix CASE typedef enum { Case } MyEnum; +// MYENUM: "source": "c:@EA@MyEnum@Case", +// MYENUM-NEXT: "target": "c:@EA@MyEnum", +// MYENUM-NEXT: "targetFallback": "MyEnum" +// MYENUM-LABEL: "!testLabel": "c:@EA@MyEnum" +// MYENUM: "declarationFragments": [ +// MYENUM-NEXT: { +// MYENUM-NEXT: "kind": "keyword", +// MYENUM-NEXT: "spelling": "typedef" +// MYENUM-NEXT: }, +// MYENUM-NEXT: { +// MYENUM-NEXT: "kind": "text", +// MYENUM-NEXT: "spelling": " " +// MYENUM-NEXT: }, +// MYENUM-NEXT: { +// MYENUM-NEXT: "kind": "keyword", +// 
MYENUM-NEXT: "spelling": "enum" +// MYENUM-NEXT: }, +// MYENUM-NEXT: { +// MYENUM-NEXT: "kind": "text", +// MYENUM-NEXT: "spelling": " " +// MYENUM-NEXT: }, +// MYENUM-NEXT: { +// MYENUM-NEXT: "kind": "identifier", +// MYENUM-NEXT: "spelling": "MyEnum" +// MYENUM-NEXT: }, +// MYENUM-NEXT: { +// MYENUM-NEXT: "kind": "text", +// MYENUM-NEXT: "spelling": ";" +// MYENUM-NEXT: } +// MYENUM-NEXT:], +// MYENUM: "kind": { +// MYENUM-NEXT: "displayName": "Enumeration", +// MYENUM-NEXT: "identifier": "c.enum" +// MYENUM: "title": "MyEnum" + +// CASE-LABEL: "!testLabel": "c:@EA@MyEnum@Case" +// CASE: "pathComponents": [ +// CASE-NEXT: "MyEnum", +// CASE-NEXT: "Case" +// CASE-NEXT: ] + +// RUN: FileCheck %s --input-file %t/typedefchain.symbols.json --check-prefix MYENUMENUM typedef MyEnum MyEnumEnum; -typedef MyEnumEnum MyEnumEnumEnum; -// expected-no-diagnostics +// MYENUMENUM-LABEL: "!testLabel": "c:typedef_anonymous_record.c@T@MyEnumEnum" +// MYENUMENUM: "declarationFragments": [ +// MYENUMENUM-NEXT: { +// MYENUMENUM-NEXT: "kind": "keyword", +// MYENUMENUM-NEXT: "spelling": "typedef" +// MYENUMENUM-NEXT: }, +// MYENUMENUM-NEXT: { +// MYENUMENUM-NEXT: "kind": "text", +// MYENUMENUM-NEXT: "spelling": " " +// MYENUMENUM-NEXT: }, +// MYENUMENUM-NEXT: { +// MYENUMENUM-NEXT: "kind": "typeIdentifier", +// MYENUMENUM-NEXT: "preciseIdentifier": "c:@EA@MyEnum", +// MYENUMENUM-NEXT: "spelling": "MyEnum" +// MYENUMENUM-NEXT: }, +// MYENUMENUM-NEXT: { +// MYENUMENUM-NEXT: "kind": "text", +// MYENUMENUM-NEXT: "spelling": " " +// MYENUMENUM-NEXT: }, +// MYENUMENUM-NEXT: { +// MYENUMENUM-NEXT: "kind": "identifier", +// MYENUMENUM-NEXT: "spelling": "MyEnumEnum" +// MYENUMENUM-NEXT: }, +// MYENUMENUM-NEXT: { +// MYENUMENUM-NEXT: "kind": "text", +// MYENUMENUM-NEXT: "spelling": ";" +// MYENUMENUM-NEXT: } +// MYENUMENUM-NEXT: ], +// MYENUMENUM: "kind": { +// MYENUMENUM-NEXT: "displayName": "Type Alias", +// MYENUMENUM-NEXT: "identifier": "c.typealias" +// MYENUMENUM-NEXT: }, +// MYENUMENUM: "title": "MyEnumEnum" -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" 
- }, - "module": { - "name": "TypedefChain", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [ - { - "kind": "memberOf", - "source": "c:@EA@MyEnum@Case", - "target": "c:@EA@MyEnum", - "targetFallback": "MyEnum" - } - ], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "keyword", - "spelling": "enum" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "MyEnum" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@EA@MyEnum" - }, - "kind": { - "displayName": "Enumeration", - "identifier": "c.enum" - }, - "location": { - "position": { - "character": 8, - "line": 3 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "MyEnum" - } - ], - "title": "MyEnum" - }, - "pathComponents": [ - "MyEnum" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "identifier", - "spelling": "Case" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@EA@MyEnum@Case" - }, - "kind": { - "displayName": "Enumeration Case", - "identifier": "c.enum.case" - }, - "location": { - "position": { - "character": 15, - "line": 3 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Case" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Case" - } - ], - "title": "Case" - }, - "pathComponents": [ - "MyEnum", - "Case" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "keyword", - "spelling": "struct" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "MyStruct" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@SA@MyStruct" - }, - "kind": { - "displayName": "Structure", - "identifier": "c.struct" - }, - "location": { - "position": { - "character": 8, - "line": 0 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "MyStruct" - } - ], - "title": "MyStruct" - }, - "pathComponents": [ - "MyStruct" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:@SA@MyStruct", - "spelling": "MyStruct" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "MyStructStruct" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:input.h@T@MyStructStruct" - }, - "kind": { - "displayName": "Type Alias", - "identifier": "c.typealias" - }, - "location": { - "position": { - "character": 17, - "line": 1 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "MyStructStruct" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "MyStructStruct" - } - ], - "title": "MyStructStruct" - }, - "pathComponents": [ - "MyStructStruct" - ], 
- "type": "c:@SA@MyStruct" - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:input.h@T@MyStructStruct", - "spelling": "MyStructStruct" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "MyStructStructStruct" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:input.h@T@MyStructStructStruct" - }, - "kind": { - "displayName": "Type Alias", - "identifier": "c.typealias" - }, - "location": { - "position": { - "character": 23, - "line": 2 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "MyStructStructStruct" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "MyStructStructStruct" - } - ], - "title": "MyStructStructStruct" - }, - "pathComponents": [ - "MyStructStructStruct" - ], - "type": "c:input.h@T@MyStructStruct" - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:@EA@MyEnum", - "spelling": "MyEnum" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "MyEnumEnum" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:input.h@T@MyEnumEnum" - }, - "kind": { - "displayName": "Type Alias", - "identifier": "c.typealias" - }, - "location": { - "position": { - "character": 15, - "line": 4 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "MyEnumEnum" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "MyEnumEnum" - } - ], - "title": "MyEnumEnum" - }, - "pathComponents": [ - "MyEnumEnum" - ], - "type": "c:@EA@MyEnum" - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:input.h@T@MyEnumEnum", - "spelling": "MyEnumEnum" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "MyEnumEnumEnum" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:input.h@T@MyEnumEnumEnum" - }, - "kind": { - "displayName": "Type Alias", - "identifier": "c.typealias" - }, - "location": { - "position": { - "character": 19, - "line": 5 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "MyEnumEnumEnum" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "MyEnumEnumEnum" - } - ], - "title": "MyEnumEnumEnum" - }, - "pathComponents": [ - "MyEnumEnumEnum" - ], - "type": "c:input.h@T@MyEnumEnum" - } - ] -} +// expected-no-diagnostics diff --git a/clang/test/ExtractAPI/typedef_chain.c b/clang/test/ExtractAPI/typedef_chain.c index 9e6151c..05d4eb5 100644 --- a/clang/test/ExtractAPI/typedef_chain.c +++ b/clang/test/ExtractAPI/typedef_chain.c @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api --product-name=TypedefChain -target 
arm64-apple-macosx \ +// RUN: %clang -extract-api --pretty-sgf --product-name=TypedefChain -target arm64-apple-macosx \ // RUN: -x objective-c-header %t/input.h -o %t/output.json | FileCheck -allow-empty %s // Generator version is not consistent across test runs, normalize it. diff --git a/clang/test/ExtractAPI/typedef_struct_enum.c b/clang/test/ExtractAPI/typedef_struct_enum.c index 15357d5..fb6fbe9 100644 --- a/clang/test/ExtractAPI/typedef_struct_enum.c +++ b/clang/test/ExtractAPI/typedef_struct_enum.c @@ -1,445 +1,146 @@ // RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang -extract-api -target arm64-apple-macosx \ -// RUN: %t/input.h -o %t/output.json | FileCheck -allow-empty %s +// RUN: %clang_cc1 -extract-api --pretty-sgf --emit-sgf-symbol-labels-for-testing \ +// RUN: -x c-header %s -triple arm64-apple-macos -o %t/output.symbols.json -verify -// Generator version is not consistent across test runs, normalize it. -// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -// CHECK-NOT: error: -// CHECK-NOT: warning: - -//--- input.h +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix TEST typedef struct Test { } Test; +// TEST-LABEL: "!testLabel": "c:@S@Test" +// TEST: "declarationFragments": [ +// TEST-NEXT: { +// TEST-NEXT: "kind": "keyword", +// TEST-NEXT: "spelling": "typedef" +// TEST-NEXT: }, +// TEST-NEXT: { +// TEST-NEXT: "kind": "text", +// TEST-NEXT: "spelling": " " +// TEST-NEXT: }, +// TEST-NEXT: { +// TEST-NEXT: "kind": "keyword", +// TEST-NEXT: "spelling": "struct" +// TEST-NEXT: }, +// TEST-NEXT: { +// TEST-NEXT: "kind": "text", +// TEST-NEXT: "spelling": " " +// TEST-NEXT: }, +// TEST-NEXT: { +// TEST-NEXT: "kind": "identifier", +// TEST-NEXT: "spelling": "Test" +// TEST-NEXT: }, +// TEST-NEXT: { +// TEST-NEXT: "kind": "text", +// TEST-NEXT: "spelling": " { ... } " +// TEST-NEXT: }, +// TEST-NEXT: { +// TEST-NEXT: "kind": "identifier", +// TEST-NEXT: "spelling": "Test" +// TEST-NEXT: }, +// TEST-NEXT: { +// TEST-NEXT: "kind": "text", +// TEST-NEXT: "spelling": ";" +// TEST-NEXT: } +// TEST-NEXT: ], +// TEST: "displayName": "Structure", +// TEST: "title": "Test" +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix TEST2 typedef enum Test2 { simple } Test2; +// TEST2-LABEL: "!testLabel": "c:@E@Test2" +// TEST2: "declarationFragments": [ +// TEST2-NEXT: { +// TEST2-NEXT: "kind": "keyword", +// TEST2-NEXT: "spelling": "typedef" +// TEST2-NEXT: }, +// TEST2-NEXT: { +// TEST2-NEXT: "kind": "text", +// TEST2-NEXT: "spelling": " " +// TEST2-NEXT: }, +// TEST2-NEXT: { +// TEST2-NEXT: "kind": "keyword", +// TEST2-NEXT: "spelling": "enum" +// TEST2-NEXT: }, +// TEST2-NEXT: { +// TEST2-NEXT: "kind": "text", +// TEST2-NEXT: "spelling": " " +// TEST2-NEXT: }, +// TEST2-NEXT: { +// TEST2-NEXT: "kind": "identifier", +// TEST2-NEXT: "spelling": "Test2" +// TEST2-NEXT: }, +// TEST2-NEXT: { +// TEST2-NEXT: "kind": "text", +// TEST2-NEXT: "spelling": ": " +// TEST2-NEXT: }, +// TEST2-NEXT: { +// TEST2-NEXT: "kind": "typeIdentifier", +// TEST2-NEXT: "preciseIdentifier": "c:i", +// TEST2-NEXT: "spelling": "unsigned int" +// TEST2-NEXT: }, +// TEST2-NEXT: { +// TEST2-NEXT: "kind": "text", +// TEST2-NEXT: "spelling": " { ... 
} " +// TEST2-NEXT: }, +// TEST2-NEXT: { +// TEST2-NEXT: "kind": "identifier", +// TEST2-NEXT: "spelling": "Test2" +// TEST2-NEXT: }, +// TEST2-NEXT: { +// TEST2-NEXT: "kind": "text", +// TEST2-NEXT: "spelling": ";" +// TEST2-NEXT: } +// TEST2-NEXT: ], +// TEST2: "displayName": "Enumeration", +// TEST2: "title": "Test2" + struct Foo; + +// RUN: FileCheck %s --input-file %t/output.symbols.json --check-prefix TYPEDEF typedef struct Foo TypedefedFoo; +// TYPEDEF-LABEL: "!testLabel": "c:typedef_struct_enum.c@T@TypedefedFoo" +// TYPEDEF: "declarationFragments": [ +// TYPEDEF-NEXT: { +// TYPEDEF-NEXT: "kind": "keyword", +// TYPEDEF-NEXT: "spelling": "typedef" +// TYPEDEF-NEXT: }, +// TYPEDEF-NEXT: { +// TYPEDEF-NEXT: "kind": "text", +// TYPEDEF-NEXT: "spelling": " " +// TYPEDEF-NEXT: }, +// TYPEDEF-NEXT: { +// TYPEDEF-NEXT: "kind": "keyword", +// TYPEDEF-NEXT: "spelling": "struct" +// TYPEDEF-NEXT: }, +// TYPEDEF-NEXT: { +// TYPEDEF-NEXT: "kind": "text", +// TYPEDEF-NEXT: "spelling": " " +// TYPEDEF-NEXT: }, +// TYPEDEF-NEXT: { +// TYPEDEF-NEXT: "kind": "typeIdentifier", +// TYPEDEF-NEXT: "preciseIdentifier": "c:@S@Foo", +// TYPEDEF-NEXT: "spelling": "Foo" +// TYPEDEF-NEXT: }, +// TYPEDEF-NEXT: { +// TYPEDEF-NEXT: "kind": "text", +// TYPEDEF-NEXT: "spelling": " " +// TYPEDEF-NEXT: }, +// TYPEDEF-NEXT: { +// TYPEDEF-NEXT: "kind": "identifier", +// TYPEDEF-NEXT: "spelling": "TypedefedFoo" +// TYPEDEF-NEXT: }, +// TYPEDEF-NEXT: { +// TYPEDEF-NEXT: "kind": "text", +// TYPEDEF-NEXT: "spelling": ";" +// TYPEDEF-NEXT: } +// TYPEDEF-NEXT: ], +// TYPEDEF: "displayName": "Type Alias", +// TYPEDEF: "title": "TypedefedFoo" +// TYPEDEF: "type": "c:@S@Foo" + struct Foo { int bar; }; -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" - }, - "module": { - "name": "", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [ - { - "kind": "memberOf", - "source": "c:@E@Test2@simple", - "target": "c:@E@Test2", - "targetFallback": "Test2" - }, - { - "kind": "memberOf", - "source": "c:@S@Foo@FI@bar", - "target": "c:@S@Foo", - "targetFallback": "Foo" - } - ], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "keyword", - "spelling": "enum" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Test2" - }, - { - "kind": "text", - "spelling": ": " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:i", - "spelling": "unsigned int" - }, - { - "kind": "text", - "spelling": " { ... 
} " - }, - { - "kind": "identifier", - "spelling": "Test2" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@E@Test2" - }, - "kind": { - "displayName": "Enumeration", - "identifier": "c.enum" - }, - "location": { - "position": { - "character": 13, - "line": 3 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Test2" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Test2" - } - ], - "title": "Test2" - }, - "pathComponents": [ - "Test2" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "identifier", - "spelling": "simple" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@E@Test2@simple" - }, - "kind": { - "displayName": "Enumeration Case", - "identifier": "c.enum.case" - }, - "location": { - "position": { - "character": 2, - "line": 4 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "simple" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "simple" - } - ], - "title": "simple" - }, - "pathComponents": [ - "Test2", - "simple" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "keyword", - "spelling": "struct" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Test" - }, - { - "kind": "text", - "spelling": " { ... } " - }, - { - "kind": "identifier", - "spelling": "Test" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@S@Test" - }, - "kind": { - "displayName": "Structure", - "identifier": "c.struct" - }, - "location": { - "position": { - "character": 15, - "line": 0 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Test" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Test" - } - ], - "title": "Test" - }, - "pathComponents": [ - "Test" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "struct" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "Foo" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@S@Foo" - }, - "kind": { - "displayName": "Structure", - "identifier": "c.struct" - }, - "location": { - "position": { - "character": 7, - "line": 9 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "Foo" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "Foo" - } - ], - "title": "Foo" - }, - "pathComponents": [ - "Foo" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "bar" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@S@Foo@FI@bar" - }, - "kind": { - "displayName": "Instance Property", - "identifier": "c.property" - }, - "location": { - "position": { - "character": 8, - "line": 10 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": 
"identifier", - "spelling": "bar" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "bar" - } - ], - "title": "bar" - }, - "pathComponents": [ - "Foo", - "bar" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "keyword", - "spelling": "struct" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:@S@Foo", - "spelling": "Foo" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "TypedefedFoo" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:input.h@T@TypedefedFoo" - }, - "kind": { - "displayName": "Type Alias", - "identifier": "c.typealias" - }, - "location": { - "position": { - "character": 19, - "line": 8 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "TypedefedFoo" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "TypedefedFoo" - } - ], - "title": "TypedefedFoo" - }, - "pathComponents": [ - "TypedefedFoo" - ], - "type": "c:@S@Foo" - } - ] -} +// expected-no-diagnostics diff --git a/clang/test/ExtractAPI/underscored.c b/clang/test/ExtractAPI/underscored.c index 30d2b63..204ec36 100644 --- a/clang/test/ExtractAPI/underscored.c +++ b/clang/test/ExtractAPI/underscored.c @@ -1,17 +1,5 @@ -// RUN: rm -rf %t -// RUN: split-file %s %t -// RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ -// RUN: %t/reference.output.json.in >> %t/reference.output.json // RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx \ -// RUN: -x c-header %t/input.h -o %t/output.json -verify - -// Generator version is not consistent across test runs, normalize it. -// RUN: sed -e "s@\"generator\": \".*\"@\"generator\": \"?\"@g" \ -// RUN: %t/output.json >> %t/output-normalized.json -// RUN: diff %t/reference.output.json %t/output-normalized.json - -//--- input.h -// expected-no-diagnostics +// RUN: -x c-header %s -o - -verify | FileCheck %s // Global record int _HiddenGlobal; @@ -19,399 +7,22 @@ int exposed_global; // Record type struct _HiddenRecord { - int a; + int HiddenRecordMember; }; struct ExposedRecord { - int a; + int ExposedRecordMember; }; -// Typedef -typedef struct {} _HiddenTypedef; -typedef int ExposedTypedef; -typedef _HiddenTypedef ExposedTypedefToHidden; - // Macros #define _HIDDEN_MACRO 5 #define EXPOSED_MACRO 5 -// Symbols that start with '_' should not appear in the reference output -//--- reference.output.json.in -{ - "metadata": { - "formatVersion": { - "major": 0, - "minor": 5, - "patch": 3 - }, - "generator": "?" 
- }, - "module": { - "name": "", - "platform": { - "architecture": "arm64", - "operatingSystem": { - "minimumVersion": { - "major": 11, - "minor": 0, - "patch": 0 - }, - "name": "macosx" - }, - "vendor": "apple" - } - }, - "relationships": [ - { - "kind": "memberOf", - "source": "c:@S@ExposedRecord@FI@a", - "target": "c:@S@ExposedRecord", - "targetFallback": "ExposedRecord" - } - ], - "symbols": [ - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "exposed_global" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@exposed_global" - }, - "kind": { - "displayName": "Global Variable", - "identifier": "c.var" - }, - "location": { - "position": { - "character": 4, - "line": 4 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "exposed_global" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "exposed_global" - } - ], - "title": "exposed_global" - }, - "pathComponents": [ - "exposed_global" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "struct" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "ExposedRecord" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@S@ExposedRecord" - }, - "kind": { - "displayName": "Structure", - "identifier": "c.struct" - }, - "location": { - "position": { - "character": 7, - "line": 11 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "ExposedRecord" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "ExposedRecord" - } - ], - "title": "ExposedRecord" - }, - "pathComponents": [ - "ExposedRecord" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "a" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:@S@ExposedRecord@FI@a" - }, - "kind": { - "displayName": "Instance Property", - "identifier": "c.property" - }, - "location": { - "position": { - "character": 6, - "line": 12 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "a" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "a" - } - ], - "title": "a" - }, - "pathComponents": [ - "ExposedRecord", - "a" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "#define" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "EXPOSED_MACRO" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:input.h@335@macro@EXPOSED_MACRO" - }, - "kind": { - "displayName": "Macro", - "identifier": "c.macro" - }, - "location": { - "position": { - "character": 8, - "line": 22 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "EXPOSED_MACRO" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "EXPOSED_MACRO" - } - ], - "title": 
"EXPOSED_MACRO" - }, - "pathComponents": [ - "EXPOSED_MACRO" - ] - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:I", - "spelling": "int" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "ExposedTypedef" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:input.h@T@ExposedTypedef" - }, - "kind": { - "displayName": "Type Alias", - "identifier": "c.typealias" - }, - "location": { - "position": { - "character": 12, - "line": 17 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "ExposedTypedef" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "ExposedTypedef" - } - ], - "title": "ExposedTypedef" - }, - "pathComponents": [ - "ExposedTypedef" - ], - "type": "c:I" - }, - { - "accessLevel": "public", - "declarationFragments": [ - { - "kind": "keyword", - "spelling": "typedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "typeIdentifier", - "preciseIdentifier": "c:@SA@_HiddenTypedef", - "spelling": "_HiddenTypedef" - }, - { - "kind": "text", - "spelling": " " - }, - { - "kind": "identifier", - "spelling": "ExposedTypedefToHidden" - }, - { - "kind": "text", - "spelling": ";" - } - ], - "identifier": { - "interfaceLanguage": "c", - "precise": "c:input.h@T@ExposedTypedefToHidden" - }, - "kind": { - "displayName": "Type Alias", - "identifier": "c.typealias" - }, - "location": { - "position": { - "character": 23, - "line": 18 - }, - "uri": "file://INPUT_DIR/input.h" - }, - "names": { - "navigator": [ - { - "kind": "identifier", - "spelling": "ExposedTypedefToHidden" - } - ], - "subHeading": [ - { - "kind": "identifier", - "spelling": "ExposedTypedefToHidden" - } - ], - "title": "ExposedTypedefToHidden" - }, - "pathComponents": [ - "ExposedTypedefToHidden" - ], - "type": "c:@SA@_HiddenTypedef" - } - ] -} +// expected-no-diagnostics + +// CHECK-NOT: _HiddenRecord +// CHECK-NOT: HiddenRecordMember +// CHECK: ExposedRecord +// CHECK: ExposedRecordMember +// CHECK-NOT: _HIDDEN_MACRO +// CHECK: EXPOSED_MACRO diff --git a/clang/test/ExtractAPI/union.c b/clang/test/ExtractAPI/union.c index 6ec9fd3..8f8300b 100644 --- a/clang/test/ExtractAPI/union.c +++ b/clang/test/ExtractAPI/union.c @@ -2,7 +2,7 @@ // RUN: split-file %s %t // RUN: sed -e "s@INPUT_DIR@%{/t:regex_replacement}@g" \ // RUN: %t/reference.output.json.in >> %t/reference.output.json -// RUN: %clang_cc1 -extract-api -triple arm64-apple-macosx -x c-header\ +// RUN: %clang_cc1 -extract-api --pretty-sgf -triple arm64-apple-macosx -x c-header\ // RUN: %t/input.h -o %t/output.json -verify // Generator version is not consistent across test runs, normalize it. 
@@ -12,7 +12,7 @@ //--- input.h /// My Union -union Union{ +union Union { /// the a option int a; /// the b option diff --git a/clang/test/ExtractAPI/vfs_redirected_include.m b/clang/test/ExtractAPI/vfs_redirected_include.m index 9ba7e1d..db03820 100644 --- a/clang/test/ExtractAPI/vfs_redirected_include.m +++ b/clang/test/ExtractAPI/vfs_redirected_include.m @@ -14,7 +14,7 @@ // RUN: %t/vfsoverlay.yaml.in >> %t/vfsoverlay.yaml // Input headers use paths to the framework root/DSTROOT -// RUN: %clang_cc1 -extract-api -v --product-name=MyFramework \ +// RUN: %clang_cc1 -extract-api --pretty-sgf -v --product-name=MyFramework \ // RUN: -triple arm64-apple-macosx \ // RUN: -iquote%t -ivfsoverlay %t/vfsoverlay.yaml -F%t/Frameworks \ // RUN: -x objective-c-header \ diff --git a/clang/test/Index/extract-api-cursor.m b/clang/test/Index/extract-api-cursor.m index 1b27b6f..9d9d3a1 100644 --- a/clang/test/Index/extract-api-cursor.m +++ b/clang/test/Index/extract-api-cursor.m @@ -31,6 +31,8 @@ struct Foo { - (void)derivedMethodWithValue:(id)value { int a = 5; } +/// Impl only docs +- (void)implOnlyMethod { } @end // RUN: c-index-test -single-symbol-sgf-at=%s:4:9 local %s | FileCheck -check-prefix=CHECK-FOO %s @@ -118,3 +120,10 @@ struct Foo { // CHECK-DERIVED-METHOD-IMPL: "text":"Derived method docs" // CHECK-DERIVED-METHOD-IMPL: "kind":{"displayName":"Instance Method","identifier":"objective-c.method"} // CHECK-DERIVED-METHOD-IMPL: "title":"derivedMethodWithValue:" + +// RUN: c-index-test -single-symbol-sgf-at=%s:35:11 local %s | FileCheck -check-prefix=CHECK-IMPL-ONLY %s +// CHECK-IMPL-ONLY: "relatedSymbols":[] +// CHECK-IMPL-ONLY: "relationships":[{"kind":"memberOf","source":"c:objc(cs)Derived(im)implOnlyMethod","target":"c:objc(cs)Derived" +// CHECK-IMPL-ONLY: "text":"Impl only docs" +// CHECK-IMPL-ONLY: "kind":{"displayName":"Instance Method","identifier":"objective-c.method"} +// CHECK-IMPL-ONLY: "title":"implOnlyMethod" diff --git a/clang/tools/libclang/CXExtractAPI.cpp b/clang/tools/libclang/CXExtractAPI.cpp index 05098c9..d74f374 100644 --- a/clang/tools/libclang/CXExtractAPI.cpp +++ b/clang/tools/libclang/CXExtractAPI.cpp @@ -18,6 +18,7 @@ #include "clang-c/Index.h" #include "clang-c/Platform.h" #include "clang/AST/Decl.h" +#include "clang/AST/DeclBase.h" #include "clang/AST/DeclObjC.h" #include "clang/Basic/TargetInfo.h" #include "clang/ExtractAPI/API.h" @@ -54,41 +55,20 @@ struct LibClangExtractAPIVisitor if (!shouldDeclBeIncluded(Decl)) return true; - const ObjCInterfaceDecl *Interface = Decl->getClassInterface(); - StringRef Name = Interface->getName(); - StringRef USR = API.recordUSR(Decl); - PresumedLoc Loc = - Context.getSourceManager().getPresumedLoc(Decl->getLocation()); - LinkageInfo Linkage = Decl->getLinkageAndVisibility(); - DocComment Comment; - if (auto *RawComment = fetchRawCommentForDecl(Interface)) - Comment = RawComment->getFormattedLines(Context.getSourceManager(), - Context.getDiagnostics()); - - // Build declaration fragments and sub-heading by generating them for the - // interface. - DeclarationFragments Declaration = - DeclarationFragmentsBuilder::getFragmentsForObjCInterface(Interface); - DeclarationFragments SubHeading = - DeclarationFragmentsBuilder::getSubHeading(Decl); - - // Collect super class information. 
- SymbolReference SuperClass; - if (const auto *SuperClassDecl = Decl->getSuperClass()) { - SuperClass.Name = SuperClassDecl->getObjCRuntimeNameAsString(); - SuperClass.USR = API.recordUSR(SuperClassDecl); - } + auto *Interface = Decl->getClassInterface(); - ObjCInterfaceRecord *ObjCInterfaceRecord = API.addObjCInterface( - Name, USR, Loc, AvailabilityInfo::createFromDecl(Decl), Linkage, - Comment, Declaration, SubHeading, SuperClass, isInSystemHeader(Decl)); + if (!VisitObjCInterfaceDecl(Interface)) + return false; - // Record all methods (selectors). This doesn't include automatically - // synthesized property methods. - recordObjCMethods(ObjCInterfaceRecord, Decl->methods()); - recordObjCProperties(ObjCInterfaceRecord, Decl->properties()); - recordObjCInstanceVariables(ObjCInterfaceRecord, Decl->ivars()); + SmallString<128> USR; + index::generateUSRForDecl(Interface, USR); + if (auto *InterfaceRecord = dyn_cast_if_present( + API.findRecordForUSR(USR))) { + recordObjCMethods(InterfaceRecord, Decl->methods()); + recordObjCProperties(InterfaceRecord, Decl->properties()); + recordObjCInstanceVariables(InterfaceRecord, Decl->ivars()); + } return true; } }; @@ -96,21 +76,14 @@ struct LibClangExtractAPIVisitor DEFINE_SIMPLE_CONVERSION_FUNCTIONS(APISet, CXAPISet) -static void WalkupFromMostDerivedType(LibClangExtractAPIVisitor &Visitor, - Decl *D); - -template -static bool WalkupParentContext(DeclContext *Parent, - LibClangExtractAPIVisitor &Visitor) { - if (auto *D = dyn_cast(Parent)) { - WalkupFromMostDerivedType(Visitor, D); - return true; - } - return false; -} - +// Visits the Decl D and it's transitive DeclContexts recursively, starting from +// the outer-most context. This is guaranteed to visit every Decl we need in the +// right order to generate symbol graph information for D. static void WalkupFromMostDerivedType(LibClangExtractAPIVisitor &Visitor, Decl *D) { + if (auto *Parent = D->getDeclContext()) + WalkupFromMostDerivedType(Visitor, cast(Parent)); + switch (D->getKind()) { #define ABSTRACT_DECL(DECL) #define DECL(CLASS, BASE) \ @@ -119,20 +92,12 @@ static void WalkupFromMostDerivedType(LibClangExtractAPIVisitor &Visitor, break; #include "clang/AST/DeclNodes.inc" } - - for (auto *Parent = D->getDeclContext(); Parent != nullptr; - Parent = Parent->getParent()) { - if (WalkupParentContext(Parent, Visitor)) - return; - if (WalkupParentContext(Parent, Visitor)) - return; - } } static CXString GenerateCXStringFromSymbolGraphData(llvm::json::Object Obj) { llvm::SmallString<0> BackingString; llvm::raw_svector_ostream OS(BackingString); - OS << Value(std::move(Obj)); + OS << llvm::formatv("{0}", Value(std::move(Obj))); return cxstring::createDup(BackingString.str()); } -- cgit v1.1 From bf1df250487584ec77b0ab567cd3cca5c2863270 Mon Sep 17 00:00:00 2001 From: Han-Kuan Chen Date: Wed, 3 Apr 2024 17:57:46 +0800 Subject: [SLP] Use isValidElementType instead of (#87469) FixedVectorType::isValidElementType for consistency. 
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 6e7dcb9..db052ce 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -8447,7 +8447,7 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, else if (auto *IE = dyn_cast(VL[0])) ScalarTy = IE->getOperand(1)->getType(); } - if (!FixedVectorType::isValidElementType(ScalarTy)) + if (!isValidElementType(ScalarTy)) return InstructionCost::getInvalid(); auto *VecTy = FixedVectorType::get(ScalarTy, VL.size()); TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; -- cgit v1.1 From ca48d4dfd3148d83f9a74737f08174f16177200f Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 3 Apr 2024 06:06:52 -0400 Subject: [libc++] Add a static_assert for a Mandates in seed_seq (#86992) Fixes #84843 --- libcxx/include/__random/seed_seq.h | 6 +++ .../rand.util.seedseq/generate.mandates.verify.cpp | 58 ++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/generate.mandates.verify.cpp diff --git a/libcxx/include/__random/seed_seq.h b/libcxx/include/__random/seed_seq.h index 7e98887..5cf84ae 100644 --- a/libcxx/include/__random/seed_seq.h +++ b/libcxx/include/__random/seed_seq.h @@ -14,6 +14,7 @@ #include <__algorithm/max.h> #include <__config> #include <__iterator/iterator_traits.h> +#include <__type_traits/is_unsigned.h> #include #include #include @@ -79,6 +80,11 @@ void seed_seq::__init(_InputIterator __first, _InputIterator __last) { template void seed_seq::generate(_RandomAccessIterator __first, _RandomAccessIterator __last) { + using _ValueType = typename iterator_traits<_RandomAccessIterator>::value_type; + static_assert(is_unsigned<_ValueType>::value && sizeof(_ValueType) >= sizeof(uint32_t), + "[rand.util.seedseq]/7 requires the value_type of the iterator to be an unsigned " + "integer capable of accommodating 32-bit quantities."); + if (__first != __last) { std::fill(__first, __last, 0x8b8b8b8b); const size_t __n = static_cast(__last - __first); diff --git a/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/generate.mandates.verify.cpp b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/generate.mandates.verify.cpp new file mode 100644 index 0000000..a8ea31b --- /dev/null +++ b/libcxx/test/std/numerics/rand/rand.util/rand.util.seedseq/generate.mandates.verify.cpp @@ -0,0 +1,58 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// + +// class seed_seq; + +// template +// void generate(RandomAccessIterator begin, RandomAccessIterator end); + +// Check the following requirement: https://eel.is/c++draft/rand.util.seedseq#7 +// +// Mandates: iterator_traits::value_type is an unsigned integer +// type capable of accommodating 32-bit quantities. 
+ +// UNSUPPORTED: c++03 +// REQUIRES: stdlib=libc++ + +#include +#include + +#include "test_macros.h" + +void f() { + std::seed_seq seq; + + // Not an integral type + { + double* p = nullptr; + seq.generate(p, p); // expected-error-re@*:* {{static assertion failed{{.+}}: [rand.util.seedseq]/7 requires{{.+}}}} + // expected-error@*:* 0+ {{invalid operands to}} + } + + // Not an unsigned type + { + long long* p = nullptr; + seq.generate(p, p); // expected-error-re@*:* {{static assertion failed{{.+}}: [rand.util.seedseq]/7 requires{{.+}}}} + } + + // Not a 32-bit type + { +#if UCHAR_MAX < UINT32_MAX + unsigned char* p = nullptr; + seq.generate(p, p); // expected-error-re@*:* {{static assertion failed{{.+}}: [rand.util.seedseq]/7 requires{{.+}}}} +#endif + } + + // Everything satisfied + { + unsigned long* p = nullptr; + seq.generate(p, p); // no diagnostic + } +} -- cgit v1.1 From d1f585056f71bc63bd2e71d744051139809e5d8b Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 3 Apr 2024 06:07:46 -0400 Subject: [libc++] Fix tests on musl (#85085) (#86934) One or two of the tests need slight tweaks to make them pass when building with musl. This patch is a re-application of b61fb18 which was reverted in 0847c90 because it broke the build. rdar://118885724 Co-authored-by: Alastair Houghton --- .../generic_category.pass.cpp | 19 +++++--- .../syserr.errcat.objects/system_category.pass.cpp | 19 +++++--- .../facet.num.put.members/put_long_double.pass.cpp | 51 ++++++++++------------ 3 files changed, 47 insertions(+), 42 deletions(-) diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp index 068202c..d4bbde7 100644 --- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp +++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/generic_category.pass.cpp @@ -44,14 +44,19 @@ int main(int, char**) errno = E2BIG; // something that message will never generate const std::error_category& e_cat1 = std::generic_category(); const std::string msg = e_cat1.message(-1); - // Exact message format varies by platform. -#if defined(_AIX) - LIBCPP_ASSERT(msg.rfind("Error -1 occurred", 0) == 0); -#elif defined(_NEWLIB_VERSION) - LIBCPP_ASSERT(msg.empty()); -#else - LIBCPP_ASSERT(msg.rfind("Unknown error", 0) == 0); + // Exact message format varies by platform. We can't detect + // some of these (Musl in particular) using the preprocessor, + // so accept a few sensible messages. Newlib unfortunately + // responds with an empty message, which we probably want to + // treat as a failure code otherwise, but we can detect that + // with the preprocessor. 
+ LIBCPP_ASSERT(msg.rfind("Error -1 occurred", 0) == 0 // AIX + || msg.rfind("No error information", 0) == 0 // Musl + || msg.rfind("Unknown error", 0) == 0 // Glibc +#if defined(_NEWLIB_VERSION) + || msg.empty() #endif + ); assert(errno == E2BIG); } diff --git a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp index 42fdd1c..eefbddd 100644 --- a/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp +++ b/libcxx/test/std/diagnostics/syserr/syserr.errcat/syserr.errcat.objects/system_category.pass.cpp @@ -50,14 +50,19 @@ int main(int, char**) { errno = E2BIG; // something that message will never generate const std::error_category& e_cat1 = std::system_category(); const std::string msg = e_cat1.message(-1); - // Exact message format varies by platform. -#if defined(_AIX) - LIBCPP_ASSERT(msg.rfind("Error -1 occurred", 0) == 0); -#elif defined(_NEWLIB_VERSION) - LIBCPP_ASSERT(msg.empty()); -#else - LIBCPP_ASSERT(msg.rfind("Unknown error", 0) == 0); + // Exact message format varies by platform. We can't detect + // some of these (Musl in particular) using the preprocessor, + // so accept a few sensible messages. Newlib unfortunately + // responds with an empty message, which we probably want to + // treat as a failure code otherwise, but we can detect that + // with the preprocessor. + LIBCPP_ASSERT(msg.rfind("Error -1 occurred", 0) == 0 // AIX + || msg.rfind("No error information", 0) == 0 // Musl + || msg.rfind("Unknown error", 0) == 0 // Glibc +#if defined(_NEWLIB_VERSION) + || msg.empty() #endif + ); assert(errno == E2BIG); } diff --git a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp index 8637a93..16e4ea7 100644 --- a/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp +++ b/libcxx/test/std/localization/locale.categories/category.numeric/locale.nm.put/facet.num.put.members/put_long_double.pass.cpp @@ -13,15 +13,11 @@ // iter_type put(iter_type s, ios_base& iob, char_type fill, long double v) const; // XFAIL: win32-broken-printf-g-precision -// XFAIL: LIBCXX-PICOLIBC-FIXME - -// Needs more investigation, but this is probably failing on Android M (API 23) -// and up because the printf formatting of NAN changed. 
-// XFAIL: LIBCXX-ANDROID-FIXME && !android-device-api={{21|22}} #include #include #include +#include #include #include #include "test_macros.h" @@ -8934,11 +8930,10 @@ void test4() char str[200]; std::locale lc = std::locale::classic(); std::locale lg(lc, new my_numpunct); -#ifdef _AIX - std::string inf = "INF"; -#else - std::string inf = "inf"; -#endif + + // This should match the underlying C library + std::snprintf(str, sizeof(str), "%f", INFINITY); + std::string inf = str; const my_facet f(1); { @@ -10727,24 +10722,24 @@ void test5() std::locale lc = std::locale::classic(); std::locale lg(lc, new my_numpunct); const my_facet f(1); -#if defined(_AIX) - std::string nan= "NaNQ"; - std::string NaN = "NaNQ"; - std::string nan_padding25 = "*********************"; - std::string pnan_sign = "+"; - std::string pnan_padding25 = "********************"; -#else - std::string nan= "nan"; - std::string NaN = "NAN"; - std::string nan_padding25 = "**********************"; -#if defined(TEST_HAS_GLIBC) || defined(_WIN32) - std::string pnan_sign = "+"; - std::string pnan_padding25 = "*********************"; -#else - std::string pnan_sign = ""; - std::string pnan_padding25 = "**********************"; -#endif -#endif + + // The output here depends on the underlying C library, so work out what + // that does. + std::snprintf(str, sizeof(str), "%f", std::nan("")); + std::string nan = str; + + std::snprintf(str, sizeof(str), "%F", std::nan("")); + std::string NaN = str; + + std::snprintf(str, sizeof(str), "%+f", std::nan("")); + std::string pnan_sign; + if (str[0] == '+') { + pnan_sign = "+"; + } + + std::string nan_padding25 = std::string(25 - nan.length(), '*'); + std::string pnan_padding25 = std::string(25 - nan.length() - pnan_sign.length(), '*'); + { long double v = std::nan(""); std::ios ios(0); -- cgit v1.1 From d0dcf06ab8723cc4358ad446354cce875dd89577 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Wed, 3 Apr 2024 12:33:08 +0200 Subject: [bazel] Port for e05c1b46d0d3739cc48ad912dbe6e9affce05927. --- utils/bazel/llvm-project-overlay/clang/BUILD.bazel | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel index 1bf6bee..14f2b45 100644 --- a/utils/bazel/llvm-project-overlay/clang/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/clang/BUILD.bazel @@ -2651,7 +2651,11 @@ cc_library( srcs = glob([ "lib/ExtractAPI/**/*.cpp", ]), - hdrs = glob(["include/clang/ExtractAPI/**/*.h"]), + hdrs = glob([ + "include/clang/ExtractAPI/**/*.h", + ]) + [ + "include/clang/ExtractAPI/APIRecords.inc", + ], includes = ["include"], deps = [ ":ast", -- cgit v1.1 From 1f268092c7af20c21d4a594678b647cab050602a Mon Sep 17 00:00:00 2001 From: Simon Camphausen Date: Wed, 3 Apr 2024 13:06:14 +0200 Subject: [mlir][EmitC] Add support for pointer and opaque types to subscript op (#86266) For pointer types the indices are restricted to one integer-like operand. For opaque types no further restrictions are made. 
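For reference, `emitc.subscript` keeps lowering to plain C++ subscript syntax for the new operand kinds; roughly what the updated translation tests below expect (function and variable names here simply mirror those tests):

```c++
#include <cstddef>
#include <map>

// Pointer operand: exactly one integer-like index, emitted as p[i].
void load_store_pointer(float *v1, float *v2, std::size_t i, std::size_t j) {
  v2[j] = v1[i];
}

// Opaque operand: no further restrictions, so e.g. an !emitc.opaque std::map
// value is subscripted through its own operator[].
void load_store_opaque(std::map<char, int> m1, std::map<char, int> m2,
                       char i, char j) {
  m2[j] = m1[i];
}
```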
--- mlir/include/mlir/Dialect/EmitC/IR/EmitC.h | 6 ++ mlir/include/mlir/Dialect/EmitC/IR/EmitC.td | 30 ++++++---- .../lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp | 15 ++++- mlir/lib/Dialect/EmitC/IR/EmitC.cpp | 64 ++++++++++++++++++++-- mlir/lib/Target/Cpp/TranslateToCpp.cpp | 2 +- .../Conversion/MemRefToEmitC/memref-to-emitc.mlir | 4 +- mlir/test/Dialect/EmitC/invalid_ops.mlir | 46 +++++++++++++++- mlir/test/Dialect/EmitC/ops.mlir | 7 +++ mlir/test/Target/Cpp/subscript.mlir | 32 +++++++++-- 9 files changed, 175 insertions(+), 31 deletions(-) diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.h b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.h index 725a1bc..c039156 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.h +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.h @@ -30,8 +30,14 @@ namespace mlir { namespace emitc { void buildTerminatedBody(OpBuilder &builder, Location loc); + /// Determines whether \p type is a valid integer type in EmitC. bool isSupportedIntegerType(mlir::Type type); + +/// Determines whether \p type is integer like, i.e. it's a supported integer, +/// an index or opaque type. +bool isIntegerIndexOrOpaqueType(Type type); + /// Determines whether \p type is a valid floating-point type in EmitC. bool isSupportedFloatType(mlir::Type type); } // namespace emitc diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td index d746222..090dae8 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td @@ -1155,35 +1155,41 @@ def EmitC_IfOp : EmitC_Op<"if", let hasCustomAssemblyFormat = 1; } -def EmitC_SubscriptOp : EmitC_Op<"subscript", - [TypesMatchWith<"result type matches element type of 'array'", - "array", "result", - "::llvm::cast($_self).getElementType()">]> { - let summary = "Array subscript operation"; +def EmitC_SubscriptOp : EmitC_Op<"subscript", []> { + let summary = "Subscript operation"; let description = [{ With the `subscript` operation the subscript operator `[]` can be applied - to variables or arguments of array type. + to variables or arguments of array, pointer and opaque type. 
Example: ```mlir %i = index.constant 1 %j = index.constant 7 - %0 = emitc.subscript %arg0[%i, %j] : <4x8xf32>, index, index + %0 = emitc.subscript %arg0[%i, %j] : !emitc.array<4x8xf32>, index, index + %1 = emitc.subscript %arg1[%i] : !emitc.ptr, index ``` }]; - let arguments = (ins Arg:$array, - Variadic:$indices); + let arguments = (ins Arg, + "the value to subscript">:$value, + Variadic:$indices); let results = (outs AnyType:$result); let builders = [ - OpBuilder<(ins "Value":$array, "ValueRange":$indices), [{ - build($_builder, $_state, cast(array.getType()).getElementType(), array, indices); + OpBuilder<(ins "TypedValue":$array, "ValueRange":$indices), [{ + build($_builder, $_state, array.getType().getElementType(), array, indices); + }]>, + OpBuilder<(ins "TypedValue":$pointer, "Value":$index), [{ + build($_builder, $_state, pointer.getType().getPointee(), pointer, + ValueRange{index}); }]> ]; let hasVerifier = 1; - let assemblyFormat = "$array `[` $indices `]` attr-dict `:` type($array) `,` type($indices)"; + let assemblyFormat = "$value `[` $indices `]` attr-dict `:` functional-type(operands, results)"; } diff --git a/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp b/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp index 0e3b646..25fa158 100644 --- a/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp +++ b/mlir/lib/Conversion/MemRefToEmitC/MemRefToEmitC.cpp @@ -62,8 +62,14 @@ struct ConvertLoad final : public OpConversionPattern { return rewriter.notifyMatchFailure(op.getLoc(), "cannot convert type"); } + auto arrayValue = + dyn_cast>(operands.getMemref()); + if (!arrayValue) { + return rewriter.notifyMatchFailure(op.getLoc(), "expected array type"); + } + auto subscript = rewriter.create( - op.getLoc(), operands.getMemref(), operands.getIndices()); + op.getLoc(), arrayValue, operands.getIndices()); auto noInit = emitc::OpaqueAttr::get(getContext(), ""); auto var = @@ -81,9 +87,14 @@ struct ConvertStore final : public OpConversionPattern { LogicalResult matchAndRewrite(memref::StoreOp op, OpAdaptor operands, ConversionPatternRewriter &rewriter) const override { + auto arrayValue = + dyn_cast>(operands.getMemref()); + if (!arrayValue) { + return rewriter.notifyMatchFailure(op.getLoc(), "expected array type"); + } auto subscript = rewriter.create( - op.getLoc(), operands.getMemref(), operands.getIndices()); + op.getLoc(), arrayValue, operands.getIndices()); rewriter.replaceOpWithNewOp(op, subscript, operands.getValue()); return success(); diff --git a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp index f4a9dc3..7cbf28b 100644 --- a/mlir/lib/Dialect/EmitC/IR/EmitC.cpp +++ b/mlir/lib/Dialect/EmitC/IR/EmitC.cpp @@ -70,6 +70,11 @@ bool mlir::emitc::isSupportedIntegerType(Type type) { return false; } +bool mlir::emitc::isIntegerIndexOrOpaqueType(Type type) { + return llvm::isa(type) || + isSupportedIntegerType(type); +} + bool mlir::emitc::isSupportedFloatType(Type type) { if (auto floatType = llvm::dyn_cast(type)) { switch (floatType.getWidth()) { @@ -780,12 +785,61 @@ LogicalResult emitc::YieldOp::verify() { //===----------------------------------------------------------------------===// LogicalResult emitc::SubscriptOp::verify() { - if (getIndices().size() != (size_t)getArray().getType().getRank()) { - return emitOpError() << "requires number of indices (" - << getIndices().size() - << ") to match the rank of the array type (" - << getArray().getType().getRank() << ")"; + // Checks for array operand. 
+ if (auto arrayType = llvm::dyn_cast(getValue().getType())) { + // Check number of indices. + if (getIndices().size() != (size_t)arrayType.getRank()) { + return emitOpError() << "on array operand requires number of indices (" + << getIndices().size() + << ") to match the rank of the array type (" + << arrayType.getRank() << ")"; + } + // Check types of index operands. + for (unsigned i = 0, e = getIndices().size(); i != e; ++i) { + Type type = getIndices()[i].getType(); + if (!isIntegerIndexOrOpaqueType(type)) { + return emitOpError() << "on array operand requires index operand " << i + << " to be integer-like, but got " << type; + } + } + // Check element type. + Type elementType = arrayType.getElementType(); + if (elementType != getType()) { + return emitOpError() << "on array operand requires element type (" + << elementType << ") and result type (" << getType() + << ") to match"; + } + return success(); } + + // Checks for pointer operand. + if (auto pointerType = + llvm::dyn_cast(getValue().getType())) { + // Check number of indices. + if (getIndices().size() != 1) { + return emitOpError() + << "on pointer operand requires one index operand, but got " + << getIndices().size(); + } + // Check types of index operand. + Type type = getIndices()[0].getType(); + if (!isIntegerIndexOrOpaqueType(type)) { + return emitOpError() << "on pointer operand requires index operand to be " + "integer-like, but got " + << type; + } + // Check pointee type. + Type pointeeType = pointerType.getPointee(); + if (pointeeType != getType()) { + return emitOpError() << "on pointer operand requires pointee type (" + << pointeeType << ") and result type (" << getType() + << ") to match"; + } + return success(); + } + + // The operand has opaque type, so we can't assume anything about the number + // or types of index operands. 
return success(); } diff --git a/mlir/lib/Target/Cpp/TranslateToCpp.cpp b/mlir/lib/Target/Cpp/TranslateToCpp.cpp index 0b07b4b..ee87c1d 100644 --- a/mlir/lib/Target/Cpp/TranslateToCpp.cpp +++ b/mlir/lib/Target/Cpp/TranslateToCpp.cpp @@ -1104,7 +1104,7 @@ CppEmitter::CppEmitter(raw_ostream &os, bool declareVariablesAtTop) std::string CppEmitter::getSubscriptName(emitc::SubscriptOp op) { std::string out; llvm::raw_string_ostream ss(out); - ss << getOrCreateName(op.getArray()); + ss << getOrCreateName(op.getValue()); for (auto index : op.getIndices()) { ss << "[" << getOrCreateName(index) << "]"; } diff --git a/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc.mlir b/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc.mlir index 9793b2d..7aa2ba8 100644 --- a/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc.mlir +++ b/mlir/test/Conversion/MemRefToEmitC/memref-to-emitc.mlir @@ -6,7 +6,7 @@ func.func @memref_store(%v : f32, %i: index, %j: index) { // CHECK: %[[ALLOCA:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.array<4x8xf32> %0 = memref.alloca() : memref<4x8xf32> - // CHECK: %[[SUBSCRIPT:.*]] = emitc.subscript %[[ALLOCA]][%[[i]], %[[j]]] : <4x8xf32> + // CHECK: %[[SUBSCRIPT:.*]] = emitc.subscript %[[ALLOCA]][%[[i]], %[[j]]] : (!emitc.array<4x8xf32>, index, index) -> f32 // CHECK: emitc.assign %[[v]] : f32 to %[[SUBSCRIPT:.*]] : f32 memref.store %v, %0[%i, %j] : memref<4x8xf32> return @@ -19,7 +19,7 @@ func.func @memref_load(%i: index, %j: index) -> f32 { // CHECK: %[[ALLOCA:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> !emitc.array<4x8xf32> %0 = memref.alloca() : memref<4x8xf32> - // CHECK: %[[LOAD:.*]] = emitc.subscript %[[ALLOCA]][%[[i]], %[[j]]] : <4x8xf32> + // CHECK: %[[LOAD:.*]] = emitc.subscript %[[ALLOCA]][%[[i]], %[[j]]] : (!emitc.array<4x8xf32>, index, index) -> f32 // CHECK: %[[VAR:.*]] = "emitc.variable"() <{value = #emitc.opaque<"">}> : () -> f32 // CHECK: emitc.assign %[[LOAD]] : f32 to %[[VAR]] : f32 %1 = memref.load %0[%i, %j] : memref<4x8xf32> diff --git a/mlir/test/Dialect/EmitC/invalid_ops.mlir b/mlir/test/Dialect/EmitC/invalid_ops.mlir index 22423cf..bbaab0d 100644 --- a/mlir/test/Dialect/EmitC/invalid_ops.mlir +++ b/mlir/test/Dialect/EmitC/invalid_ops.mlir @@ -390,8 +390,48 @@ func.func @logical_or_resulterror(%arg0: i32, %arg1: i32) { // ----- -func.func @test_subscript_indices_mismatch(%arg0: !emitc.array<4x8xf32>, %arg2: index) { - // expected-error @+1 {{'emitc.subscript' op requires number of indices (1) to match the rank of the array type (2)}} - %0 = emitc.subscript %arg0[%arg2] : <4x8xf32>, index +func.func @test_subscript_array_indices_mismatch(%arg0: !emitc.array<4x8xf32>, %arg1: index) { + // expected-error @+1 {{'emitc.subscript' op on array operand requires number of indices (1) to match the rank of the array type (2)}} + %0 = emitc.subscript %arg0[%arg1] : (!emitc.array<4x8xf32>, index) -> f32 + return +} + +// ----- + +func.func @test_subscript_array_index_type_mismatch(%arg0: !emitc.array<4x8xf32>, %arg1: index, %arg2: f32) { + // expected-error @+1 {{'emitc.subscript' op on array operand requires index operand 1 to be integer-like, but got 'f32'}} + %0 = emitc.subscript %arg0[%arg1, %arg2] : (!emitc.array<4x8xf32>, index, f32) -> f32 + return +} + +// ----- + +func.func @test_subscript_array_type_mismatch(%arg0: !emitc.array<4x8xf32>, %arg1: index, %arg2: index) { + // expected-error @+1 {{'emitc.subscript' op on array operand requires element type ('f32') and result type ('i32') to match}} + %0 = 
emitc.subscript %arg0[%arg1, %arg2] : (!emitc.array<4x8xf32>, index, index) -> i32 + return +} + +// ----- + +func.func @test_subscript_ptr_indices_mismatch(%arg0: !emitc.ptr, %arg1: index) { + // expected-error @+1 {{'emitc.subscript' op on pointer operand requires one index operand, but got 2}} + %0 = emitc.subscript %arg0[%arg1, %arg1] : (!emitc.ptr, index, index) -> f32 + return +} + +// ----- + +func.func @test_subscript_ptr_index_type_mismatch(%arg0: !emitc.ptr, %arg1: f64) { + // expected-error @+1 {{'emitc.subscript' op on pointer operand requires index operand to be integer-like, but got 'f64'}} + %0 = emitc.subscript %arg0[%arg1] : (!emitc.ptr, f64) -> f32 + return +} + +// ----- + +func.func @test_subscript_ptr_type_mismatch(%arg0: !emitc.ptr, %arg1: index) { + // expected-error @+1 {{'emitc.subscript' op on pointer operand requires pointee type ('f32') and result type ('f64') to match}} + %0 = emitc.subscript %arg0[%arg1] : (!emitc.ptr, index) -> f64 return } diff --git a/mlir/test/Dialect/EmitC/ops.mlir b/mlir/test/Dialect/EmitC/ops.mlir index 5f00a29..ace3670 100644 --- a/mlir/test/Dialect/EmitC/ops.mlir +++ b/mlir/test/Dialect/EmitC/ops.mlir @@ -214,6 +214,13 @@ func.func @test_for_not_index_induction(%arg0 : i16, %arg1 : i16, %arg2 : i16) { return } +func.func @test_subscript(%arg0 : !emitc.array<2x3xf32>, %arg1 : !emitc.ptr, %arg2 : !emitc.opaque<"std::map">, %idx0 : index, %idx1 : i32, %idx2 : !emitc.opaque<"char">) { + %0 = emitc.subscript %arg0[%idx0, %idx1] : (!emitc.array<2x3xf32>, index, i32) -> f32 + %1 = emitc.subscript %arg1[%idx0] : (!emitc.ptr, index) -> i32 + %2 = emitc.subscript %arg2[%idx2] : (!emitc.opaque<"std::map">, !emitc.opaque<"char">) -> !emitc.opaque<"int"> + return +} + emitc.verbatim "#ifdef __cplusplus" emitc.verbatim "extern \"C\" {" emitc.verbatim "#endif // __cplusplus" diff --git a/mlir/test/Target/Cpp/subscript.mlir b/mlir/test/Target/Cpp/subscript.mlir index a6c82df..0b38895 100644 --- a/mlir/test/Target/Cpp/subscript.mlir +++ b/mlir/test/Target/Cpp/subscript.mlir @@ -1,24 +1,44 @@ // RUN: mlir-translate -mlir-to-cpp %s | FileCheck %s // RUN: mlir-translate -mlir-to-cpp -declare-variables-at-top %s | FileCheck %s -func.func @load_store(%arg0: !emitc.array<4x8xf32>, %arg1: !emitc.array<3x5xf32>, %arg2: index, %arg3: index) { - %0 = emitc.subscript %arg0[%arg2, %arg3] : <4x8xf32>, index, index - %1 = emitc.subscript %arg1[%arg2, %arg3] : <3x5xf32>, index, index +func.func @load_store_array(%arg0: !emitc.array<4x8xf32>, %arg1: !emitc.array<3x5xf32>, %arg2: index, %arg3: index) { + %0 = emitc.subscript %arg0[%arg2, %arg3] : (!emitc.array<4x8xf32>, index, index) -> f32 + %1 = emitc.subscript %arg1[%arg2, %arg3] : (!emitc.array<3x5xf32>, index, index) -> f32 emitc.assign %0 : f32 to %1 : f32 return } -// CHECK: void load_store(float [[ARR1:[^ ]*]][4][8], float [[ARR2:[^ ]*]][3][5], +// CHECK: void load_store_array(float [[ARR1:[^ ]*]][4][8], float [[ARR2:[^ ]*]][3][5], // CHECK-SAME: size_t [[I:[^ ]*]], size_t [[J:[^ ]*]]) // CHECK-NEXT: [[ARR2]][[[I]]][[[J]]] = [[ARR1]][[[I]]][[[J]]]; +func.func @load_store_pointer(%arg0: !emitc.ptr, %arg1: !emitc.ptr, %arg2: index, %arg3: index) { + %0 = emitc.subscript %arg0[%arg2] : (!emitc.ptr, index) -> f32 + %1 = emitc.subscript %arg1[%arg3] : (!emitc.ptr, index) -> f32 + emitc.assign %0 : f32 to %1 : f32 + return +} +// CHECK: void load_store_pointer(float* [[PTR1:[^ ]*]], float* [[PTR2:[^ ]*]], +// CHECK-SAME: size_t [[I:[^ ]*]], size_t [[J:[^ ]*]]) +// CHECK-NEXT: [[PTR2]][[[J]]] = [[PTR1]][[[I]]]; + 
+func.func @load_store_opaque(%arg0: !emitc.opaque<"std::map">, %arg1: !emitc.opaque<"std::map">, %arg2: !emitc.opaque<"char">, %arg3: !emitc.opaque<"char">) { + %0 = emitc.subscript %arg0[%arg2] : (!emitc.opaque<"std::map">, !emitc.opaque<"char">) -> !emitc.opaque<"int"> + %1 = emitc.subscript %arg1[%arg3] : (!emitc.opaque<"std::map">, !emitc.opaque<"char">) -> !emitc.opaque<"int"> + emitc.assign %0 : !emitc.opaque<"int"> to %1 : !emitc.opaque<"int"> + return +} +// CHECK: void load_store_opaque(std::map [[MAP1:[^ ]*]], std::map [[MAP2:[^ ]*]], +// CHECK-SAME: char [[I:[^ ]*]], char [[J:[^ ]*]]) +// CHECK-NEXT: [[MAP2]][[[J]]] = [[MAP1]][[[I]]]; + emitc.func @func1(%arg0 : f32) { emitc.return } emitc.func @call_arg(%arg0: !emitc.array<4x8xf32>, %i: i32, %j: i16, %k: i8) { - %0 = emitc.subscript %arg0[%i, %j] : <4x8xf32>, i32, i16 - %1 = emitc.subscript %arg0[%j, %k] : <4x8xf32>, i16, i8 + %0 = emitc.subscript %arg0[%i, %j] : (!emitc.array<4x8xf32>, i32, i16) -> f32 + %1 = emitc.subscript %arg0[%j, %k] : (!emitc.array<4x8xf32>, i16, i8) -> f32 emitc.call @func1 (%0) : (f32) -> () emitc.call_opaque "func2" (%1) : (f32) -> () -- cgit v1.1 From 956b47b48616148c15f8f95d76d5e0c215fe095c Mon Sep 17 00:00:00 2001 From: Longsheng Mou Date: Wed, 3 Apr 2024 19:12:12 +0800 Subject: [X86_32] Teach X86_32 va_arg to ignore empty structs. (#86075) Empty structs are ignored for parameter passing purposes, but va_arg was incrementing the pointer anyway for that the size of empty struct in c++ is 1 byte, which could lead to va_list getting out of sync. Fix #86057. --- clang/lib/CodeGen/Targets/X86.cpp | 6 ++++++ clang/test/CodeGenCXX/x86_32-vaarg.cpp | 21 +++++++++++++++++++++ 2 files changed, 27 insertions(+) create mode 100644 clang/test/CodeGenCXX/x86_32-vaarg.cpp diff --git a/clang/lib/CodeGen/Targets/X86.cpp b/clang/lib/CodeGen/Targets/X86.cpp index 1146a85..c831777 100644 --- a/clang/lib/CodeGen/Targets/X86.cpp +++ b/clang/lib/CodeGen/Targets/X86.cpp @@ -1069,6 +1069,12 @@ Address X86_32ABIInfo::EmitVAArg(CodeGenFunction &CGF, auto TypeInfo = getContext().getTypeInfoInChars(Ty); + CCState State(*const_cast(CGF.CurFnInfo)); + ABIArgInfo AI = classifyArgumentType(Ty, State, /*ArgIndex*/ 0); + // Empty records are ignored for parameter passing purposes. + if (AI.isIgnore()) + return CGF.CreateMemTemp(Ty); + // x86-32 changes the alignment of certain arguments on the stack. // // Just messing with TypeInfo like this works because we never pass diff --git a/clang/test/CodeGenCXX/x86_32-vaarg.cpp b/clang/test/CodeGenCXX/x86_32-vaarg.cpp new file mode 100644 index 0000000..dcc2f7f --- /dev/null +++ b/clang/test/CodeGenCXX/x86_32-vaarg.cpp @@ -0,0 +1,21 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// RUN: %clang_cc1 -triple i386-linux-gnu -emit-llvm -o - %s | FileCheck %s + +typedef struct {} empty; + +// CHECK-LABEL: @_Z17empty_record_testiz( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[Z_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[LIST:%.*]] = alloca ptr, align 4 +// CHECK-NEXT: [[TMP:%.*]] = alloca [[STRUCT_EMPTY:%.*]], align 1 +// CHECK-NEXT: store ptr [[AGG_RESULT:%.*]], ptr [[RESULT_PTR]], align 4 +// CHECK-NEXT: store i32 [[Z:%.*]], ptr [[Z_ADDR]], align 4 +// CHECK-NEXT: call void @llvm.va_start.p0(ptr [[LIST]]) +// CHECK-NEXT: ret void +// +empty empty_record_test(int z, ...) 
{ + __builtin_va_list list; + __builtin_va_start(list, z); + return __builtin_va_arg(list, empty); +} -- cgit v1.1 From 0356d0cfdc5cc7173533c2ad6c2ea8ad342f1acc Mon Sep 17 00:00:00 2001 From: Gleb Popov <6yearold@gmail.com> Date: Wed, 3 Apr 2024 14:26:12 +0300 Subject: Print more descriptive error message when trying to link a global with appending linkage (#69613) This is a proper fix for https://github.com/llvm/llvm-project/issues/40308 --- llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 2 +- llvm/test/CodeGen/X86/AppendingLinkage.ll | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index a155387..293bb5a 100644 --- a/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -2927,7 +2927,7 @@ bool AsmPrinter::emitSpecialLLVMGlobal(const GlobalVariable *GV) { return true; } - report_fatal_error("unknown special variable"); + report_fatal_error("unknown special variable with appending linkage"); } /// EmitLLVMUsedList - For targets that define a MAI::UsedDirective, mark each diff --git a/llvm/test/CodeGen/X86/AppendingLinkage.ll b/llvm/test/CodeGen/X86/AppendingLinkage.ll index 83bfbe8..ace5d19 100644 --- a/llvm/test/CodeGen/X86/AppendingLinkage.ll +++ b/llvm/test/CodeGen/X86/AppendingLinkage.ll @@ -1,4 +1,4 @@ ; RUN: not --crash llc < %s -mtriple=i686-- 2>&1 | FileCheck %s -; CHECK: unknown special variable +; CHECK: unknown special variable with appending linkage @foo = appending constant [1 x i32 ]zeroinitializer -- cgit v1.1 From 5c1544c95394b79b377c7137ac34e3e63b6d5ee5 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Wed, 3 Apr 2024 13:28:23 +0200 Subject: [Object][COFF][NFC] Introduce getMachineArchType helper. (#87370) It's a common pattern that we have a machine type, but we don't care which ARM64* platform we're dealing with. We already have isAnyArm64 for that, but it does not fit cases where we use a switch statement. With this helper, it's easy to simplify such cases by using Triple::ArchType instead of machine type. --- llvm/include/llvm/Object/WindowsMachineFlag.h | 20 +++++++++++++ llvm/lib/Object/COFFObjectFile.cpp | 42 ++++++++------------------- llvm/lib/Object/WindowsResource.cpp | 13 ++++----- 3 files changed, 38 insertions(+), 37 deletions(-) diff --git a/llvm/include/llvm/Object/WindowsMachineFlag.h b/llvm/include/llvm/Object/WindowsMachineFlag.h index 05b8f0d..1cb408e 100644 --- a/llvm/include/llvm/Object/WindowsMachineFlag.h +++ b/llvm/include/llvm/Object/WindowsMachineFlag.h @@ -13,6 +13,9 @@ #ifndef LLVM_OBJECT_WINDOWSMACHINEFLAG_H #define LLVM_OBJECT_WINDOWSMACHINEFLAG_H +#include "llvm/BinaryFormat/COFF.h" +#include "llvm/TargetParser/Triple.h" + namespace llvm { class StringRef; @@ -28,6 +31,23 @@ StringRef machineToStr(COFF::MachineTypes MT); // Only returns ARMNT, ARM64, AMD64, I386, or IMAGE_FILE_MACHINE_UNKNOWN. 
COFF::MachineTypes getMachineType(StringRef S); +template Triple::ArchType getMachineArchType(T machine) { + switch (machine) { + case COFF::IMAGE_FILE_MACHINE_I386: + return llvm::Triple::ArchType::x86; + case COFF::IMAGE_FILE_MACHINE_AMD64: + return llvm::Triple::ArchType::x86_64; + case COFF::IMAGE_FILE_MACHINE_ARMNT: + return llvm::Triple::ArchType::thumb; + case COFF::IMAGE_FILE_MACHINE_ARM64: + case COFF::IMAGE_FILE_MACHINE_ARM64EC: + case COFF::IMAGE_FILE_MACHINE_ARM64X: + return llvm::Triple::ArchType::aarch64; + default: + return llvm::Triple::ArchType::UnknownArch; + } } +} // namespace llvm + #endif diff --git a/llvm/lib/Object/COFFObjectFile.cpp b/llvm/lib/Object/COFFObjectFile.cpp index 87009126..18506f3 100644 --- a/llvm/lib/Object/COFFObjectFile.cpp +++ b/llvm/lib/Object/COFFObjectFile.cpp @@ -14,18 +14,17 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/iterator_range.h" -#include "llvm/BinaryFormat/COFF.h" #include "llvm/Object/Binary.h" #include "llvm/Object/COFF.h" #include "llvm/Object/Error.h" #include "llvm/Object/ObjectFile.h" +#include "llvm/Object/WindowsMachineFlag.h" #include "llvm/Support/BinaryStreamReader.h" #include "llvm/Support/Endian.h" #include "llvm/Support/Error.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBufferRef.h" -#include "llvm/TargetParser/Triple.h" #include #include #include @@ -1072,20 +1071,7 @@ StringRef COFFObjectFile::getFileFormatName() const { } Triple::ArchType COFFObjectFile::getArch() const { - switch (getMachine()) { - case COFF::IMAGE_FILE_MACHINE_I386: - return Triple::x86; - case COFF::IMAGE_FILE_MACHINE_AMD64: - return Triple::x86_64; - case COFF::IMAGE_FILE_MACHINE_ARMNT: - return Triple::thumb; - case COFF::IMAGE_FILE_MACHINE_ARM64: - case COFF::IMAGE_FILE_MACHINE_ARM64EC: - case COFF::IMAGE_FILE_MACHINE_ARM64X: - return Triple::aarch64; - default: - return Triple::UnknownArch; - } + return getMachineArchType(getMachine()); } Expected COFFObjectFile::getStartAddress() const { @@ -1320,8 +1306,8 @@ COFFObjectFile::getRelocations(const coff_section *Sec) const { return #reloc_type; StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const { - switch (getMachine()) { - case COFF::IMAGE_FILE_MACHINE_AMD64: + switch (getArch()) { + case Triple::x86_64: switch (Type) { LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ABSOLUTE); LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ADDR64); @@ -1344,7 +1330,7 @@ StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const { return "Unknown"; } break; - case COFF::IMAGE_FILE_MACHINE_ARMNT: + case Triple::thumb: switch (Type) { LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ABSOLUTE); LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM_ADDR32); @@ -1367,9 +1353,7 @@ StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const { return "Unknown"; } break; - case COFF::IMAGE_FILE_MACHINE_ARM64: - case COFF::IMAGE_FILE_MACHINE_ARM64EC: - case COFF::IMAGE_FILE_MACHINE_ARM64X: + case Triple::aarch64: switch (Type) { LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ABSOLUTE); LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_ARM64_ADDR32); @@ -1393,7 +1377,7 @@ StringRef COFFObjectFile::getRelocationTypeName(uint16_t Type) const { return "Unknown"; } break; - case COFF::IMAGE_FILE_MACHINE_I386: + case Triple::x86: switch (Type) { LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_ABSOLUTE); LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_I386_DIR16); @@ -1941,19 +1925,17 @@ 
ResourceSectionRef::getContents(const coff_resource_data_entry &Entry) { // the expected type. const coff_relocation &R = **RelocsForOffset.first; uint16_t RVAReloc; - switch (Obj->getMachine()) { - case COFF::IMAGE_FILE_MACHINE_I386: + switch (Obj->getArch()) { + case Triple::x86: RVAReloc = COFF::IMAGE_REL_I386_DIR32NB; break; - case COFF::IMAGE_FILE_MACHINE_AMD64: + case Triple::x86_64: RVAReloc = COFF::IMAGE_REL_AMD64_ADDR32NB; break; - case COFF::IMAGE_FILE_MACHINE_ARMNT: + case Triple::thumb: RVAReloc = COFF::IMAGE_REL_ARM_ADDR32NB; break; - case COFF::IMAGE_FILE_MACHINE_ARM64: - case COFF::IMAGE_FILE_MACHINE_ARM64EC: - case COFF::IMAGE_FILE_MACHINE_ARM64X: + case Triple::aarch64: RVAReloc = COFF::IMAGE_REL_ARM64_ADDR32NB; break; default: diff --git a/llvm/lib/Object/WindowsResource.cpp b/llvm/lib/Object/WindowsResource.cpp index 61ca49e..983c8e3 100644 --- a/llvm/lib/Object/WindowsResource.cpp +++ b/llvm/lib/Object/WindowsResource.cpp @@ -12,6 +12,7 @@ #include "llvm/Object/WindowsResource.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/WindowsMachineFlag.h" #include "llvm/Support/FormatVariadic.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/ScopedPrinter.h" @@ -978,19 +979,17 @@ void WindowsResourceCOFFWriter::writeFirstSectionRelocations() { reinterpret_cast(BufferStart + CurrentOffset); Reloc->VirtualAddress = RelocationAddresses[i]; Reloc->SymbolTableIndex = NextSymbolIndex++; - switch (MachineType) { - case COFF::IMAGE_FILE_MACHINE_ARMNT: + switch (getMachineArchType(MachineType)) { + case Triple::thumb: Reloc->Type = COFF::IMAGE_REL_ARM_ADDR32NB; break; - case COFF::IMAGE_FILE_MACHINE_AMD64: + case Triple::x86_64: Reloc->Type = COFF::IMAGE_REL_AMD64_ADDR32NB; break; - case COFF::IMAGE_FILE_MACHINE_I386: + case Triple::x86: Reloc->Type = COFF::IMAGE_REL_I386_DIR32NB; break; - case COFF::IMAGE_FILE_MACHINE_ARM64: - case COFF::IMAGE_FILE_MACHINE_ARM64EC: - case COFF::IMAGE_FILE_MACHINE_ARM64X: + case Triple::aarch64: Reloc->Type = COFF::IMAGE_REL_ARM64_ADDR32NB; break; default: -- cgit v1.1 From 51107be7dd7f83a107b9c35c39b16081e38f7a54 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 3 Apr 2024 12:14:23 +0100 Subject: [X86] Haswell/Broadwell/Skylake DPPS folded instructions use an extra port06 resource This is an extension to 07151f0241d3f893cb36eb2dbc395d4098f74a87 which handled SandyBridge so we at least model the regression identified in #14640 Confirmed by Agner + uops.info/uica (SkylakeServer also had an incorrect use of Port015 instead of just Port01) I raised #86669 as a proposal for a 'x86 unfold' pass that can unfold these (if we have the free registers) driven by the scheduler model. 
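The effect is easiest to read off the llvm-mca resource tables updated below. For example, for the folded `vdpps $22, (%rax), %xmm1, %xmm2` on Broadwell (where, going by that test's resource legend, column [2] is BWPort0 and column [8] is BWPort6):

```
#   micro-ops:       5    -> 6
#   Port0 pressure:  2.00 -> 2.50
#   Port6 pressure:  -    -> 0.50   (the extra uop, split across the Port06 group)
```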
--- llvm/lib/Target/X86/X86SchedBroadwell.td | 6 ++++-- llvm/lib/Target/X86/X86SchedHaswell.td | 6 ++++-- llvm/lib/Target/X86/X86SchedSkylakeClient.td | 6 ++++-- llvm/lib/Target/X86/X86SchedSkylakeServer.td | 6 ++++-- .../test/tools/llvm-mca/X86/Broadwell/resources-avx1.s | 10 +++++----- .../tools/llvm-mca/X86/Broadwell/resources-sse41.s | 6 +++--- llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s | 10 +++++----- llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s | 6 +++--- .../tools/llvm-mca/X86/SkylakeClient/resources-avx1.s | 10 +++++----- .../tools/llvm-mca/X86/SkylakeClient/resources-sse41.s | 6 +++--- .../tools/llvm-mca/X86/SkylakeServer/resources-avx1.s | 18 +++++++++--------- .../tools/llvm-mca/X86/SkylakeServer/resources-sse41.s | 10 +++++----- 12 files changed, 54 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index 0027de8..b3ee7a8 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -324,8 +324,10 @@ defm : BWWriteResPair; // Fused Multiply defm : BWWriteResPair; // Fused Multiply Add (YMM/ZMM). defm : X86WriteResPairUnsupported; defm : BWWriteResPair; // Floating point double dot product. -defm : BWWriteResPair; // Floating point single dot product. -defm : BWWriteResPair; // Floating point single dot product (YMM). +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : BWWriteResPair; // Floating point fabs/fchs. defm : X86WriteRes; // Floating point rounding. defm : X86WriteRes; // Floating point rounding (YMM/ZMM). diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index a11b470..6c301a3 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -324,8 +324,10 @@ defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; -defm : HWWriteResPair; -defm : HWWriteResPair; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : HWWriteResPair; defm : X86WriteRes; defm : X86WriteRes; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeClient.td b/llvm/lib/Target/X86/X86SchedSkylakeClient.td index 4fa138f..3ee931f 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeClient.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeClient.td @@ -311,8 +311,10 @@ defm : SKLWriteResPair; defm : SKLWriteResPair; defm : X86WriteResPairUnsupported; defm : SKLWriteResPair; // Floating point double dot product. -defm : SKLWriteResPair; -defm : SKLWriteResPair; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : SKLWriteResPair; // Floating point fabs/fchs. defm : SKLWriteResPair; // Floating point rounding. defm : SKLWriteResPair; diff --git a/llvm/lib/Target/X86/X86SchedSkylakeServer.td b/llvm/lib/Target/X86/X86SchedSkylakeServer.td index 3da688c..a7dff0e 100644 --- a/llvm/lib/Target/X86/X86SchedSkylakeServer.td +++ b/llvm/lib/Target/X86/X86SchedSkylakeServer.td @@ -311,8 +311,10 @@ defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; defm : SKXWriteResPair; // Floating point double dot product. -defm : SKXWriteResPair; -defm : SKXWriteResPair; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; +defm : X86WriteRes; defm : SKXWriteResPair; // Floating point fabs/fchs. defm : SKXWriteResPair; // Floating point rounding. 
defm : SKXWriteResPair; diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s index 98b8619..ca1faf6 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s @@ -1189,9 +1189,9 @@ vzeroupper # CHECK-NEXT: 3 9 1.00 vdppd $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 4 14 1.00 * vdppd $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 4 14 2.00 vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 5 19 2.00 * vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 6 19 2.00 * vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 4 14 2.00 vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 5 20 2.00 * vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 6 20 2.00 * vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 1.00 vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: 2 2 1.00 vextractps $1, %xmm0, %ecx @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 257.00 215.25 235.25 176.17 176.17 38.00 424.25 2.25 12.67 +# CHECK-NEXT: - 257.00 216.25 235.25 176.17 176.17 38.00 424.25 3.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1899,9 +1899,9 @@ vzeroupper # CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - vdppd $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - vdppd $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s index a2899b4..dcc5353 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s @@ -166,7 +166,7 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2 # CHECK-NEXT: 4 14 1.00 * dppd $22, (%rax), %xmm2 # CHECK-NEXT: 4 14 2.00 dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: 5 19 2.00 * dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 6 19 2.00 * dpps $22, (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 extractps $1, %xmm0, %ecx # CHECK-NEXT: 3 2 1.00 * extractps $1, %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2 @@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 23.33 22.33 25.67 25.67 5.00 80.33 - 1.67 +# CHECK-NEXT: - - 23.83 22.33 25.67 25.67 5.00 80.33 0.50 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -281,7 +281,7 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - dppd $22, %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - dppd $22, (%rax), %xmm2 # CHECK-NEXT: - - 2.00 1.00 - - - 
1.00 - - dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - dpps $22, (%rax), %xmm2 +# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - dpps $22, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - extractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 extractps $1, %xmm0, (%rax) # CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s index 376070d..cff60c9 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ -1189,9 +1189,9 @@ vzeroupper # CHECK-NEXT: 3 9 1.00 vdppd $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 4 15 1.00 * vdppd $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 4 14 2.00 vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 5 20 2.00 * vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 6 20 2.00 * vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 4 14 2.00 vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 5 21 2.00 * vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 6 21 2.00 * vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 1.00 vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: 2 2 1.00 vextractps $1, %xmm0, %ecx @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 336.00 214.58 236.58 176.17 176.17 38.00 427.58 2.25 12.67 +# CHECK-NEXT: - 336.00 215.58 236.58 176.17 176.17 38.00 427.58 3.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1899,9 +1899,9 @@ vzeroupper # CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - vdppd $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - vdppd $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s index 70d9398..c2d0773 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s @@ -166,7 +166,7 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2 # CHECK-NEXT: 4 15 1.00 * dppd $22, (%rax), %xmm2 # CHECK-NEXT: 4 14 2.00 dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: 5 20 2.00 * dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 6 20 2.00 * dpps $22, (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 extractps $1, %xmm0, %ecx # CHECK-NEXT: 3 2 1.00 * extractps $1, %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2 @@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 23.33 22.33 25.67 25.67 5.00 80.33 - 1.67 +# CHECK-NEXT: - - 23.83 22.33 25.67 25.67 5.00 
80.33 0.50 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -281,7 +281,7 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - dppd $22, %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - dppd $22, (%rax), %xmm2 # CHECK-NEXT: - - 2.00 1.00 - - - 1.00 - - dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: - - 2.00 1.00 0.50 0.50 - 1.00 - - dpps $22, (%rax), %xmm2 +# CHECK-NEXT: - - 2.50 1.00 0.50 0.50 - 1.00 0.50 - dpps $22, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - extractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 extractps $1, %xmm0, (%rax) # CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s index c2e0217..ef5a9e3 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-avx1.s @@ -1189,9 +1189,9 @@ vzeroupper # CHECK-NEXT: 3 9 1.00 vdppd $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 4 15 1.00 * vdppd $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 4 13 1.50 vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 5 19 1.50 * vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 6 19 1.50 * vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 4 13 1.50 vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 5 20 1.50 * vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 6 20 1.50 * vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 1.00 vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: 2 3 1.00 vextractps $1, %xmm0, %ecx @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 126.00 338.58 199.58 173.83 173.83 38.00 326.58 5.25 11.33 +# CHECK-NEXT: - 126.00 339.58 199.58 173.83 173.83 38.00 326.58 6.25 11.33 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1899,9 +1899,9 @@ vzeroupper # CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - vdppd $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - vdppd $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 1.50 1.50 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 1.50 1.50 0.50 0.50 - 1.00 - - vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: - - - - - - - 1.00 - - vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s index 6e11bb6..1d8d67f 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeClient/resources-sse41.s @@ -166,7 +166,7 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2 # CHECK-NEXT: 4 15 1.00 * dppd $22, (%rax), %xmm2 # CHECK-NEXT: 4 13 1.50 dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: 5 19 1.50 * dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 6 19 1.50 * dpps $22, (%rax), %xmm2 # 
CHECK-NEXT: 2 3 1.00 extractps $1, %xmm0, %ecx # CHECK-NEXT: 3 2 1.00 * extractps $1, %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2 @@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 37.33 31.33 23.67 23.67 5.00 63.33 - 1.67 +# CHECK-NEXT: - - 37.83 31.33 23.67 23.67 5.00 63.33 0.50 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -281,7 +281,7 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 1.00 - - - 1.00 - - dppd $22, %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 1.00 - - dppd $22, (%rax), %xmm2 # CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: - - 1.50 1.50 0.50 0.50 - 1.00 - - dpps $22, (%rax), %xmm2 +# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - dpps $22, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - extractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 extractps $1, %xmm0, (%rax) # CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %xmm0, %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s index de14ef7..cabb002 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-avx1.s @@ -1188,10 +1188,10 @@ vzeroupper # CHECK-NEXT: 2 16 3.00 * vdivss (%rax), %xmm1, %xmm2 # CHECK-NEXT: 3 9 1.00 vdppd $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 4 15 1.00 * vdppd $22, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 4 13 1.33 vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: 5 19 1.33 * vdpps $22, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 4 13 1.33 vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: 5 20 1.33 * vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: 4 13 1.50 vdpps $22, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 6 19 1.50 * vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: 4 13 1.50 vdpps $22, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: 6 20 1.50 * vdpps $22, (%rax), %ymm1, %ymm2 # CHECK-NEXT: 1 3 1.00 vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: 2 1 1.00 * vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: 2 3 1.00 vextractps $1, %xmm0, %ecx @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 126.00 322.25 200.25 173.83 173.83 38.00 330.25 6.25 11.33 +# CHECK-NEXT: - 126.00 325.25 202.25 173.83 173.83 38.00 326.25 7.25 11.33 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -1898,10 +1898,10 @@ vzeroupper # CHECK-NEXT: - 3.00 1.00 - 0.50 0.50 - - - - vdivss (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 0.67 0.67 - - - 1.67 - - vdppd $22, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 1.67 - - vdppd $22, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 1.00 1.00 - - - 2.00 - - vdpps $22, %xmm0, %xmm1, %xmm2 -# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 2.00 - - vdpps $22, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - 1.00 1.00 - - - 2.00 - - vdpps $22, %ymm0, %ymm1, %ymm2 -# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 2.00 - - vdpps $22, (%rax), %ymm1, %ymm2 +# CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - vdpps $22, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %xmm1, %xmm2 +# CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - vdpps $22, %ymm0, %ymm1, %ymm2 +# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - vdpps $22, (%rax), %ymm1, %ymm2 # 
CHECK-NEXT: - - - - - - - 1.00 - - vextractf128 $1, %ymm0, %xmm2 # CHECK-NEXT: - - - - 0.33 0.33 1.00 - - 0.33 vextractf128 $1, %ymm0, (%rax) # CHECK-NEXT: - - 1.00 - - - - 1.00 - - vextractps $1, %xmm0, %ecx diff --git a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s index 15cd09b..e3f34fd 100644 --- a/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/SkylakeServer/resources-sse41.s @@ -165,8 +165,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 3 8 0.67 * blendvps %xmm0, (%rax), %xmm2 # CHECK-NEXT: 3 9 1.00 dppd $22, %xmm0, %xmm2 # CHECK-NEXT: 4 15 1.00 * dppd $22, (%rax), %xmm2 -# CHECK-NEXT: 4 13 1.33 dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: 5 19 1.33 * dpps $22, (%rax), %xmm2 +# CHECK-NEXT: 4 13 1.50 dpps $22, %xmm0, %xmm2 +# CHECK-NEXT: 6 19 1.50 * dpps $22, (%rax), %xmm2 # CHECK-NEXT: 2 3 1.00 extractps $1, %xmm0, %ecx # CHECK-NEXT: 3 2 1.00 * extractps $1, %xmm0, (%rax) # CHECK-NEXT: 1 1 1.00 insertps $1, %xmm0, %xmm2 @@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 36.67 28.67 23.67 23.67 5.00 66.67 - 1.67 +# CHECK-NEXT: - - 38.17 29.67 23.67 23.67 5.00 64.67 0.50 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -280,8 +280,8 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 0.67 - - blendvps %xmm0, (%rax), %xmm2 # CHECK-NEXT: - - 0.67 0.67 - - - 1.67 - - dppd $22, %xmm0, %xmm2 # CHECK-NEXT: - - 0.67 0.67 0.50 0.50 - 1.67 - - dppd $22, (%rax), %xmm2 -# CHECK-NEXT: - - 1.00 1.00 - - - 2.00 - - dpps $22, %xmm0, %xmm2 -# CHECK-NEXT: - - 1.00 1.00 0.50 0.50 - 2.00 - - dpps $22, (%rax), %xmm2 +# CHECK-NEXT: - - 1.50 1.50 - - - 1.00 - - dpps $22, %xmm0, %xmm2 +# CHECK-NEXT: - - 2.00 1.50 0.50 0.50 - 1.00 0.50 - dpps $22, (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - extractps $1, %xmm0, %ecx # CHECK-NEXT: - - - - 0.33 0.33 1.00 1.00 - 0.33 extractps $1, %xmm0, (%rax) # CHECK-NEXT: - - - - - - - 1.00 - - insertps $1, %xmm0, %xmm2 -- cgit v1.1 From 52b18430ae105566f26152c0efc63998301b1134 Mon Sep 17 00:00:00 2001 From: AinsleySnow <772571228@qq.com> Date: Wed, 3 Apr 2024 19:45:50 +0800 Subject: [VP][DAGCombine] Use `simplifySelect` when combining vp.select. (#87342) Hi all, This patch is a follow-up of #79101. It migrates logic from `visitVSELECT` to `visitVP_SELECT` to simplify `vp.select`. With this patch we can do the following combinations: ``` vp.select undef, T, F --> T (if T is a constant), F otherwise vp.select , undef, F --> F vp.select , T, undef --> T vp.select false, T, F --> F vp.select , T, T --> T ``` I'm a total newbie to llvm and I'm sure there's room for improvements in this patch. Please let me know if you have any advice. Thank you in advance! 
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 7 ++++ llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll | 53 +++++++++++++++++++++++++++ 2 files changed, 60 insertions(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2f46b23..b889e4f 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12056,6 +12056,13 @@ SDValue DAGCombiner::foldVSelectOfConstants(SDNode *N) { } SDValue DAGCombiner::visitVP_SELECT(SDNode *N) { + SDValue N0 = N->getOperand(0); + SDValue N1 = N->getOperand(1); + SDValue N2 = N->getOperand(2); + + if (SDValue V = DAG.simplifySelect(N0, N1, N2)) + return V; + if (SDValue V = foldBoolSelectToLogic(N, DAG)) return V; diff --git a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll index 0d52dd7..0a5e501 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vselect-vp.ll @@ -825,3 +825,56 @@ define @select_cond_x_cond( %x, @llvm.vp.select.nxv2i1( %x, %y, %x, i32 %evl) ret %a } + +define @select_undef_T_F( %x, %y, i32 zeroext %evl) { +; CHECK-LABEL: select_undef_T_F: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %a = call @llvm.vp.select.nxv2i1( poison, %x, %y, i32 %evl) + ret %a +} + +define @select_undef_undef_F( %x, i32 zeroext %evl) { +; CHECK-LABEL: select_undef_undef_F: +; CHECK: # %bb.0: +; CHECK-NEXT: ret + %a = call @llvm.vp.select.nxv2i1( poison, undef, %x, i32 %evl) + ret %a +} + +define @select_unknown_undef_F( %x, %y, i32 zeroext %evl) { +; CHECK-LABEL: select_unknown_undef_F: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %a = call @llvm.vp.select.nxv2i1( %x, undef, %y, i32 %evl) + ret %a +} + +define @select_unknown_T_undef( %x, %y, i32 zeroext %evl) { +; CHECK-LABEL: select_unknown_T_undef: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %a = call @llvm.vp.select.nxv2i1( %x, %y, poison, i32 %evl) + ret %a +} + +define @select_false_T_F( %x, %y, %z, i32 zeroext %evl) { +; CHECK-LABEL: select_false_T_F: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v0, v9 +; CHECK-NEXT: ret + %a = call @llvm.vp.select.nxv2i1( zeroinitializer, %y, %z, i32 %evl) + ret %a +} + +define @select_unknown_T_T( %x, %y, i32 zeroext %evl) { +; CHECK-LABEL: select_unknown_T_T: +; CHECK: # %bb.0: +; CHECK-NEXT: vmv1r.v v0, v8 +; CHECK-NEXT: ret + %a = call @llvm.vp.select.nxv2i1( %x, %y, %y, i32 %evl) + ret %a +} -- cgit v1.1 From 98244c4e2acafb7568e8337088c6caaaffcb7831 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 3 Apr 2024 08:04:43 -0400 Subject: [libc++] Upstream ptrauth support in libc++ and libc++abi (#84573) This is an exact upstreaming of the downstream diff. Minor simplifications can be made in the future but upstreaming as-is will make it easier for us to deal with downstream merge conflicts. 
Partially fixes #83805 --- libcxx/include/typeinfo | 14 +++++++++++++- libcxx/src/include/overridable_function.h | 12 ++++++++++++ libcxxabi/src/private_typeinfo.cpp | 19 +++++++++++++++++++ 3 files changed, 44 insertions(+), 1 deletion(-) diff --git a/libcxx/include/typeinfo b/libcxx/include/typeinfo index 1ae075e..d1c0de3 100644 --- a/libcxx/include/typeinfo +++ b/libcxx/include/typeinfo @@ -275,7 +275,19 @@ struct __type_info_implementations { __impl; }; -class _LIBCPP_EXPORTED_FROM_ABI type_info { +# if defined(__arm64__) && __has_cpp_attribute(clang::ptrauth_vtable_pointer) +# if __has_feature(ptrauth_type_info_discriminated_vtable_pointer) +# define _LIBCPP_TYPE_INFO_VTABLE_POINTER_AUTH \ + [[clang::ptrauth_vtable_pointer(process_independent, address_discrimination, type_discrimination)]] +# else +# define _LIBCPP_TYPE_INFO_VTABLE_POINTER_AUTH \ + [[clang::ptrauth_vtable_pointer(process_independent, no_address_discrimination, no_extra_discrimination)]] +# endif +# else +# define _LIBCPP_TYPE_INFO_VTABLE_POINTER_AUTH +# endif + +class _LIBCPP_EXPORTED_FROM_ABI _LIBCPP_TYPE_INFO_VTABLE_POINTER_AUTH type_info { type_info& operator=(const type_info&); type_info(const type_info&); diff --git a/libcxx/src/include/overridable_function.h b/libcxx/src/include/overridable_function.h index 7b0fba1..fca66ea 100644 --- a/libcxx/src/include/overridable_function.h +++ b/libcxx/src/include/overridable_function.h @@ -13,6 +13,10 @@ #include <__config> #include +#if defined(__arm64e__) && __has_feature(ptrauth_calls) +# include +#endif + #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header #endif @@ -81,6 +85,14 @@ _LIBCPP_HIDE_FROM_ABI bool __is_function_overridden(_Ret (*__fptr)(_Args...)) no uintptr_t __end = reinterpret_cast(&__lcxx_override_end); uintptr_t __ptr = reinterpret_cast(__fptr); +#if defined(__arm64e__) && __has_feature(ptrauth_calls) + // We must pass a void* to ptrauth_strip since it only accepts a pointer type. Also, in particular, + // we must NOT pass a function pointer, otherwise we will strip the function pointer, and then attempt + // to authenticate and re-sign it when casting it to a uintptr_t again, which will fail because we just + // stripped the function pointer. See rdar://122927845. + __ptr = reinterpret_cast(ptrauth_strip(reinterpret_cast(__ptr), ptrauth_key_function_pointer)); +#endif + // Finally, the function was overridden if it falls outside of the section's bounds. 
return __ptr < __start || __ptr > __end; } diff --git a/libcxxabi/src/private_typeinfo.cpp b/libcxxabi/src/private_typeinfo.cpp index 5c68f3e..9e58501 100644 --- a/libcxxabi/src/private_typeinfo.cpp +++ b/libcxxabi/src/private_typeinfo.cpp @@ -51,6 +51,21 @@ #include #endif +#if __has_feature(ptrauth_calls) +#include +#endif + + +template +static inline +T * +get_vtable(T *vtable) { +#if __has_feature(ptrauth_calls) + vtable = ptrauth_strip(vtable, ptrauth_key_cxx_vtable_pointer); +#endif + return vtable; +} + static inline bool is_equal(const std::type_info* x, const std::type_info* y, bool use_strcmp) @@ -103,6 +118,7 @@ void dyn_cast_get_derived_info(derived_object_info* info, const void* static_ptr info->dynamic_type = *(reinterpret_cast(ptr_to_ti_proxy)); #else void **vtable = *static_cast(static_ptr); + vtable = get_vtable(vtable); info->offset_to_derived = reinterpret_cast(vtable[-2]); info->dynamic_ptr = static_cast(static_ptr) + info->offset_to_derived; info->dynamic_type = static_cast(vtable[-1]); @@ -561,6 +577,7 @@ __base_class_type_info::has_unambiguous_public_base(__dynamic_cast_info* info, offset_to_base = __offset_flags >> __offset_shift; if (is_virtual) { const char* vtable = *static_cast(adjustedPtr); + vtable = get_vtable(vtable); offset_to_base = update_offset_to_base(vtable, offset_to_base); } } else if (!is_virtual) { @@ -1501,6 +1518,7 @@ __base_class_type_info::search_above_dst(__dynamic_cast_info* info, if (__offset_flags & __virtual_mask) { const char* vtable = *static_cast(current_ptr); + vtable = get_vtable(vtable); offset_to_base = update_offset_to_base(vtable, offset_to_base); } __base_type->search_above_dst(info, dst_ptr, @@ -1521,6 +1539,7 @@ __base_class_type_info::search_below_dst(__dynamic_cast_info* info, if (__offset_flags & __virtual_mask) { const char* vtable = *static_cast(current_ptr); + vtable = get_vtable(vtable); offset_to_base = update_offset_to_base(vtable, offset_to_base); } __base_type->search_below_dst(info, -- cgit v1.1 From b699a9ba112cd9fc861eccfcdd2a7c9886423bde Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 3 Apr 2024 08:21:11 -0400 Subject: [libc++] Update status page after the Tokyo meeting (#87395) --- libcxx/docs/FeatureTestMacroTable.rst | 870 +++++++++++---------- libcxx/docs/Status/Cxx2cIssues.csv | 22 + libcxx/docs/Status/Cxx2cPapers.csv | 17 + libcxx/include/version | 17 + .../algorithm.version.compile.pass.cpp | 56 +- .../atomic.version.compile.pass.cpp | 34 + .../deque.version.compile.pass.cpp | 44 +- .../filesystem.version.compile.pass.cpp | 49 +- .../forward_list.version.compile.pass.cpp | 48 +- .../functional.version.compile.pass.cpp | 34 + .../list.version.compile.pass.cpp | 48 +- .../optional.version.compile.pass.cpp | 34 + .../random.version.compile.pass.cpp | 71 ++ .../ranges.version.compile.pass.cpp | 90 ++- .../string.version.compile.pass.cpp | 62 +- .../tuple.version.compile.pass.cpp | 52 +- .../utility.version.compile.pass.cpp | 34 + .../variant.version.compile.pass.cpp | 34 + .../vector.version.compile.pass.cpp | 48 +- .../version.version.compile.pass.cpp | 669 +++++++++++----- .../generate_feature_test_macro_components.py | 87 ++- 21 files changed, 1684 insertions(+), 736 deletions(-) create mode 100644 libcxx/test/std/language.support/support.limits/support.limits.general/random.version.compile.pass.cpp diff --git a/libcxx/docs/FeatureTestMacroTable.rst b/libcxx/docs/FeatureTestMacroTable.rst index b213f43..014ac1c 100644 --- a/libcxx/docs/FeatureTestMacroTable.rst +++ 
b/libcxx/docs/FeatureTestMacroTable.rst @@ -21,437 +21,451 @@ Status :name: feature-status-table :widths: auto - =================================================== ================= - Macro Name Value - =================================================== ================= + ========================================================== ================= + Macro Name Value + ========================================================== ================= **C++14** - --------------------------------------------------------------------- - ``__cpp_lib_chrono_udls`` ``201304L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_complex_udls`` ``201309L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_exchange_function`` ``201304L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_generic_associative_lookup`` ``201304L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_integer_sequence`` ``201304L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_integral_constant_callable`` ``201304L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_is_final`` ``201402L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_is_null_pointer`` ``201309L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_make_reverse_iterator`` ``201402L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_make_unique`` ``201304L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_null_iterators`` ``201304L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_quoted_string_io`` ``201304L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_result_of_sfinae`` ``201210L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_robust_nonmodifying_seq_ops`` ``201304L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_shared_timed_mutex`` ``201402L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_string_udls`` ``201304L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_transformation_trait_aliases`` ``201304L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_transparent_operators`` ``201210L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_tuple_element_t`` ``201402L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_tuples_by_type`` ``201304L`` - --------------------------------------------------- ----------------- + ---------------------------------------------------------------------------- + ``__cpp_lib_chrono_udls`` ``201304L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_complex_udls`` ``201309L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_exchange_function`` ``201304L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_generic_associative_lookup`` ``201304L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_integer_sequence`` ``201304L`` + 
---------------------------------------------------------- ----------------- + ``__cpp_lib_integral_constant_callable`` ``201304L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_is_final`` ``201402L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_is_null_pointer`` ``201309L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_make_reverse_iterator`` ``201402L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_make_unique`` ``201304L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_null_iterators`` ``201304L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_quoted_string_io`` ``201304L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_result_of_sfinae`` ``201210L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_robust_nonmodifying_seq_ops`` ``201304L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_shared_timed_mutex`` ``201402L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_string_udls`` ``201304L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_transformation_trait_aliases`` ``201304L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_transparent_operators`` ``201210L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_tuple_element_t`` ``201402L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_tuples_by_type`` ``201304L`` + ---------------------------------------------------------- ----------------- **C++17** - --------------------------------------------------------------------- - ``__cpp_lib_addressof_constexpr`` ``201603L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_allocator_traits_is_always_equal`` ``201411L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_any`` ``201606L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_apply`` ``201603L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_array_constexpr`` ``201603L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_as_const`` ``201510L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_atomic_is_always_lock_free`` ``201603L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_bool_constant`` ``201505L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_boyer_moore_searcher`` ``201603L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_byte`` ``201603L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_chrono`` ``201611L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_clamp`` ``201603L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_enable_shared_from_this`` ``201603L`` - --------------------------------------------------- ----------------- - 
``__cpp_lib_execution`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_filesystem`` ``201703L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_gcd_lcm`` ``201606L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_hardware_interference_size`` ``201703L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_has_unique_object_representations`` ``201606L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_hypot`` ``201603L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_incomplete_container_elements`` ``201505L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_invoke`` ``201411L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_is_aggregate`` ``201703L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_is_invocable`` ``201703L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_is_swappable`` ``201603L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_launder`` ``201606L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_logical_traits`` ``201510L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_make_from_tuple`` ``201606L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_map_try_emplace`` ``201411L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_math_special_functions`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_memory_resource`` ``201603L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_node_extract`` ``201606L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_nonmember_container_access`` ``201411L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_not_fn`` ``201603L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_optional`` ``201606L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_parallel_algorithm`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_raw_memory_algorithms`` ``201606L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_sample`` ``201603L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_scoped_lock`` ``201703L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_shared_mutex`` ``201505L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_shared_ptr_arrays`` ``201611L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_shared_ptr_weak_type`` ``201606L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_string_view`` ``201606L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_to_chars`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_transparent_operators`` ``201510L`` - 
--------------------------------------------------- ----------------- - ``__cpp_lib_type_trait_variable_templates`` ``201510L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_uncaught_exceptions`` ``201411L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_unordered_map_try_emplace`` ``201411L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_variant`` ``202102L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_void_t`` ``201411L`` - --------------------------------------------------- ----------------- + ---------------------------------------------------------------------------- + ``__cpp_lib_addressof_constexpr`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_allocator_traits_is_always_equal`` ``201411L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_any`` ``201606L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_apply`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_array_constexpr`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_as_const`` ``201510L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_atomic_is_always_lock_free`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_bool_constant`` ``201505L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_boyer_moore_searcher`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_byte`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_chrono`` ``201611L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_clamp`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_enable_shared_from_this`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_execution`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_filesystem`` ``201703L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_gcd_lcm`` ``201606L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_hardware_interference_size`` ``201703L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_has_unique_object_representations`` ``201606L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_hypot`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_incomplete_container_elements`` ``201505L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_invoke`` ``201411L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_is_aggregate`` ``201703L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_is_invocable`` ``201703L`` + ---------------------------------------------------------- 
----------------- + ``__cpp_lib_is_swappable`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_launder`` ``201606L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_logical_traits`` ``201510L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_make_from_tuple`` ``201606L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_map_try_emplace`` ``201411L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_math_special_functions`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_memory_resource`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_node_extract`` ``201606L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_nonmember_container_access`` ``201411L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_not_fn`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_optional`` ``201606L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_parallel_algorithm`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_raw_memory_algorithms`` ``201606L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_sample`` ``201603L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_scoped_lock`` ``201703L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_shared_mutex`` ``201505L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_shared_ptr_arrays`` ``201611L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_shared_ptr_weak_type`` ``201606L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_string_view`` ``201606L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_to_chars`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_transparent_operators`` ``201510L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_type_trait_variable_templates`` ``201510L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_uncaught_exceptions`` ``201411L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_unordered_map_try_emplace`` ``201411L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_variant`` ``202102L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_void_t`` ``201411L`` + ---------------------------------------------------------- ----------------- **C++20** - --------------------------------------------------------------------- - ``__cpp_lib_array_constexpr`` ``201811L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_assume_aligned`` ``201811L`` - --------------------------------------------------- ----------------- - 
``__cpp_lib_atomic_flag_test`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_atomic_float`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_atomic_lock_free_type_aliases`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_atomic_ref`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_atomic_shared_ptr`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_atomic_value_initialization`` ``201911L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_atomic_wait`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_barrier`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_bind_front`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_bit_cast`` ``201806L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_bitops`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_bounded_array_traits`` ``201902L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_char8_t`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_concepts`` ``202002L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_algorithms`` ``201806L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_complex`` ``201711L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_dynamic_alloc`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_functional`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_iterator`` ``201811L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_memory`` ``201811L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_numeric`` ``201911L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_string`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_string_view`` ``201811L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_tuple`` ``201811L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_utility`` ``201811L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_vector`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_coroutine`` ``201902L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_destroying_delete`` ``201806L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_endian`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_erase_if`` ``202002L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_execution`` *unimplemented* - 
--------------------------------------------------- ----------------- - ``__cpp_lib_format`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_format_uchar`` ``202311L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_generic_unordered_lookup`` ``201811L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_int_pow2`` ``202002L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_integer_comparison_functions`` ``202002L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_interpolate`` ``201902L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_is_constant_evaluated`` ``201811L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_is_layout_compatible`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_is_nothrow_convertible`` ``201806L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_is_pointer_interconvertible`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_jthread`` ``201911L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_latch`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_list_remove_return_type`` ``201806L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_math_constants`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_move_iterator_concept`` ``202207L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_polymorphic_allocator`` ``201902L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges`` ``202207L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_remove_cvref`` ``201711L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_semaphore`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_shared_ptr_arrays`` ``201707L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_shift`` ``201806L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_smart_ptr_for_overwrite`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_source_location`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_span`` ``202002L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_ssize`` ``201902L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_starts_ends_with`` ``201711L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_string_view`` ``201803L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_syncbuf`` ``201803L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_three_way_comparison`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_to_address`` ``201711L`` - --------------------------------------------------- ----------------- - 
``__cpp_lib_to_array`` ``201907L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_type_identity`` ``201806L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_unwrap_ref`` ``201811L`` - --------------------------------------------------- ----------------- + ---------------------------------------------------------------------------- + ``__cpp_lib_array_constexpr`` ``201811L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_assume_aligned`` ``201811L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_atomic_flag_test`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_atomic_float`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_atomic_lock_free_type_aliases`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_atomic_ref`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_atomic_shared_ptr`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_atomic_value_initialization`` ``201911L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_atomic_wait`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_barrier`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_bind_front`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_bit_cast`` ``201806L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_bitops`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_bounded_array_traits`` ``201902L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_char8_t`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_concepts`` ``202002L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_algorithms`` ``201806L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_complex`` ``201711L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_dynamic_alloc`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_functional`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_iterator`` ``201811L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_memory`` ``201811L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_numeric`` ``201911L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_string`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_string_view`` ``201811L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_tuple`` ``201811L`` + 
---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_utility`` ``201811L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_vector`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_coroutine`` ``201902L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_destroying_delete`` ``201806L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_endian`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_erase_if`` ``202002L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_execution`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_format`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_format_uchar`` ``202311L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_generic_unordered_lookup`` ``201811L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_int_pow2`` ``202002L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_integer_comparison_functions`` ``202002L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_interpolate`` ``201902L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_is_constant_evaluated`` ``201811L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_is_layout_compatible`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_is_nothrow_convertible`` ``201806L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_is_pointer_interconvertible`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_jthread`` ``201911L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_latch`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_list_remove_return_type`` ``201806L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_math_constants`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_move_iterator_concept`` ``202207L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_polymorphic_allocator`` ``201902L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges`` ``202207L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_remove_cvref`` ``201711L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_semaphore`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_shared_ptr_arrays`` ``201707L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_shift`` ``201806L`` + ---------------------------------------------------------- ----------------- + 
``__cpp_lib_smart_ptr_for_overwrite`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_source_location`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_span`` ``202002L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ssize`` ``201902L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_starts_ends_with`` ``201711L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_string_view`` ``201803L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_syncbuf`` ``201803L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_three_way_comparison`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_to_address`` ``201711L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_to_array`` ``201907L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_type_identity`` ``201806L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_unwrap_ref`` ``201811L`` + ---------------------------------------------------------- ----------------- **C++23** - --------------------------------------------------------------------- - ``__cpp_lib_adaptor_iterator_pair_constructor`` ``202106L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_allocate_at_least`` ``202302L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_associative_heterogeneous_erasure`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_bind_back`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_byteswap`` ``202110L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_bitset`` ``202207L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_charconv`` ``202207L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_cmath`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_memory`` ``202202L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_constexpr_typeinfo`` ``202106L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_expected`` ``202211L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_format_ranges`` ``202207L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_formatters`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_forward_like`` ``202207L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_invoke_r`` ``202106L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_ios_noreplace`` ``202207L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_is_scoped_enum`` ``202011L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_mdspan`` ``202207L`` 
- --------------------------------------------------- ----------------- - ``__cpp_lib_move_only_function`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_optional`` ``202110L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_out_ptr`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_print`` ``202207L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges_as_const`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges_as_rvalue`` ``202207L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges_chunk`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges_chunk_by`` ``202202L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges_contains`` ``202207L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges_iota`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges_join_with`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges_repeat`` ``202207L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges_slide`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges_starts_ends_with`` ``202106L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges_to_container`` ``202202L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_ranges_zip`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_reference_from_temporary`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_spanstream`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_stacktrace`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_stdatomic_h`` ``202011L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_string_contains`` ``202011L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_string_resize_and_overwrite`` ``202110L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_to_string`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_to_underlying`` ``202102L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_tuple_like`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_unreachable`` ``202202L`` - --------------------------------------------------- ----------------- + ---------------------------------------------------------------------------- + ``__cpp_lib_adaptor_iterator_pair_constructor`` ``202106L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_allocate_at_least`` ``202302L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_associative_heterogeneous_erasure`` *unimplemented* + 
---------------------------------------------------------- ----------------- + ``__cpp_lib_bind_back`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_byteswap`` ``202110L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_bitset`` ``202207L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_charconv`` ``202207L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_cmath`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_memory`` ``202202L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constexpr_typeinfo`` ``202106L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_expected`` ``202211L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_format_path`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_format_ranges`` ``202207L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_formatters`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_forward_like`` ``202207L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_invoke_r`` ``202106L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ios_noreplace`` ``202207L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_is_scoped_enum`` ``202011L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_mdspan`` ``202207L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_move_only_function`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_optional`` ``202110L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_out_ptr`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_print`` ``202207L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_as_const`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_as_rvalue`` ``202207L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_chunk`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_chunk_by`` ``202202L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_contains`` ``202207L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_iota`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_join_with`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_repeat`` ``202207L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_slide`` *unimplemented* + 
---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_starts_ends_with`` ``202106L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_to_container`` ``202202L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_zip`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_reference_from_temporary`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_spanstream`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_stacktrace`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_stdatomic_h`` ``202011L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_string_contains`` ``202011L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_string_resize_and_overwrite`` ``202110L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_to_string`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_to_underlying`` ``202102L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_tuple_like`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_unreachable`` ``202202L`` + ---------------------------------------------------------- ----------------- **C++26** - --------------------------------------------------------------------- - ``__cpp_lib_associative_heterogeneous_insertion`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_bind_back`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_bind_front`` ``202306L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_bitset`` ``202306L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_copyable_function`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_debugging`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_freestanding_algorithm`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_freestanding_array`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_freestanding_cstring`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_freestanding_expected`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_freestanding_mdspan`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_freestanding_optional`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_freestanding_string_view`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_freestanding_variant`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_fstream_native_handle`` ``202306L`` - 
--------------------------------------------------- ----------------- - ``__cpp_lib_function_ref`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_hazard_pointer`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_linalg`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_out_ptr`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_ratio`` ``202306L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_rcu`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_saturation_arithmetic`` ``202311L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_smart_ptr_owner_equality`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_span_at`` ``202311L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_span_initializer_list`` ``202311L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_sstream_from_string_view`` ``202306L`` - --------------------------------------------------- ----------------- - ``__cpp_lib_submdspan`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_text_encoding`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_to_chars`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_tuple_like`` *unimplemented* - --------------------------------------------------- ----------------- - ``__cpp_lib_within_lifetime`` *unimplemented* - =================================================== ================= + ---------------------------------------------------------------------------- + ``__cpp_lib_associative_heterogeneous_insertion`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_atomic_min_max`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_bind_back`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_bind_front`` ``202306L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_bitset`` ``202306L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_constrained_equality`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_copyable_function`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_debugging`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_default_template_type_for_algorithm_values`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_freestanding_algorithm`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_freestanding_array`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_freestanding_cstring`` *unimplemented* + ---------------------------------------------------------- 
----------------- + ``__cpp_lib_freestanding_expected`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_freestanding_mdspan`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_freestanding_optional`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_freestanding_string_view`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_freestanding_variant`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_fstream_native_handle`` ``202306L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_function_ref`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_generate_random`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_hazard_pointer`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_linalg`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_out_ptr`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ranges_concat`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_ratio`` ``202306L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_rcu`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_reference_wrapper`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_saturation_arithmetic`` ``202311L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_smart_ptr_owner_equality`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_span_at`` ``202311L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_span_initializer_list`` ``202311L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_sstream_from_string_view`` ``202306L`` + ---------------------------------------------------------- ----------------- + ``__cpp_lib_submdspan`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_text_encoding`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_to_chars`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_tuple_like`` *unimplemented* + ---------------------------------------------------------- ----------------- + ``__cpp_lib_within_lifetime`` *unimplemented* + ========================================================== ================= diff --git a/libcxx/docs/Status/Cxx2cIssues.csv b/libcxx/docs/Status/Cxx2cIssues.csv index 58e9958..f471c43 100644 --- a/libcxx/docs/Status/Cxx2cIssues.csv +++ b/libcxx/docs/Status/Cxx2cIssues.csv @@ -40,5 +40,27 @@ "`3990 `__","Program-defined specializations of ``std::tuple`` and ``std::variant`` can't be properly supported","Kona November 2023","","","" "`4001 `__","``iota_view`` 
should provide ``empty``","Kona November 2023","","","|ranges|" "","","","","","" +"`3767 `__","``codecvt`` incorrectly added to locale","Tokyo March 2024","","","" +"`3919 `__","``enumerate_view`` may invoke UB for sized common non-forward underlying ranges","Tokyo March 2024","","","|ranges|" +"`3950 `__","``std::basic_string_view`` comparison operators are overspecified","Tokyo March 2024","","","" +"`3975 `__","Specializations of ``basic_format_context`` should not be permitted","Tokyo March 2024","","","|format|" +"`3984 `__","``ranges::to``'s recursion branch may be ill-formed","Tokyo March 2024","","","|ranges|" +"`4011 `__","``""Effects: Equivalent to return""`` in ``[span.elem]``","Tokyo March 2024","","","" +"`4012 `__","``common_view::begin/end`` are missing the ``simple-view`` check","Tokyo March 2024","","","|ranges|" +"`4013 `__","``lazy_split_view::outer-iterator::value_type`` should not provide default constructor","Tokyo March 2024","","","|ranges|" +"`4016 `__","container-insertable checks do not match what container-inserter does","Tokyo March 2024","","","" +"`4023 `__","Preconditions of ``std::basic_streambuf::setg/setp``","Tokyo March 2024","","","" +"`4025 `__","Move assignment operator of ``std::expected`` should not be conditionally deleted","Tokyo March 2024","","","" +"`4030 `__","Clarify whether arithmetic expressions in ``[numeric.sat.func]`` are mathematical or C++","Tokyo March 2024","","","" +"`4031 `__","``bad_expected_access`` member functions should be ``noexcept``","Tokyo March 2024","","","" +"`4035 `__","``single_view`` should provide ``empty``","Tokyo March 2024","","","|ranges|" +"`4036 `__","``__alignof_is_defined`` is only implicitly specified in C++ and not yet deprecated","Tokyo March 2024","","","" +"`4037 `__","Static data members of ``ctype_base`` are not yet required to be usable in constant expressions","Tokyo March 2024","","","" +"`4038 `__","``std::text_encoding::aliases_view`` should have constexpr iterators","Tokyo March 2024","","","" +"`4043 `__","""ASCII"" is not a registered character encoding","Tokyo March 2024","","","" +"`4045 `__","``tuple`` can create dangling references from ``tuple-like``","Tokyo March 2024","","","" +"`4053 `__","Unary call to ``std::views::repeat`` does not decay the argument","Tokyo March 2024","","","|ranges|" +"`4054 `__","Repeating a ``repeat_view`` should repeat the view","Tokyo March 2024","","","|ranges|" +"","","","","","" "`3343 `__","Ordering of calls to ``unlock()`` and ``notify_all()`` in Effects element of ``notify_all_at_thread_exit()`` should be reversed","Not Yet Adopted","|Complete|","16.0","" "","","","","","" diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index 4a5443d..efccd1694 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -46,3 +46,20 @@ "`P2264R7 `__","LWG","Make ``assert()`` macro user friendly for C and C++","Kona November 2023","","","" "`P1673R13 `__","LWG","A free function linear algebra interface based on the BLAS","Kona November 2023","","","" "","","","","","","" +"`P2875R4 `__","LWG","Undeprecate ``polymorphic_allocator::destroy`` for C++26","Tokyo March 2024","","","" +"`P2867R2 `__","LWG","Remove Deprecated ``strstreams`` From C++26","Tokyo March 2024","","","" +"`P2869R4 `__","LWG","Remove Deprecated ``shared_ptr`` Atomic Access APIs from C++26","Tokyo March 2024","","","" +"`P2872R3 `__","LWG","Remove ``wstring_convert`` From C++26","Tokyo March 2024","","","" +"`P3107R5 `__","LWG","Permit 
an efficient implementation of ``std::print``","Tokyo March 2024","","","|format| |DR|" +"`P3142R0 `__","LWG","Printing Blank Lines with ``println``","Tokyo March 2024","","","|format|" +"`P2845R8 `__","LWG","Formatting of ``std::filesystem::path``","Tokyo March 2024","","","|format|" +"`P0493R5 `__","LWG","Atomic minimum/maximum","Tokyo March 2024","","","" +"`P2542R8 `__","LWG","``views::concat``","Tokyo March 2024","","","|ranges|" +"`P2591R5 `__","LWG","Concatenation of strings and string views","Tokyo March 2024","","","" +"`P2248R8 `__","LWG","Enabling list-initialization for algorithms","Tokyo March 2024","","","" +"`P2810R4 `__","LWG","``is_debugger_present`` ``is_replaceable``","Tokyo March 2024","","","" +"`P1068R11 `__","LWG","Vector API for random number generation","Tokyo March 2024","","","" +"`P2944R3 `__","LWG","Comparisons for ``reference_wrapper``","Tokyo March 2024","","","" +"`P2642R6 `__","LWG","Padded ``mdspan`` layouts","Tokyo March 2024","","","" +"`P3029R1 `__","LWG","Better ``mdspan``'s CTAD","Tokyo March 2024","","","" +"","","","","","","" diff --git a/libcxx/include/version b/libcxx/include/version index 3bd296e..90dc1b2 100644 --- a/libcxx/include/version +++ b/libcxx/include/version @@ -35,6 +35,7 @@ __cpp_lib_atomic_flag_test 201907L __cpp_lib_atomic_float 201711L __cpp_lib_atomic_is_always_lock_free 201603L __cpp_lib_atomic_lock_free_type_aliases 201907L +__cpp_lib_atomic_min_max 202403L __cpp_lib_atomic_ref 201806L __cpp_lib_atomic_shared_ptr 201711L __cpp_lib_atomic_value_initialization 201911L @@ -77,9 +78,14 @@ __cpp_lib_constexpr_tuple 201811L __cpp_lib_constexpr_typeinfo 202106L __cpp_lib_constexpr_utility 201811L __cpp_lib_constexpr_vector 201907L +__cpp_lib_constrained_equality 202403L + __cpp_lib_copyable_function 202306L __cpp_lib_coroutine 201902L __cpp_lib_debugging 202311L +__cpp_lib_default_template_type_for_algorithm_values 202403L + + __cpp_lib_destroying_delete 201806L __cpp_lib_enable_shared_from_this 201603L __cpp_lib_endian 201907L @@ -92,6 +98,7 @@ __cpp_lib_execution 201902L __cpp_lib_expected 202211L __cpp_lib_filesystem 201703L __cpp_lib_format 202106L +__cpp_lib_format_path 202403L __cpp_lib_format_ranges 202207L __cpp_lib_format_uchar 202311L __cpp_lib_formatters 202302L @@ -107,6 +114,7 @@ __cpp_lib_freestanding_variant 202311L __cpp_lib_fstream_native_handle 202306L __cpp_lib_function_ref 202306L __cpp_lib_gcd_lcm 201606L +__cpp_lib_generate_random 202403L __cpp_lib_generic_associative_lookup 201304L __cpp_lib_generic_unordered_lookup 201811L __cpp_lib_hardware_interference_size 201703L @@ -170,6 +178,7 @@ __cpp_lib_ranges_as_const 202207L __cpp_lib_ranges_as_rvalue 202207L __cpp_lib_ranges_chunk 202202L __cpp_lib_ranges_chunk_by 202202L +__cpp_lib_ranges_concat 202403L __cpp_lib_ranges_contains 202207L __cpp_lib_ranges_iota 202202L __cpp_lib_ranges_join_with 202202L @@ -185,6 +194,7 @@ __cpp_lib_ratio 202306L __cpp_lib_raw_memory_algorithms 201606L __cpp_lib_rcu 202306L __cpp_lib_reference_from_temporary 202202L +__cpp_lib_reference_wrapper 202403L __cpp_lib_remove_cvref 201711L __cpp_lib_result_of_sfinae 201210L __cpp_lib_robust_nonmodifying_seq_ops 201304L @@ -448,6 +458,7 @@ __cpp_lib_within_lifetime 202306L # define __cpp_lib_constexpr_memory 202202L # define __cpp_lib_constexpr_typeinfo 202106L # define __cpp_lib_expected 202211L +// # define __cpp_lib_format_path 202403L # define __cpp_lib_format_ranges 202207L // # define __cpp_lib_formatters 202302L # define __cpp_lib_forward_like 202207L @@ -486,13 +497,16 @@ 
__cpp_lib_within_lifetime 202306L #if _LIBCPP_STD_VER >= 26 // # define __cpp_lib_associative_heterogeneous_insertion 202306L +// # define __cpp_lib_atomic_min_max 202403L # undef __cpp_lib_bind_back // # define __cpp_lib_bind_back 202306L # undef __cpp_lib_bind_front # define __cpp_lib_bind_front 202306L # define __cpp_lib_bitset 202306L +// # define __cpp_lib_constrained_equality 202403L // # define __cpp_lib_copyable_function 202306L // # define __cpp_lib_debugging 202311L +// # define __cpp_lib_default_template_type_for_algorithm_values 202403L // # define __cpp_lib_freestanding_algorithm 202311L // # define __cpp_lib_freestanding_array 202311L // # define __cpp_lib_freestanding_cstring 202306L @@ -505,12 +519,15 @@ __cpp_lib_within_lifetime 202306L # define __cpp_lib_fstream_native_handle 202306L # endif // # define __cpp_lib_function_ref 202306L +// # define __cpp_lib_generate_random 202403L // # define __cpp_lib_hazard_pointer 202306L // # define __cpp_lib_linalg 202311L # undef __cpp_lib_out_ptr // # define __cpp_lib_out_ptr 202311L +// # define __cpp_lib_ranges_concat 202403L # define __cpp_lib_ratio 202306L // # define __cpp_lib_rcu 202306L +// # define __cpp_lib_reference_wrapper 202403L # define __cpp_lib_saturation_arithmetic 202311L // # define __cpp_lib_smart_ptr_owner_equality 202306L # define __cpp_lib_span_at 202311L diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp index ece13b0..8ccd252 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/algorithm.version.compile.pass.cpp @@ -15,17 +15,18 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_clamp 201603L [C++17] - __cpp_lib_constexpr_algorithms 201806L [C++20] - __cpp_lib_freestanding_algorithm 202311L [C++26] - __cpp_lib_parallel_algorithm 201603L [C++17] - __cpp_lib_ranges 202207L [C++20] - __cpp_lib_ranges_contains 202207L [C++23] - __cpp_lib_ranges_starts_ends_with 202106L [C++23] - __cpp_lib_robust_nonmodifying_seq_ops 201304L [C++14] - __cpp_lib_sample 201603L [C++17] - __cpp_lib_shift 201806L [C++20] +/* Constant Value + __cpp_lib_clamp 201603L [C++17] + __cpp_lib_constexpr_algorithms 201806L [C++20] + __cpp_lib_default_template_type_for_algorithm_values 202403L [C++26] + __cpp_lib_freestanding_algorithm 202311L [C++26] + __cpp_lib_parallel_algorithm 201603L [C++17] + __cpp_lib_ranges 202207L [C++20] + __cpp_lib_ranges_contains 202207L [C++23] + __cpp_lib_ranges_starts_ends_with 202106L [C++23] + __cpp_lib_robust_nonmodifying_seq_ops 201304L [C++14] + __cpp_lib_sample 201603L [C++17] + __cpp_lib_shift 201806L [C++20] */ #include @@ -41,6 +42,10 @@ # error "__cpp_lib_constexpr_algorithms should not be defined before c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_algorithm # error "__cpp_lib_freestanding_algorithm should not be defined before c++26" # endif @@ -83,6 +88,10 @@ # error "__cpp_lib_constexpr_algorithms should not be defined before c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before 
c++26" +# endif + # ifdef __cpp_lib_freestanding_algorithm # error "__cpp_lib_freestanding_algorithm should not be defined before c++26" # endif @@ -131,6 +140,10 @@ # error "__cpp_lib_constexpr_algorithms should not be defined before c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_algorithm # error "__cpp_lib_freestanding_algorithm should not be defined before c++26" # endif @@ -194,6 +207,10 @@ # error "__cpp_lib_constexpr_algorithms should have the value 201806L in c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_algorithm # error "__cpp_lib_freestanding_algorithm should not be defined before c++26" # endif @@ -263,6 +280,10 @@ # error "__cpp_lib_constexpr_algorithms should have the value 201806L in c++23" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_algorithm # error "__cpp_lib_freestanding_algorithm should not be defined before c++26" # endif @@ -339,6 +360,19 @@ # endif # if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should be defined in c++26" +# endif +# if __cpp_lib_default_template_type_for_algorithm_values != 202403L +# error "__cpp_lib_default_template_type_for_algorithm_values should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + +# if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_freestanding_algorithm # error "__cpp_lib_freestanding_algorithm should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp index 86315c2..c907b7d 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/atomic.version.compile.pass.cpp @@ -20,6 +20,7 @@ __cpp_lib_atomic_float 201711L [C++20] __cpp_lib_atomic_is_always_lock_free 201603L [C++17] __cpp_lib_atomic_lock_free_type_aliases 201907L [C++20] + __cpp_lib_atomic_min_max 202403L [C++26] __cpp_lib_atomic_ref 201806L [C++20] __cpp_lib_atomic_shared_ptr 201711L [C++20] __cpp_lib_atomic_value_initialization 201911L [C++20] @@ -48,6 +49,10 @@ # error "__cpp_lib_atomic_lock_free_type_aliases should not be defined before c++20" # endif +# ifdef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should not be defined before c++26" +# endif + # ifdef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should not be defined before c++20" # endif @@ -86,6 +91,10 @@ # error "__cpp_lib_atomic_lock_free_type_aliases should not be defined before c++20" # endif +# ifdef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should not be defined before c++26" +# endif + # ifdef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should not be defined before c++20" # endif @@ -127,6 +136,10 @@ # error "__cpp_lib_atomic_lock_free_type_aliases should not be defined before c++20" # endif +# ifdef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should not be defined before c++26" +# endif + # ifdef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should not be defined before c++20" # endif @@ -183,6 +196,10 @@ # error "__cpp_lib_atomic_lock_free_type_aliases should have the value 201907L in c++20" # endif +# ifdef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should not be defined before c++26" +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should be defined in c++20" @@ -278,6 +295,10 @@ # error "__cpp_lib_atomic_lock_free_type_aliases should have the value 201907L in c++23" # endif +# ifdef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should not be defined before c++26" +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should be defined in c++23" @@ -374,6 +395,19 @@ # endif # if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should be defined in c++26" +# endif +# if __cpp_lib_atomic_min_max != 202403L +# error "__cpp_lib_atomic_min_max should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + +# if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/deque.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/deque.version.compile.pass.cpp index 4a398e2..720557f 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/deque.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/deque.version.compile.pass.cpp @@ -15,11 +15,12 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 202002L [C++20] - __cpp_lib_nonmember_container_access 201411L [C++17] - __cpp_lib_ranges_to_container 202202L [C++23] +/* Constant Value + __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] + __cpp_lib_default_template_type_for_algorithm_values 202403L [C++26] + __cpp_lib_erase_if 202002L [C++20] + __cpp_lib_nonmember_container_access 201411L [C++17] + __cpp_lib_ranges_to_container 202202L [C++23] */ #include @@ -31,6 +32,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -49,6 +54,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -70,6 +79,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++17" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -94,6 +107,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++20" # endif @@ -121,6 +138,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++23" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++23" # endif @@ -151,6 +172,19 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should be defined in c++26" +# endif +# if __cpp_lib_default_template_type_for_algorithm_values != 202403L +# error 
"__cpp_lib_default_template_type_for_algorithm_values should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp index 3f03e8b..308cc2d 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/filesystem.version.compile.pass.cpp @@ -17,9 +17,10 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_char8_t 201907L [C++20] - __cpp_lib_filesystem 201703L [C++17] +/* Constant Value + __cpp_lib_char8_t 201907L [C++20] + __cpp_lib_filesystem 201703L [C++17] + __cpp_lib_format_path 202403L [C++23] */ #include @@ -35,6 +36,10 @@ # error "__cpp_lib_filesystem should not be defined before c++17" # endif +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined before c++23" +# endif + #elif TEST_STD_VER == 14 # ifdef __cpp_lib_char8_t @@ -45,6 +50,10 @@ # error "__cpp_lib_filesystem should not be defined before c++17" # endif +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined before c++23" +# endif + #elif TEST_STD_VER == 17 # ifdef __cpp_lib_char8_t @@ -64,6 +73,10 @@ # endif # endif +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined before c++23" +# endif + #elif TEST_STD_VER == 20 # if defined(__cpp_char8_t) @@ -92,6 +105,10 @@ # endif # endif +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined before c++23" +# endif + #elif TEST_STD_VER == 23 # if defined(__cpp_char8_t) @@ -120,6 +137,19 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_format_path +# error "__cpp_lib_format_path should be defined in c++23" +# endif +# if __cpp_lib_format_path != 202403L +# error "__cpp_lib_format_path should have the value 202403L in c++23" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined because it is unimplemented in libc++!" +# endif +# endif + #elif TEST_STD_VER > 23 # if defined(__cpp_char8_t) @@ -148,5 +178,18 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_format_path +# error "__cpp_lib_format_path should be defined in c++26" +# endif +# if __cpp_lib_format_path != 202403L +# error "__cpp_lib_format_path should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + #endif // TEST_STD_VER > 23 diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/forward_list.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/forward_list.version.compile.pass.cpp index b163943..9305cf0 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/forward_list.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/forward_list.version.compile.pass.cpp @@ -15,13 +15,14 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 202002L [C++20] - __cpp_lib_incomplete_container_elements 201505L [C++17] - __cpp_lib_list_remove_return_type 201806L [C++20] - __cpp_lib_nonmember_container_access 201411L [C++17] - __cpp_lib_ranges_to_container 202202L [C++23] +/* Constant Value + __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] + __cpp_lib_default_template_type_for_algorithm_values 202403L [C++26] + __cpp_lib_erase_if 202002L [C++20] + __cpp_lib_incomplete_container_elements 201505L [C++17] + __cpp_lib_list_remove_return_type 201806L [C++20] + __cpp_lib_nonmember_container_access 201411L [C++17] + __cpp_lib_ranges_to_container 202202L [C++23] */ #include @@ -33,6 +34,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -59,6 +64,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -88,6 +97,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++17" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -123,6 +136,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++20" # endif @@ -164,6 +181,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++23" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++23" # endif @@ -208,6 +229,19 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_default_template_type_for_algorithm_values +# error 
"__cpp_lib_default_template_type_for_algorithm_values should be defined in c++26" +# endif +# if __cpp_lib_default_template_type_for_algorithm_values != 202403L +# error "__cpp_lib_default_template_type_for_algorithm_values should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp index 72c96c6..fa4d9ba 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/functional.version.compile.pass.cpp @@ -29,6 +29,7 @@ __cpp_lib_move_only_function 202110L [C++23] __cpp_lib_not_fn 201603L [C++17] __cpp_lib_ranges 202207L [C++20] + __cpp_lib_reference_wrapper 202403L [C++26] __cpp_lib_result_of_sfinae 201210L [C++14] __cpp_lib_transparent_operators 201210L [C++14] 201510L [C++17] @@ -84,6 +85,10 @@ # error "__cpp_lib_ranges should not be defined before c++20" # endif +# ifdef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should not be defined before c++26" +# endif + # ifdef __cpp_lib_result_of_sfinae # error "__cpp_lib_result_of_sfinae should not be defined before c++14" # endif @@ -142,6 +147,10 @@ # error "__cpp_lib_ranges should not be defined before c++20" # endif +# ifdef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should not be defined before c++26" +# endif + # ifndef __cpp_lib_result_of_sfinae # error "__cpp_lib_result_of_sfinae should be defined in c++14" # endif @@ -215,6 +224,10 @@ # error "__cpp_lib_ranges should not be defined before c++20" # endif +# ifdef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should not be defined before c++26" +# endif + # ifndef __cpp_lib_result_of_sfinae # error "__cpp_lib_result_of_sfinae should be defined in c++17" # endif @@ -297,6 +310,10 @@ # error "__cpp_lib_ranges should have the value 202207L in c++20" # endif +# ifdef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should not be defined before c++26" +# endif + # ifndef __cpp_lib_result_of_sfinae # error "__cpp_lib_result_of_sfinae should be defined in c++20" # endif @@ -403,6 +420,10 @@ # error "__cpp_lib_ranges should have the value 202207L in c++23" # endif +# ifdef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should not be defined before c++26" +# endif + # ifndef __cpp_lib_result_of_sfinae # error "__cpp_lib_result_of_sfinae should be defined in c++23" # endif @@ -527,6 +548,19 @@ # error "__cpp_lib_ranges should have the value 202207L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should be defined in c++26" +# endif +# if __cpp_lib_reference_wrapper != 202403L +# error "__cpp_lib_reference_wrapper should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # ifndef __cpp_lib_result_of_sfinae # error "__cpp_lib_result_of_sfinae should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/list.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/list.version.compile.pass.cpp index 48bff77..1222561 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/list.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/list.version.compile.pass.cpp @@ -15,13 +15,14 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_erase_if 202002L [C++20] - __cpp_lib_incomplete_container_elements 201505L [C++17] - __cpp_lib_list_remove_return_type 201806L [C++20] - __cpp_lib_nonmember_container_access 201411L [C++17] - __cpp_lib_ranges_to_container 202202L [C++23] +/* Constant Value + __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] + __cpp_lib_default_template_type_for_algorithm_values 202403L [C++26] + __cpp_lib_erase_if 202002L [C++20] + __cpp_lib_incomplete_container_elements 201505L [C++17] + __cpp_lib_list_remove_return_type 201806L [C++20] + __cpp_lib_nonmember_container_access 201411L [C++17] + __cpp_lib_ranges_to_container 202202L [C++23] */ #include @@ -33,6 +34,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -59,6 +64,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should not be defined before c++17" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -88,6 +97,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++17" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -123,6 +136,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++20" # endif @@ -164,6 +181,10 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++23" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++23" # endif @@ -208,6 +229,19 @@ # error "__cpp_lib_allocator_traits_is_always_equal should have the value 201411L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_default_template_type_for_algorithm_values +# error 
"__cpp_lib_default_template_type_for_algorithm_values should be defined in c++26" +# endif +# if __cpp_lib_default_template_type_for_algorithm_values != 202403L +# error "__cpp_lib_default_template_type_for_algorithm_values should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp index 99716d8..15350a9 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/optional.version.compile.pass.cpp @@ -16,6 +16,7 @@ // Test the feature test macros defined by /* Constant Value + __cpp_lib_constrained_equality 202403L [C++26] __cpp_lib_freestanding_optional 202311L [C++26] __cpp_lib_optional 201606L [C++17] 202110L [C++23] @@ -26,6 +27,10 @@ #if TEST_STD_VER < 14 +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_optional # error "__cpp_lib_freestanding_optional should not be defined before c++26" # endif @@ -36,6 +41,10 @@ #elif TEST_STD_VER == 14 +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_optional # error "__cpp_lib_freestanding_optional should not be defined before c++26" # endif @@ -46,6 +55,10 @@ #elif TEST_STD_VER == 17 +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_optional # error "__cpp_lib_freestanding_optional should not be defined before c++26" # endif @@ -59,6 +72,10 @@ #elif TEST_STD_VER == 20 +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_optional # error "__cpp_lib_freestanding_optional should not be defined before c++26" # endif @@ -72,6 +89,10 @@ #elif TEST_STD_VER == 23 +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_optional # error "__cpp_lib_freestanding_optional should not be defined before c++26" # endif @@ -86,6 +107,19 @@ #elif TEST_STD_VER > 23 # if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should be defined in c++26" +# endif +# if __cpp_lib_constrained_equality != 202403L +# error "__cpp_lib_constrained_equality should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + +# if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_freestanding_optional # error "__cpp_lib_freestanding_optional should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/random.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/random.version.compile.pass.cpp new file mode 100644 index 0000000..1f138d9 --- /dev/null +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/random.version.compile.pass.cpp @@ -0,0 +1,71 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// WARNING: This test was generated by generate_feature_test_macro_components.py +// and should not be edited manually. +// +// clang-format off + +// + +// Test the feature test macros defined by + +/* Constant Value + __cpp_lib_generate_random 202403L [C++26] +*/ + +#include +#include "test_macros.h" + +#if TEST_STD_VER < 14 + +# ifdef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should not be defined before c++26" +# endif + +#elif TEST_STD_VER == 14 + +# ifdef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should not be defined before c++26" +# endif + +#elif TEST_STD_VER == 17 + +# ifdef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should not be defined before c++26" +# endif + +#elif TEST_STD_VER == 20 + +# ifdef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should not be defined before c++26" +# endif + +#elif TEST_STD_VER == 23 + +# ifdef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should not be defined before c++26" +# endif + +#elif TEST_STD_VER > 23 + +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should be defined in c++26" +# endif +# if __cpp_lib_generate_random != 202403L +# error "__cpp_lib_generate_random should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + +#endif // TEST_STD_VER > 23 + diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp index aa3a496..30feacd 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/ranges.version.compile.pass.cpp @@ -15,17 +15,19 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_ranges 202207L [C++20] - __cpp_lib_ranges_as_const 202207L [C++23] - __cpp_lib_ranges_as_rvalue 202207L [C++23] - __cpp_lib_ranges_chunk 202202L [C++23] - __cpp_lib_ranges_chunk_by 202202L [C++23] - __cpp_lib_ranges_join_with 202202L [C++23] - __cpp_lib_ranges_repeat 202207L [C++23] - __cpp_lib_ranges_slide 202202L [C++23] - __cpp_lib_ranges_to_container 202202L [C++23] - __cpp_lib_ranges_zip 202110L [C++23] +/* Constant Value + __cpp_lib_default_template_type_for_algorithm_values 202403L [C++26] + __cpp_lib_ranges 202207L [C++20] + __cpp_lib_ranges_as_const 202207L [C++23] + __cpp_lib_ranges_as_rvalue 202207L [C++23] + __cpp_lib_ranges_chunk 202202L [C++23] + __cpp_lib_ranges_chunk_by 202202L [C++23] + __cpp_lib_ranges_concat 202403L [C++26] + __cpp_lib_ranges_join_with 202202L [C++23] + __cpp_lib_ranges_repeat 202207L [C++23] + __cpp_lib_ranges_slide 202202L [C++23] + __cpp_lib_ranges_to_container 202202L [C++23] + __cpp_lib_ranges_zip 202110L [C++23] */ #include @@ -33,6 +35,10 @@ #if TEST_STD_VER < 14 +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges # error "__cpp_lib_ranges should not be defined before c++20" # endif @@ -53,6 +59,10 @@ # error "__cpp_lib_ranges_chunk_by should not be defined before c++23" # endif +# ifdef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_join_with # error "__cpp_lib_ranges_join_with should not be defined before c++23" # endif @@ -75,6 +85,10 @@ #elif TEST_STD_VER == 14 +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges # error "__cpp_lib_ranges should not be defined before c++20" # endif @@ -95,6 +109,10 @@ # error "__cpp_lib_ranges_chunk_by should not be defined before c++23" # endif +# ifdef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_join_with # error "__cpp_lib_ranges_join_with should not be defined before c++23" # endif @@ -117,6 +135,10 @@ #elif TEST_STD_VER == 17 +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges # error "__cpp_lib_ranges should not be defined before c++20" # endif @@ -137,6 +159,10 @@ # error "__cpp_lib_ranges_chunk_by should not be defined before c++23" # endif +# ifdef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_join_with # error "__cpp_lib_ranges_join_with should not be defined before c++23" # endif @@ -159,6 +185,10 @@ #elif TEST_STD_VER == 
20 +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifndef __cpp_lib_ranges # error "__cpp_lib_ranges should be defined in c++20" # endif @@ -182,6 +212,10 @@ # error "__cpp_lib_ranges_chunk_by should not be defined before c++23" # endif +# ifdef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_join_with # error "__cpp_lib_ranges_join_with should not be defined before c++23" # endif @@ -204,6 +238,10 @@ #elif TEST_STD_VER == 23 +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifndef __cpp_lib_ranges # error "__cpp_lib_ranges should be defined in c++23" # endif @@ -251,6 +289,10 @@ # error "__cpp_lib_ranges_chunk_by should have the value 202202L in c++23" # endif +# ifdef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should not be defined before c++26" +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_ranges_join_with # error "__cpp_lib_ranges_join_with should be defined in c++23" @@ -306,6 +348,19 @@ #elif TEST_STD_VER > 23 +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should be defined in c++26" +# endif +# if __cpp_lib_default_template_type_for_algorithm_values != 202403L +# error "__cpp_lib_default_template_type_for_algorithm_values should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_ranges # error "__cpp_lib_ranges should be defined in c++26" # endif @@ -354,6 +409,19 @@ # endif # if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should be defined in c++26" +# endif +# if __cpp_lib_ranges_concat != 202403L +# error "__cpp_lib_ranges_concat should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + +# if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_ranges_join_with # error "__cpp_lib_ranges_join_with should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.compile.pass.cpp index b5770f8..8d944a1 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/string.version.compile.pass.cpp @@ -15,20 +15,21 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_char8_t 201907L [C++20] - __cpp_lib_constexpr_string 201907L [C++20] - __cpp_lib_erase_if 202002L [C++20] - __cpp_lib_nonmember_container_access 201411L [C++17] - __cpp_lib_ranges_to_container 202202L [C++23] - __cpp_lib_starts_ends_with 201711L [C++20] - __cpp_lib_string_contains 202011L [C++23] - __cpp_lib_string_resize_and_overwrite 202110L [C++23] - __cpp_lib_string_udls 201304L [C++14] - __cpp_lib_string_view 201606L [C++17] - 201803L [C++20] - __cpp_lib_to_string 202306L [C++23] +/* Constant Value + __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] + __cpp_lib_char8_t 201907L [C++20] + __cpp_lib_constexpr_string 201907L [C++20] + __cpp_lib_default_template_type_for_algorithm_values 202403L [C++26] + __cpp_lib_erase_if 202002L [C++20] + __cpp_lib_nonmember_container_access 201411L [C++17] + __cpp_lib_ranges_to_container 202202L [C++23] + __cpp_lib_starts_ends_with 201711L [C++20] + __cpp_lib_string_contains 202011L [C++23] + __cpp_lib_string_resize_and_overwrite 202110L [C++23] + __cpp_lib_string_udls 201304L [C++14] + __cpp_lib_string_view 201606L [C++17] + 201803L [C++20] + __cpp_lib_to_string 202306L [C++23] */ #include @@ -48,6 +49,10 @@ # error "__cpp_lib_constexpr_string should not be defined before c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -98,6 +103,10 @@ # error "__cpp_lib_constexpr_string should not be defined before c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -154,6 +163,10 @@ # error "__cpp_lib_constexpr_string should not be defined before c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -228,6 +241,10 @@ # error "__cpp_lib_constexpr_string should have the value 201907L in c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++20" # endif @@ -308,6 +325,10 @@ # error "__cpp_lib_constexpr_string should have the value 201907L in c++23" # endif +# ifdef 
__cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++23" # endif @@ -406,6 +427,19 @@ # error "__cpp_lib_constexpr_string should have the value 201907L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should be defined in c++26" +# endif +# if __cpp_lib_default_template_type_for_algorithm_values != 202403L +# error "__cpp_lib_default_template_type_for_algorithm_values should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/tuple.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/tuple.version.compile.pass.cpp index ce17aef..6dd2e968 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/tuple.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/tuple.version.compile.pass.cpp @@ -15,15 +15,16 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_apply 201603L [C++17] - __cpp_lib_constexpr_tuple 201811L [C++20] - __cpp_lib_make_from_tuple 201606L [C++17] - __cpp_lib_ranges_zip 202110L [C++23] - __cpp_lib_tuple_element_t 201402L [C++14] - __cpp_lib_tuple_like 202207L [C++23] - 202311L [C++26] - __cpp_lib_tuples_by_type 201304L [C++14] +/* Constant Value + __cpp_lib_apply 201603L [C++17] + __cpp_lib_constexpr_tuple 201811L [C++20] + __cpp_lib_constrained_equality 202403L [C++26] + __cpp_lib_make_from_tuple 201606L [C++17] + __cpp_lib_ranges_zip 202110L [C++23] + __cpp_lib_tuple_element_t 201402L [C++14] + __cpp_lib_tuple_like 202207L [C++23] + 202311L [C++26] + __cpp_lib_tuples_by_type 201304L [C++14] */ #include @@ -39,6 +40,10 @@ # error "__cpp_lib_constexpr_tuple should not be defined before c++20" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_make_from_tuple # error "__cpp_lib_make_from_tuple should not be defined before c++17" # endif @@ -69,6 +74,10 @@ # error "__cpp_lib_constexpr_tuple should not be defined before c++20" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_make_from_tuple # error "__cpp_lib_make_from_tuple should not be defined before c++17" # endif @@ -108,6 +117,10 @@ # error "__cpp_lib_constexpr_tuple should not be defined before c++20" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifndef __cpp_lib_make_from_tuple # error "__cpp_lib_make_from_tuple should be defined in c++17" # endif @@ -153,6 +166,10 @@ # error "__cpp_lib_constexpr_tuple should have the value 201811L in c++20" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifndef 
__cpp_lib_make_from_tuple # error "__cpp_lib_make_from_tuple should be defined in c++20" # endif @@ -198,6 +215,10 @@ # error "__cpp_lib_constexpr_tuple should have the value 201811L in c++23" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifndef __cpp_lib_make_from_tuple # error "__cpp_lib_make_from_tuple should be defined in c++23" # endif @@ -261,6 +282,19 @@ # error "__cpp_lib_constexpr_tuple should have the value 201811L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should be defined in c++26" +# endif +# if __cpp_lib_constrained_equality != 202403L +# error "__cpp_lib_constrained_equality should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_make_from_tuple # error "__cpp_lib_make_from_tuple should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.compile.pass.cpp index dd56f8d..ab0988f 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/utility.version.compile.pass.cpp @@ -19,6 +19,7 @@ __cpp_lib_as_const 201510L [C++17] __cpp_lib_constexpr_algorithms 201806L [C++20] __cpp_lib_constexpr_utility 201811L [C++20] + __cpp_lib_constrained_equality 202403L [C++26] __cpp_lib_exchange_function 201304L [C++14] __cpp_lib_forward_like 202207L [C++23] __cpp_lib_integer_comparison_functions 202002L [C++20] @@ -48,6 +49,10 @@ # error "__cpp_lib_constexpr_utility should not be defined before c++20" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_exchange_function # error "__cpp_lib_exchange_function should not be defined before c++14" # endif @@ -98,6 +103,10 @@ # error "__cpp_lib_constexpr_utility should not be defined before c++20" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifndef __cpp_lib_exchange_function # error "__cpp_lib_exchange_function should be defined in c++14" # endif @@ -160,6 +169,10 @@ # error "__cpp_lib_constexpr_utility should not be defined before c++20" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifndef __cpp_lib_exchange_function # error "__cpp_lib_exchange_function should be defined in c++17" # endif @@ -228,6 +241,10 @@ # error "__cpp_lib_constexpr_utility should have the value 201811L in c++20" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifndef __cpp_lib_exchange_function # error "__cpp_lib_exchange_function should be defined in c++20" # endif @@ -299,6 +316,10 @@ # error "__cpp_lib_constexpr_utility should have the value 201811L in c++23" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifndef 
__cpp_lib_exchange_function # error "__cpp_lib_exchange_function should be defined in c++23" # endif @@ -397,6 +418,19 @@ # error "__cpp_lib_constexpr_utility should have the value 201811L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should be defined in c++26" +# endif +# if __cpp_lib_constrained_equality != 202403L +# error "__cpp_lib_constrained_equality should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_exchange_function # error "__cpp_lib_exchange_function should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp index 3e65b14..4dcc477 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/variant.version.compile.pass.cpp @@ -16,6 +16,7 @@ // Test the feature test macros defined by /* Constant Value + __cpp_lib_constrained_equality 202403L [C++26] __cpp_lib_freestanding_variant 202311L [C++26] __cpp_lib_variant 202102L [C++17] */ @@ -25,6 +26,10 @@ #if TEST_STD_VER < 14 +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_variant # error "__cpp_lib_freestanding_variant should not be defined before c++26" # endif @@ -35,6 +40,10 @@ #elif TEST_STD_VER == 14 +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_variant # error "__cpp_lib_freestanding_variant should not be defined before c++26" # endif @@ -45,6 +54,10 @@ #elif TEST_STD_VER == 17 +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_variant # error "__cpp_lib_freestanding_variant should not be defined before c++26" # endif @@ -58,6 +71,10 @@ #elif TEST_STD_VER == 20 +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_variant # error "__cpp_lib_freestanding_variant should not be defined before c++26" # endif @@ -71,6 +88,10 @@ #elif TEST_STD_VER == 23 +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_freestanding_variant # error "__cpp_lib_freestanding_variant should not be defined before c++26" # endif @@ -85,6 +106,19 @@ #elif TEST_STD_VER > 23 # if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should be defined in c++26" +# endif +# if __cpp_lib_constrained_equality != 202403L +# error "__cpp_lib_constrained_equality should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + +# if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_freestanding_variant # error "__cpp_lib_freestanding_variant should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/vector.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/vector.version.compile.pass.cpp index 6eee936..3d0a956 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/vector.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/vector.version.compile.pass.cpp @@ -15,13 +15,14 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_constexpr_vector 201907L [C++20] - __cpp_lib_erase_if 202002L [C++20] - __cpp_lib_incomplete_container_elements 201505L [C++17] - __cpp_lib_nonmember_container_access 201411L [C++17] - __cpp_lib_ranges_to_container 202202L [C++23] +/* Constant Value + __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] + __cpp_lib_constexpr_vector 201907L [C++20] + __cpp_lib_default_template_type_for_algorithm_values 202403L [C++26] + __cpp_lib_erase_if 202002L [C++20] + __cpp_lib_incomplete_container_elements 201505L [C++17] + __cpp_lib_nonmember_container_access 201411L [C++17] + __cpp_lib_ranges_to_container 202202L [C++23] */ #include @@ -37,6 +38,10 @@ # error "__cpp_lib_constexpr_vector should not be defined before c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -63,6 +68,10 @@ # error "__cpp_lib_constexpr_vector should not be defined before c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -92,6 +101,10 @@ # error "__cpp_lib_constexpr_vector should not be defined before c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_erase_if # error "__cpp_lib_erase_if should not be defined before c++20" # endif @@ -130,6 +143,10 @@ # error "__cpp_lib_constexpr_vector should have the value 201907L in c++20" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++20" # endif @@ -171,6 +188,10 @@ # error "__cpp_lib_constexpr_vector should have the value 201907L in c++23" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++23" # endif @@ -215,6 +236,19 @@ # error "__cpp_lib_constexpr_vector should have the value 201907L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should be 
defined in c++26" +# endif +# if __cpp_lib_default_template_type_for_algorithm_values != 202403L +# error "__cpp_lib_default_template_type_for_algorithm_values should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_erase_if # error "__cpp_lib_erase_if should be defined in c++26" # endif diff --git a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp index 5501587..5055786 100644 --- a/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp +++ b/libcxx/test/std/language.support/support.limits/support.limits.general/version.version.compile.pass.cpp @@ -15,217 +15,224 @@ // Test the feature test macros defined by -/* Constant Value - __cpp_lib_adaptor_iterator_pair_constructor 202106L [C++23] - __cpp_lib_addressof_constexpr 201603L [C++17] - __cpp_lib_allocate_at_least 202302L [C++23] - __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] - __cpp_lib_any 201606L [C++17] - __cpp_lib_apply 201603L [C++17] - __cpp_lib_array_constexpr 201603L [C++17] - 201811L [C++20] - __cpp_lib_as_const 201510L [C++17] - __cpp_lib_associative_heterogeneous_erasure 202110L [C++23] - __cpp_lib_associative_heterogeneous_insertion 202306L [C++26] - __cpp_lib_assume_aligned 201811L [C++20] - __cpp_lib_atomic_flag_test 201907L [C++20] - __cpp_lib_atomic_float 201711L [C++20] - __cpp_lib_atomic_is_always_lock_free 201603L [C++17] - __cpp_lib_atomic_lock_free_type_aliases 201907L [C++20] - __cpp_lib_atomic_ref 201806L [C++20] - __cpp_lib_atomic_shared_ptr 201711L [C++20] - __cpp_lib_atomic_value_initialization 201911L [C++20] - __cpp_lib_atomic_wait 201907L [C++20] - __cpp_lib_barrier 201907L [C++20] - __cpp_lib_bind_back 202202L [C++23] - 202306L [C++26] - __cpp_lib_bind_front 201907L [C++20] - 202306L [C++26] - __cpp_lib_bit_cast 201806L [C++20] - __cpp_lib_bitops 201907L [C++20] - __cpp_lib_bitset 202306L [C++26] - __cpp_lib_bool_constant 201505L [C++17] - __cpp_lib_bounded_array_traits 201902L [C++20] - __cpp_lib_boyer_moore_searcher 201603L [C++17] - __cpp_lib_byte 201603L [C++17] - __cpp_lib_byteswap 202110L [C++23] - __cpp_lib_char8_t 201907L [C++20] - __cpp_lib_chrono 201611L [C++17] - __cpp_lib_chrono_udls 201304L [C++14] - __cpp_lib_clamp 201603L [C++17] - __cpp_lib_complex_udls 201309L [C++14] - __cpp_lib_concepts 202002L [C++20] - __cpp_lib_constexpr_algorithms 201806L [C++20] - __cpp_lib_constexpr_bitset 202207L [C++23] - __cpp_lib_constexpr_charconv 202207L [C++23] - __cpp_lib_constexpr_cmath 202202L [C++23] - __cpp_lib_constexpr_complex 201711L [C++20] - __cpp_lib_constexpr_dynamic_alloc 201907L [C++20] - __cpp_lib_constexpr_functional 201907L [C++20] - __cpp_lib_constexpr_iterator 201811L [C++20] - __cpp_lib_constexpr_memory 201811L [C++20] - 202202L [C++23] - __cpp_lib_constexpr_numeric 201911L [C++20] - __cpp_lib_constexpr_string 201907L [C++20] - __cpp_lib_constexpr_string_view 201811L [C++20] - __cpp_lib_constexpr_tuple 201811L [C++20] - __cpp_lib_constexpr_typeinfo 202106L [C++23] - __cpp_lib_constexpr_utility 201811L [C++20] - __cpp_lib_constexpr_vector 201907L [C++20] - __cpp_lib_copyable_function 202306L [C++26] - __cpp_lib_coroutine 201902L 
[C++20] - __cpp_lib_debugging 202311L [C++26] - __cpp_lib_destroying_delete 201806L [C++20] - __cpp_lib_enable_shared_from_this 201603L [C++17] - __cpp_lib_endian 201907L [C++20] - __cpp_lib_erase_if 202002L [C++20] - __cpp_lib_exchange_function 201304L [C++14] - __cpp_lib_execution 201603L [C++17] - 201902L [C++20] - __cpp_lib_expected 202211L [C++23] - __cpp_lib_filesystem 201703L [C++17] - __cpp_lib_format 202106L [C++20] - __cpp_lib_format_ranges 202207L [C++23] - __cpp_lib_format_uchar 202311L [C++20] - __cpp_lib_formatters 202302L [C++23] - __cpp_lib_forward_like 202207L [C++23] - __cpp_lib_freestanding_algorithm 202311L [C++26] - __cpp_lib_freestanding_array 202311L [C++26] - __cpp_lib_freestanding_cstring 202306L [C++26] - __cpp_lib_freestanding_expected 202311L [C++26] - __cpp_lib_freestanding_mdspan 202311L [C++26] - __cpp_lib_freestanding_optional 202311L [C++26] - __cpp_lib_freestanding_string_view 202311L [C++26] - __cpp_lib_freestanding_variant 202311L [C++26] - __cpp_lib_fstream_native_handle 202306L [C++26] - __cpp_lib_function_ref 202306L [C++26] - __cpp_lib_gcd_lcm 201606L [C++17] - __cpp_lib_generic_associative_lookup 201304L [C++14] - __cpp_lib_generic_unordered_lookup 201811L [C++20] - __cpp_lib_hardware_interference_size 201703L [C++17] - __cpp_lib_has_unique_object_representations 201606L [C++17] - __cpp_lib_hazard_pointer 202306L [C++26] - __cpp_lib_hypot 201603L [C++17] - __cpp_lib_incomplete_container_elements 201505L [C++17] - __cpp_lib_int_pow2 202002L [C++20] - __cpp_lib_integer_comparison_functions 202002L [C++20] - __cpp_lib_integer_sequence 201304L [C++14] - __cpp_lib_integral_constant_callable 201304L [C++14] - __cpp_lib_interpolate 201902L [C++20] - __cpp_lib_invoke 201411L [C++17] - __cpp_lib_invoke_r 202106L [C++23] - __cpp_lib_ios_noreplace 202207L [C++23] - __cpp_lib_is_aggregate 201703L [C++17] - __cpp_lib_is_constant_evaluated 201811L [C++20] - __cpp_lib_is_final 201402L [C++14] - __cpp_lib_is_invocable 201703L [C++17] - __cpp_lib_is_layout_compatible 201907L [C++20] - __cpp_lib_is_nothrow_convertible 201806L [C++20] - __cpp_lib_is_null_pointer 201309L [C++14] - __cpp_lib_is_pointer_interconvertible 201907L [C++20] - __cpp_lib_is_scoped_enum 202011L [C++23] - __cpp_lib_is_swappable 201603L [C++17] - __cpp_lib_jthread 201911L [C++20] - __cpp_lib_latch 201907L [C++20] - __cpp_lib_launder 201606L [C++17] - __cpp_lib_linalg 202311L [C++26] - __cpp_lib_list_remove_return_type 201806L [C++20] - __cpp_lib_logical_traits 201510L [C++17] - __cpp_lib_make_from_tuple 201606L [C++17] - __cpp_lib_make_reverse_iterator 201402L [C++14] - __cpp_lib_make_unique 201304L [C++14] - __cpp_lib_map_try_emplace 201411L [C++17] - __cpp_lib_math_constants 201907L [C++20] - __cpp_lib_math_special_functions 201603L [C++17] - __cpp_lib_mdspan 202207L [C++23] - __cpp_lib_memory_resource 201603L [C++17] - __cpp_lib_move_iterator_concept 202207L [C++20] - __cpp_lib_move_only_function 202110L [C++23] - __cpp_lib_node_extract 201606L [C++17] - __cpp_lib_nonmember_container_access 201411L [C++17] - __cpp_lib_not_fn 201603L [C++17] - __cpp_lib_null_iterators 201304L [C++14] - __cpp_lib_optional 201606L [C++17] - 202110L [C++23] - __cpp_lib_out_ptr 202106L [C++23] - 202311L [C++26] - __cpp_lib_parallel_algorithm 201603L [C++17] - __cpp_lib_polymorphic_allocator 201902L [C++20] - __cpp_lib_print 202207L [C++23] - __cpp_lib_quoted_string_io 201304L [C++14] - __cpp_lib_ranges 202207L [C++20] - __cpp_lib_ranges_as_const 202207L [C++23] - __cpp_lib_ranges_as_rvalue 202207L [C++23] - 
__cpp_lib_ranges_chunk 202202L [C++23] - __cpp_lib_ranges_chunk_by 202202L [C++23] - __cpp_lib_ranges_contains 202207L [C++23] - __cpp_lib_ranges_iota 202202L [C++23] - __cpp_lib_ranges_join_with 202202L [C++23] - __cpp_lib_ranges_repeat 202207L [C++23] - __cpp_lib_ranges_slide 202202L [C++23] - __cpp_lib_ranges_starts_ends_with 202106L [C++23] - __cpp_lib_ranges_to_container 202202L [C++23] - __cpp_lib_ranges_zip 202110L [C++23] - __cpp_lib_ratio 202306L [C++26] - __cpp_lib_raw_memory_algorithms 201606L [C++17] - __cpp_lib_rcu 202306L [C++26] - __cpp_lib_reference_from_temporary 202202L [C++23] - __cpp_lib_remove_cvref 201711L [C++20] - __cpp_lib_result_of_sfinae 201210L [C++14] - __cpp_lib_robust_nonmodifying_seq_ops 201304L [C++14] - __cpp_lib_sample 201603L [C++17] - __cpp_lib_saturation_arithmetic 202311L [C++26] - __cpp_lib_scoped_lock 201703L [C++17] - __cpp_lib_semaphore 201907L [C++20] - __cpp_lib_shared_mutex 201505L [C++17] - __cpp_lib_shared_ptr_arrays 201611L [C++17] - 201707L [C++20] - __cpp_lib_shared_ptr_weak_type 201606L [C++17] - __cpp_lib_shared_timed_mutex 201402L [C++14] - __cpp_lib_shift 201806L [C++20] - __cpp_lib_smart_ptr_for_overwrite 202002L [C++20] - __cpp_lib_smart_ptr_owner_equality 202306L [C++26] - __cpp_lib_source_location 201907L [C++20] - __cpp_lib_span 202002L [C++20] - __cpp_lib_span_at 202311L [C++26] - __cpp_lib_span_initializer_list 202311L [C++26] - __cpp_lib_spanstream 202106L [C++23] - __cpp_lib_ssize 201902L [C++20] - __cpp_lib_sstream_from_string_view 202306L [C++26] - __cpp_lib_stacktrace 202011L [C++23] - __cpp_lib_starts_ends_with 201711L [C++20] - __cpp_lib_stdatomic_h 202011L [C++23] - __cpp_lib_string_contains 202011L [C++23] - __cpp_lib_string_resize_and_overwrite 202110L [C++23] - __cpp_lib_string_udls 201304L [C++14] - __cpp_lib_string_view 201606L [C++17] - 201803L [C++20] - __cpp_lib_submdspan 202306L [C++26] - __cpp_lib_syncbuf 201803L [C++20] - __cpp_lib_text_encoding 202306L [C++26] - __cpp_lib_three_way_comparison 201907L [C++20] - __cpp_lib_to_address 201711L [C++20] - __cpp_lib_to_array 201907L [C++20] - __cpp_lib_to_chars 201611L [C++17] - 202306L [C++26] - __cpp_lib_to_string 202306L [C++23] - __cpp_lib_to_underlying 202102L [C++23] - __cpp_lib_transformation_trait_aliases 201304L [C++14] - __cpp_lib_transparent_operators 201210L [C++14] - 201510L [C++17] - __cpp_lib_tuple_element_t 201402L [C++14] - __cpp_lib_tuple_like 202207L [C++23] - 202311L [C++26] - __cpp_lib_tuples_by_type 201304L [C++14] - __cpp_lib_type_identity 201806L [C++20] - __cpp_lib_type_trait_variable_templates 201510L [C++17] - __cpp_lib_uncaught_exceptions 201411L [C++17] - __cpp_lib_unordered_map_try_emplace 201411L [C++17] - __cpp_lib_unreachable 202202L [C++23] - __cpp_lib_unwrap_ref 201811L [C++20] - __cpp_lib_variant 202102L [C++17] - __cpp_lib_void_t 201411L [C++17] - __cpp_lib_within_lifetime 202306L [C++26] +/* Constant Value + __cpp_lib_adaptor_iterator_pair_constructor 202106L [C++23] + __cpp_lib_addressof_constexpr 201603L [C++17] + __cpp_lib_allocate_at_least 202302L [C++23] + __cpp_lib_allocator_traits_is_always_equal 201411L [C++17] + __cpp_lib_any 201606L [C++17] + __cpp_lib_apply 201603L [C++17] + __cpp_lib_array_constexpr 201603L [C++17] + 201811L [C++20] + __cpp_lib_as_const 201510L [C++17] + __cpp_lib_associative_heterogeneous_erasure 202110L [C++23] + __cpp_lib_associative_heterogeneous_insertion 202306L [C++26] + __cpp_lib_assume_aligned 201811L [C++20] + __cpp_lib_atomic_flag_test 201907L [C++20] + __cpp_lib_atomic_float 201711L 
[C++20] + __cpp_lib_atomic_is_always_lock_free 201603L [C++17] + __cpp_lib_atomic_lock_free_type_aliases 201907L [C++20] + __cpp_lib_atomic_min_max 202403L [C++26] + __cpp_lib_atomic_ref 201806L [C++20] + __cpp_lib_atomic_shared_ptr 201711L [C++20] + __cpp_lib_atomic_value_initialization 201911L [C++20] + __cpp_lib_atomic_wait 201907L [C++20] + __cpp_lib_barrier 201907L [C++20] + __cpp_lib_bind_back 202202L [C++23] + 202306L [C++26] + __cpp_lib_bind_front 201907L [C++20] + 202306L [C++26] + __cpp_lib_bit_cast 201806L [C++20] + __cpp_lib_bitops 201907L [C++20] + __cpp_lib_bitset 202306L [C++26] + __cpp_lib_bool_constant 201505L [C++17] + __cpp_lib_bounded_array_traits 201902L [C++20] + __cpp_lib_boyer_moore_searcher 201603L [C++17] + __cpp_lib_byte 201603L [C++17] + __cpp_lib_byteswap 202110L [C++23] + __cpp_lib_char8_t 201907L [C++20] + __cpp_lib_chrono 201611L [C++17] + __cpp_lib_chrono_udls 201304L [C++14] + __cpp_lib_clamp 201603L [C++17] + __cpp_lib_complex_udls 201309L [C++14] + __cpp_lib_concepts 202002L [C++20] + __cpp_lib_constexpr_algorithms 201806L [C++20] + __cpp_lib_constexpr_bitset 202207L [C++23] + __cpp_lib_constexpr_charconv 202207L [C++23] + __cpp_lib_constexpr_cmath 202202L [C++23] + __cpp_lib_constexpr_complex 201711L [C++20] + __cpp_lib_constexpr_dynamic_alloc 201907L [C++20] + __cpp_lib_constexpr_functional 201907L [C++20] + __cpp_lib_constexpr_iterator 201811L [C++20] + __cpp_lib_constexpr_memory 201811L [C++20] + 202202L [C++23] + __cpp_lib_constexpr_numeric 201911L [C++20] + __cpp_lib_constexpr_string 201907L [C++20] + __cpp_lib_constexpr_string_view 201811L [C++20] + __cpp_lib_constexpr_tuple 201811L [C++20] + __cpp_lib_constexpr_typeinfo 202106L [C++23] + __cpp_lib_constexpr_utility 201811L [C++20] + __cpp_lib_constexpr_vector 201907L [C++20] + __cpp_lib_constrained_equality 202403L [C++26] + __cpp_lib_copyable_function 202306L [C++26] + __cpp_lib_coroutine 201902L [C++20] + __cpp_lib_debugging 202311L [C++26] + __cpp_lib_default_template_type_for_algorithm_values 202403L [C++26] + __cpp_lib_destroying_delete 201806L [C++20] + __cpp_lib_enable_shared_from_this 201603L [C++17] + __cpp_lib_endian 201907L [C++20] + __cpp_lib_erase_if 202002L [C++20] + __cpp_lib_exchange_function 201304L [C++14] + __cpp_lib_execution 201603L [C++17] + 201902L [C++20] + __cpp_lib_expected 202211L [C++23] + __cpp_lib_filesystem 201703L [C++17] + __cpp_lib_format 202106L [C++20] + __cpp_lib_format_path 202403L [C++23] + __cpp_lib_format_ranges 202207L [C++23] + __cpp_lib_format_uchar 202311L [C++20] + __cpp_lib_formatters 202302L [C++23] + __cpp_lib_forward_like 202207L [C++23] + __cpp_lib_freestanding_algorithm 202311L [C++26] + __cpp_lib_freestanding_array 202311L [C++26] + __cpp_lib_freestanding_cstring 202306L [C++26] + __cpp_lib_freestanding_expected 202311L [C++26] + __cpp_lib_freestanding_mdspan 202311L [C++26] + __cpp_lib_freestanding_optional 202311L [C++26] + __cpp_lib_freestanding_string_view 202311L [C++26] + __cpp_lib_freestanding_variant 202311L [C++26] + __cpp_lib_fstream_native_handle 202306L [C++26] + __cpp_lib_function_ref 202306L [C++26] + __cpp_lib_gcd_lcm 201606L [C++17] + __cpp_lib_generate_random 202403L [C++26] + __cpp_lib_generic_associative_lookup 201304L [C++14] + __cpp_lib_generic_unordered_lookup 201811L [C++20] + __cpp_lib_hardware_interference_size 201703L [C++17] + __cpp_lib_has_unique_object_representations 201606L [C++17] + __cpp_lib_hazard_pointer 202306L [C++26] + __cpp_lib_hypot 201603L [C++17] + __cpp_lib_incomplete_container_elements 201505L [C++17] 
+ __cpp_lib_int_pow2 202002L [C++20] + __cpp_lib_integer_comparison_functions 202002L [C++20] + __cpp_lib_integer_sequence 201304L [C++14] + __cpp_lib_integral_constant_callable 201304L [C++14] + __cpp_lib_interpolate 201902L [C++20] + __cpp_lib_invoke 201411L [C++17] + __cpp_lib_invoke_r 202106L [C++23] + __cpp_lib_ios_noreplace 202207L [C++23] + __cpp_lib_is_aggregate 201703L [C++17] + __cpp_lib_is_constant_evaluated 201811L [C++20] + __cpp_lib_is_final 201402L [C++14] + __cpp_lib_is_invocable 201703L [C++17] + __cpp_lib_is_layout_compatible 201907L [C++20] + __cpp_lib_is_nothrow_convertible 201806L [C++20] + __cpp_lib_is_null_pointer 201309L [C++14] + __cpp_lib_is_pointer_interconvertible 201907L [C++20] + __cpp_lib_is_scoped_enum 202011L [C++23] + __cpp_lib_is_swappable 201603L [C++17] + __cpp_lib_jthread 201911L [C++20] + __cpp_lib_latch 201907L [C++20] + __cpp_lib_launder 201606L [C++17] + __cpp_lib_linalg 202311L [C++26] + __cpp_lib_list_remove_return_type 201806L [C++20] + __cpp_lib_logical_traits 201510L [C++17] + __cpp_lib_make_from_tuple 201606L [C++17] + __cpp_lib_make_reverse_iterator 201402L [C++14] + __cpp_lib_make_unique 201304L [C++14] + __cpp_lib_map_try_emplace 201411L [C++17] + __cpp_lib_math_constants 201907L [C++20] + __cpp_lib_math_special_functions 201603L [C++17] + __cpp_lib_mdspan 202207L [C++23] + __cpp_lib_memory_resource 201603L [C++17] + __cpp_lib_move_iterator_concept 202207L [C++20] + __cpp_lib_move_only_function 202110L [C++23] + __cpp_lib_node_extract 201606L [C++17] + __cpp_lib_nonmember_container_access 201411L [C++17] + __cpp_lib_not_fn 201603L [C++17] + __cpp_lib_null_iterators 201304L [C++14] + __cpp_lib_optional 201606L [C++17] + 202110L [C++23] + __cpp_lib_out_ptr 202106L [C++23] + 202311L [C++26] + __cpp_lib_parallel_algorithm 201603L [C++17] + __cpp_lib_polymorphic_allocator 201902L [C++20] + __cpp_lib_print 202207L [C++23] + __cpp_lib_quoted_string_io 201304L [C++14] + __cpp_lib_ranges 202207L [C++20] + __cpp_lib_ranges_as_const 202207L [C++23] + __cpp_lib_ranges_as_rvalue 202207L [C++23] + __cpp_lib_ranges_chunk 202202L [C++23] + __cpp_lib_ranges_chunk_by 202202L [C++23] + __cpp_lib_ranges_concat 202403L [C++26] + __cpp_lib_ranges_contains 202207L [C++23] + __cpp_lib_ranges_iota 202202L [C++23] + __cpp_lib_ranges_join_with 202202L [C++23] + __cpp_lib_ranges_repeat 202207L [C++23] + __cpp_lib_ranges_slide 202202L [C++23] + __cpp_lib_ranges_starts_ends_with 202106L [C++23] + __cpp_lib_ranges_to_container 202202L [C++23] + __cpp_lib_ranges_zip 202110L [C++23] + __cpp_lib_ratio 202306L [C++26] + __cpp_lib_raw_memory_algorithms 201606L [C++17] + __cpp_lib_rcu 202306L [C++26] + __cpp_lib_reference_from_temporary 202202L [C++23] + __cpp_lib_reference_wrapper 202403L [C++26] + __cpp_lib_remove_cvref 201711L [C++20] + __cpp_lib_result_of_sfinae 201210L [C++14] + __cpp_lib_robust_nonmodifying_seq_ops 201304L [C++14] + __cpp_lib_sample 201603L [C++17] + __cpp_lib_saturation_arithmetic 202311L [C++26] + __cpp_lib_scoped_lock 201703L [C++17] + __cpp_lib_semaphore 201907L [C++20] + __cpp_lib_shared_mutex 201505L [C++17] + __cpp_lib_shared_ptr_arrays 201611L [C++17] + 201707L [C++20] + __cpp_lib_shared_ptr_weak_type 201606L [C++17] + __cpp_lib_shared_timed_mutex 201402L [C++14] + __cpp_lib_shift 201806L [C++20] + __cpp_lib_smart_ptr_for_overwrite 202002L [C++20] + __cpp_lib_smart_ptr_owner_equality 202306L [C++26] + __cpp_lib_source_location 201907L [C++20] + __cpp_lib_span 202002L [C++20] + __cpp_lib_span_at 202311L [C++26] + __cpp_lib_span_initializer_list 
202311L [C++26] + __cpp_lib_spanstream 202106L [C++23] + __cpp_lib_ssize 201902L [C++20] + __cpp_lib_sstream_from_string_view 202306L [C++26] + __cpp_lib_stacktrace 202011L [C++23] + __cpp_lib_starts_ends_with 201711L [C++20] + __cpp_lib_stdatomic_h 202011L [C++23] + __cpp_lib_string_contains 202011L [C++23] + __cpp_lib_string_resize_and_overwrite 202110L [C++23] + __cpp_lib_string_udls 201304L [C++14] + __cpp_lib_string_view 201606L [C++17] + 201803L [C++20] + __cpp_lib_submdspan 202306L [C++26] + __cpp_lib_syncbuf 201803L [C++20] + __cpp_lib_text_encoding 202306L [C++26] + __cpp_lib_three_way_comparison 201907L [C++20] + __cpp_lib_to_address 201711L [C++20] + __cpp_lib_to_array 201907L [C++20] + __cpp_lib_to_chars 201611L [C++17] + 202306L [C++26] + __cpp_lib_to_string 202306L [C++23] + __cpp_lib_to_underlying 202102L [C++23] + __cpp_lib_transformation_trait_aliases 201304L [C++14] + __cpp_lib_transparent_operators 201210L [C++14] + 201510L [C++17] + __cpp_lib_tuple_element_t 201402L [C++14] + __cpp_lib_tuple_like 202207L [C++23] + 202311L [C++26] + __cpp_lib_tuples_by_type 201304L [C++14] + __cpp_lib_type_identity 201806L [C++20] + __cpp_lib_type_trait_variable_templates 201510L [C++17] + __cpp_lib_uncaught_exceptions 201411L [C++17] + __cpp_lib_unordered_map_try_emplace 201411L [C++17] + __cpp_lib_unreachable 202202L [C++23] + __cpp_lib_unwrap_ref 201811L [C++20] + __cpp_lib_variant 202102L [C++17] + __cpp_lib_void_t 201411L [C++17] + __cpp_lib_within_lifetime 202306L [C++26] */ #include @@ -293,6 +300,10 @@ # error "__cpp_lib_atomic_lock_free_type_aliases should not be defined before c++20" # endif +# ifdef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should not be defined before c++26" +# endif + # ifdef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should not be defined before c++20" # endif @@ -441,6 +452,10 @@ # error "__cpp_lib_constexpr_vector should not be defined before c++20" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_copyable_function # error "__cpp_lib_copyable_function should not be defined before c++26" # endif @@ -453,6 +468,10 @@ # error "__cpp_lib_debugging should not be defined before c++26" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should not be defined before c++20" # endif @@ -489,6 +508,10 @@ # error "__cpp_lib_format should not be defined before c++20" # endif +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined before c++23" +# endif + # ifdef __cpp_lib_format_ranges # error "__cpp_lib_format_ranges should not be defined before c++23" # endif @@ -549,6 +572,10 @@ # error "__cpp_lib_gcd_lcm should not be defined before c++17" # endif +# ifdef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should not be defined before c++26" +# endif + # ifdef __cpp_lib_generic_associative_lookup # error "__cpp_lib_generic_associative_lookup should not be defined before c++14" # endif @@ -773,6 +800,10 @@ # error "__cpp_lib_ranges_chunk_by should not be defined before c++23" # endif +# ifdef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_contains # error "__cpp_lib_ranges_contains should not be defined before c++23" # 
endif @@ -821,6 +852,10 @@ # error "__cpp_lib_reference_from_temporary should not be defined before c++23" # endif +# ifdef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should not be defined before c++26" +# endif + # ifdef __cpp_lib_remove_cvref # error "__cpp_lib_remove_cvref should not be defined before c++20" # endif @@ -1087,6 +1122,10 @@ # error "__cpp_lib_atomic_lock_free_type_aliases should not be defined before c++20" # endif +# ifdef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should not be defined before c++26" +# endif + # ifdef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should not be defined before c++20" # endif @@ -1241,6 +1280,10 @@ # error "__cpp_lib_constexpr_vector should not be defined before c++20" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_copyable_function # error "__cpp_lib_copyable_function should not be defined before c++26" # endif @@ -1253,6 +1296,10 @@ # error "__cpp_lib_debugging should not be defined before c++26" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should not be defined before c++20" # endif @@ -1292,6 +1339,10 @@ # error "__cpp_lib_format should not be defined before c++20" # endif +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined before c++23" +# endif + # ifdef __cpp_lib_format_ranges # error "__cpp_lib_format_ranges should not be defined before c++23" # endif @@ -1352,6 +1403,10 @@ # error "__cpp_lib_gcd_lcm should not be defined before c++17" # endif +# ifdef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should not be defined before c++26" +# endif + # ifndef __cpp_lib_generic_associative_lookup # error "__cpp_lib_generic_associative_lookup should be defined in c++14" # endif @@ -1609,6 +1664,10 @@ # error "__cpp_lib_ranges_chunk_by should not be defined before c++23" # endif +# ifdef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_contains # error "__cpp_lib_ranges_contains should not be defined before c++23" # endif @@ -1657,6 +1716,10 @@ # error "__cpp_lib_reference_from_temporary should not be defined before c++23" # endif +# ifdef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should not be defined before c++26" +# endif + # ifdef __cpp_lib_remove_cvref # error "__cpp_lib_remove_cvref should not be defined before c++20" # endif @@ -1974,6 +2037,10 @@ # error "__cpp_lib_atomic_lock_free_type_aliases should not be defined before c++20" # endif +# ifdef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should not be defined before c++26" +# endif + # ifdef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should not be defined before c++20" # endif @@ -2143,6 +2210,10 @@ # error "__cpp_lib_constexpr_vector should not be defined before c++20" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_copyable_function # error "__cpp_lib_copyable_function should not be defined before c++26" # endif @@ -2155,6 +2226,10 @@ # error "__cpp_lib_debugging should not be defined before c++26" # endif +# ifdef 
__cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # ifdef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should not be defined before c++20" # endif @@ -2215,6 +2290,10 @@ # error "__cpp_lib_format should not be defined before c++20" # endif +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined before c++23" +# endif + # ifdef __cpp_lib_format_ranges # error "__cpp_lib_format_ranges should not be defined before c++23" # endif @@ -2278,6 +2357,10 @@ # error "__cpp_lib_gcd_lcm should have the value 201606L in c++17" # endif +# ifdef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should not be defined before c++26" +# endif + # ifndef __cpp_lib_generic_associative_lookup # error "__cpp_lib_generic_associative_lookup should be defined in c++17" # endif @@ -2616,6 +2699,10 @@ # error "__cpp_lib_ranges_chunk_by should not be defined before c++23" # endif +# ifdef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_contains # error "__cpp_lib_ranges_contains should not be defined before c++23" # endif @@ -2667,6 +2754,10 @@ # error "__cpp_lib_reference_from_temporary should not be defined before c++23" # endif +# ifdef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should not be defined before c++26" +# endif + # ifdef __cpp_lib_remove_cvref # error "__cpp_lib_remove_cvref should not be defined before c++20" # endif @@ -3056,6 +3147,10 @@ # error "__cpp_lib_atomic_lock_free_type_aliases should have the value 201907L in c++20" # endif +# ifdef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should not be defined before c++26" +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should be defined in c++20" @@ -3324,6 +3419,10 @@ # error "__cpp_lib_constexpr_vector should have the value 201907L in c++20" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_copyable_function # error "__cpp_lib_copyable_function should not be defined before c++26" # endif @@ -3339,6 +3438,10 @@ # error "__cpp_lib_debugging should not be defined before c++26" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # if TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L # ifndef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should be defined in c++20" @@ -3423,6 +3526,10 @@ # endif # endif +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined before c++23" +# endif + # ifdef __cpp_lib_format_ranges # error "__cpp_lib_format_ranges should not be defined before c++23" # endif @@ -3489,6 +3596,10 @@ # error "__cpp_lib_gcd_lcm should have the value 201606L in c++20" # endif +# ifdef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should not be defined before c++26" +# endif + # ifndef __cpp_lib_generic_associative_lookup # error "__cpp_lib_generic_associative_lookup should be defined in c++20" # endif @@ -3902,6 +4013,10 @@ # error "__cpp_lib_ranges_chunk_by should not be defined before c++23" # endif +# ifdef __cpp_lib_ranges_concat +# error 
"__cpp_lib_ranges_concat should not be defined before c++26" +# endif + # ifdef __cpp_lib_ranges_contains # error "__cpp_lib_ranges_contains should not be defined before c++23" # endif @@ -3953,6 +4068,10 @@ # error "__cpp_lib_reference_from_temporary should not be defined before c++23" # endif +# ifdef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should not be defined before c++26" +# endif + # ifndef __cpp_lib_remove_cvref # error "__cpp_lib_remove_cvref should be defined in c++20" # endif @@ -4423,6 +4542,10 @@ # error "__cpp_lib_atomic_lock_free_type_aliases should have the value 201907L in c++23" # endif +# ifdef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should not be defined before c++26" +# endif + # if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should be defined in c++23" @@ -4721,6 +4844,10 @@ # error "__cpp_lib_constexpr_vector should have the value 201907L in c++23" # endif +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined before c++26" +# endif + # ifdef __cpp_lib_copyable_function # error "__cpp_lib_copyable_function should not be defined before c++26" # endif @@ -4736,6 +4863,10 @@ # error "__cpp_lib_debugging should not be defined before c++26" # endif +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined before c++26" +# endif + # if TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L # ifndef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should be defined in c++23" @@ -4823,6 +4954,19 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_format_path +# error "__cpp_lib_format_path should be defined in c++23" +# endif +# if __cpp_lib_format_path != 202403L +# error "__cpp_lib_format_path should have the value 202403L in c++23" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # ifndef __cpp_lib_format_ranges # error "__cpp_lib_format_ranges should be defined in c++23" # endif @@ -4904,6 +5048,10 @@ # error "__cpp_lib_gcd_lcm should have the value 201606L in c++23" # endif +# ifdef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should not be defined before c++26" +# endif + # ifndef __cpp_lib_generic_associative_lookup # error "__cpp_lib_generic_associative_lookup should be defined in c++23" # endif @@ -5374,6 +5522,10 @@ # error "__cpp_lib_ranges_chunk_by should have the value 202202L in c++23" # endif +# ifdef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should not be defined before c++26" +# endif + # ifndef __cpp_lib_ranges_contains # error "__cpp_lib_ranges_contains should be defined in c++23" # endif @@ -5482,6 +5634,10 @@ # endif # endif +# ifdef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should not be defined before c++26" +# endif + # ifndef __cpp_lib_remove_cvref # error "__cpp_lib_remove_cvref should be defined in c++23" # endif @@ -6013,6 +6169,19 @@ # endif # if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should be defined in c++26" +# endif +# if __cpp_lib_atomic_min_max != 202403L +# error "__cpp_lib_atomic_min_max should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_atomic_min_max +# error "__cpp_lib_atomic_min_max should not be defined because it is unimplemented in libc++!" +# endif +# endif + +# if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_atomic_ref # error "__cpp_lib_atomic_ref should be defined in c++26" # endif @@ -6314,6 +6483,19 @@ # endif # if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should be defined in c++26" +# endif +# if __cpp_lib_constrained_equality != 202403L +# error "__cpp_lib_constrained_equality should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_constrained_equality +# error "__cpp_lib_constrained_equality should not be defined because it is unimplemented in libc++!" +# endif +# endif + +# if !defined(_LIBCPP_VERSION) # ifndef __cpp_lib_copyable_function # error "__cpp_lib_copyable_function should be defined in c++26" # endif @@ -6346,6 +6528,19 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should be defined in c++26" +# endif +# if __cpp_lib_default_template_type_for_algorithm_values != 202403L +# error "__cpp_lib_default_template_type_for_algorithm_values should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_default_template_type_for_algorithm_values +# error "__cpp_lib_default_template_type_for_algorithm_values should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # if TEST_STD_VER > 17 && defined(__cpp_impl_destroying_delete) && __cpp_impl_destroying_delete >= 201806L # ifndef __cpp_lib_destroying_delete # error "__cpp_lib_destroying_delete should be defined in c++26" @@ -6433,6 +6628,19 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_format_path +# error "__cpp_lib_format_path should be defined in c++26" +# endif +# if __cpp_lib_format_path != 202403L +# error "__cpp_lib_format_path should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_format_path +# error "__cpp_lib_format_path should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_format_ranges # error "__cpp_lib_format_ranges should be defined in c++26" # endif @@ -6604,6 +6812,19 @@ # error "__cpp_lib_gcd_lcm should have the value 201606L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should be defined in c++26" +# endif +# if __cpp_lib_generate_random != 202403L +# error "__cpp_lib_generate_random should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_generate_random +# error "__cpp_lib_generate_random should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_generic_associative_lookup # error "__cpp_lib_generic_associative_lookup should be defined in c++26" # endif @@ -7092,6 +7313,19 @@ # error "__cpp_lib_ranges_chunk_by should have the value 202202L in c++26" # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should be defined in c++26" +# endif +# if __cpp_lib_ranges_concat != 202403L +# error "__cpp_lib_ranges_concat should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_ranges_concat +# error "__cpp_lib_ranges_concat should not be defined because it is unimplemented in libc++!" +# endif +# endif + # ifndef __cpp_lib_ranges_contains # error "__cpp_lib_ranges_contains should be defined in c++26" # endif @@ -7212,6 +7446,19 @@ # endif # endif +# if !defined(_LIBCPP_VERSION) +# ifndef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should be defined in c++26" +# endif +# if __cpp_lib_reference_wrapper != 202403L +# error "__cpp_lib_reference_wrapper should have the value 202403L in c++26" +# endif +# else // _LIBCPP_VERSION +# ifdef __cpp_lib_reference_wrapper +# error "__cpp_lib_reference_wrapper should not be defined because it is unimplemented in libc++!" 
+# endif +# endif + # ifndef __cpp_lib_remove_cvref # error "__cpp_lib_remove_cvref should be defined in c++26" # endif diff --git a/libcxx/utils/generate_feature_test_macro_components.py b/libcxx/utils/generate_feature_test_macro_components.py index c55f5c7..759e490 100755 --- a/libcxx/utils/generate_feature_test_macro_components.py +++ b/libcxx/utils/generate_feature_test_macro_components.py @@ -171,6 +171,12 @@ feature_test_macros = [ "headers": ["atomic"], }, { + "name": "__cpp_lib_atomic_min_max", + "values": {"c++26": 202403}, # P0493R5: Atomic minimum/maximum + "headers": ["atomic"], + "unimplemented": True, + }, + { "name": "__cpp_lib_atomic_ref", "values": {"c++20": 201806}, "headers": ["atomic"], @@ -387,6 +393,12 @@ feature_test_macros = [ "headers": ["vector"], }, { + "name": "__cpp_lib_constrained_equality", + "values": {"c++26": 202403}, # P2944R3: Comparisons for reference_wrapper + "headers": ["optional", "tuple", "utility", "variant"], + "unimplemented": True, + }, + { "name": "__cpp_lib_copyable_function", "values": {"c++26": 202306}, # P2548R6 copyable_function "headers": ["functional"], @@ -399,11 +411,20 @@ feature_test_macros = [ }, { "name": "__cpp_lib_debugging", - "values": {"c++26": 202311}, # P2546R5 Debugging Support + "values": { + "c++26": 202311, # P2546R5 Debugging Support + # "c++26": 202403, # P2810R4: is_debugger_present is_replaceable + }, "headers": ["debugging"], "unimplemented": True, }, { + "name": "__cpp_lib_default_template_type_for_algorithm_values", + "values": {"c++26": 202403}, # P2248R8: Enabling list-initialization for algorithms + "headers": ["algorithm", "deque", "forward_list", "list", "ranges", "string", "vector"], + "unimplemented": True, + }, + { "name": "__cpp_lib_destroying_delete", "values": {"c++20": 201806}, "headers": ["new"], @@ -477,6 +498,12 @@ feature_test_macros = [ "unimplemented": True, }, { + "name": "__cpp_lib_format_path", + "values": {"c++23": 202403}, # P2845R8: Formatting of std::filesystem::path + "headers": ["filesystem"], + "unimplemented": True, + }, + { "name": "__cpp_lib_format_ranges", "values": {"c++23": 202207}, "headers": ["format"], @@ -587,6 +614,12 @@ feature_test_macros = [ "headers": ["numeric"], }, { + "name": "__cpp_lib_generate_random", + "values": {"c++26": 202403}, # P1068R11: Vector API for random number generation + "headers": ["random"], + "unimplemented": True, + }, + { "name": "__cpp_lib_generic_associative_lookup", "values": {"c++14": 201304}, "headers": ["map", "set"], @@ -874,7 +907,10 @@ feature_test_macros = [ }, { "name": "__cpp_lib_print", - "values": {"c++23": 202207}, + "values": { + "c++23": 202207, + # "c++26": 202403, # P3107R5: Permit an efficient implementation of std::print + }, "headers": ["ostream", "print"], }, { @@ -915,6 +951,12 @@ feature_test_macros = [ "headers": ["ranges"], }, { + "name": "__cpp_lib_ranges_concat", + "values": {"c++26": 202403}, # P2542R8: views::concat + "headers": ["ranges"], + "unimplemented": True, + }, + { "name": "__cpp_lib_ranges_contains", "values": {"c++23": 202207}, "headers": ["algorithm"], @@ -996,6 +1038,12 @@ feature_test_macros = [ "unimplemented": True, }, { + "name": "__cpp_lib_reference_wrapper", + "values": {"c++26": 202403}, # P2944R3: Comparisons for reference_wrapper + "headers": ["functional"], + "unimplemented": True, + }, + { "name": "__cpp_lib_remove_cvref", "values": {"c++20": 201711}, "headers": ["type_traits"], @@ -1152,12 +1200,19 @@ feature_test_macros = [ }, { "name": "__cpp_lib_string_view", - "values": {"c++17": 201606, 
"c++20": 201803}, + "values": { + "c++17": 201606, + "c++20": 201803, + # "c++26": 202403, # P2591R5: Concatenation of strings and string views + }, "headers": ["string", "string_view"], }, { "name": "__cpp_lib_submdspan", - "values": {"c++26": 202306}, # P2630R4 submdspan + "values": { + "c++26": 202306, # P2630R4: submdspan + # "c++26": 202403, # P2642R6: Padded mdspan layouts + }, "headers": ["mdspan"], "unimplemented": True, }, @@ -1297,25 +1352,11 @@ feature_test_macros = [ ] assert feature_test_macros == sorted(feature_test_macros, key=lambda tc: tc["name"]) -assert all(tc["headers"] == sorted(tc["headers"]) for tc in feature_test_macros) -assert all( - ("libcxx_guard" in tc) == ("test_suite_guard" in tc) for tc in feature_test_macros -) -assert all( - all( - key - in [ - "name", - "values", - "headers", - "libcxx_guard", - "test_suite_guard", - "unimplemented", - ] - for key in tc.keys() - ) - for tc in feature_test_macros -) +for tc in feature_test_macros: + assert tc["headers"] == sorted(tc["headers"]), tc + assert ("libcxx_guard" in tc) == ("test_suite_guard" in tc), tc + valid_keys = ["name", "values", "headers", "libcxx_guard", "test_suite_guard", "unimplemented"] + assert all(key in valid_keys for key in tc.keys()), tc # Map from each header to the Lit annotations that should be used for # tests that include that header. -- cgit v1.1 From 399ff08e29de4f2bbcfd47f87bb1795ba3a4e091 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 3 Apr 2024 13:32:32 +0100 Subject: [LV] Precommit tests with any-of reductions and epilogue vectorization. Test case for failures from https://lab.llvm.org/buildbot/#/builders/74/builds/26697 caused the revert of 95fef1d in 589c7ab. --- .../epilog-vectorization-any-of-reductions.ll | 222 +++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll new file mode 100644 index 0000000..0b87270 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-any-of-reductions.ll @@ -0,0 +1,222 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -p loop-vectorize -force-vector-width=4 -enable-epilogue-vectorization -epilogue-vectorization-force-VF=4 -S %s | FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" + +define i32 @any_of_reduction_epilog(ptr %src, i64 %N) { +; CHECK-LABEL: define i32 @any_of_reduction_epilog( +; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) { +; CHECK-NEXT: iter.check: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ 
[[TMP8:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 0 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i8>, ptr [[TMP3]], align 1 +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD]], zeroinitializer +; CHECK-NEXT: [[TMP8]] = select <4 x i1> [[TMP4]], <4 x i32> , <4 x i32> [[VEC_PHI]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[TMP8]], zeroinitializer +; CHECK-NEXT: [[TMP7:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP5]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP7]], i32 1, i32 0 +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_MERGE_RDX]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i32> [[MINMAX_IDENT_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX5:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI6:%.*]] = phi <4 x i32> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP17:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[INDEX5]], 0 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[TMP10]] +; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i8, ptr [[TMP11]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <4 x i8>, ptr [[TMP12]], align 1 +; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <4 x i8> [[WIDE_LOAD7]], zeroinitializer +; CHECK-NEXT: [[TMP17]] = select <4 x i1> [[TMP13]], <4 x i32> , <4 x i32> [[VEC_PHI6]] +; CHECK-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX5]], 4 +; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[TMP14:%.*]] = icmp ne <4 x i32> [[TMP17]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[TMP14]]) +; CHECK-NEXT: [[RDX_SELECT9:%.*]] = select i1 [[TMP16]], i32 1, i32 0 +; CHECK-NEXT: [[CMP_N4:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N4]], label [[EXIT]], 
label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_MERGE_RDX10:%.*]] = phi i32 [ 0, [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RED:%.*]] = phi i32 [ [[BC_MERGE_RDX10]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SELECT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[GEP:%.*]] = getelementptr inbounds i8, ptr [[SRC]], i64 [[IV]] +; CHECK-NEXT: [[LOAD:%.*]] = load i8, ptr [[GEP]], align 1 +; CHECK-NEXT: [[ICMP:%.*]] = icmp eq i8 [[LOAD]], 0 +; CHECK-NEXT: [[SELECT]] = select i1 [[ICMP]], i32 1, i32 [[RED]] +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[ICMP3:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[ICMP3]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP4:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[SELECT_LCSSA:%.*]] = phi i32 [ [[SELECT]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[RDX_SELECT9]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i32 [[SELECT_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red = phi i32 [ 0, %entry ], [ %select, %loop ] + %gep = getelementptr inbounds i8, ptr %src, i64 %iv + %load = load i8, ptr %gep, align 1 + %icmp = icmp eq i8 %load, 0 + %select = select i1 %icmp, i32 1, i32 %red + %iv.next = add i64 %iv, 1 + %icmp3 = icmp eq i64 %iv, %N + br i1 %icmp3, label %exit, label %loop + +exit: + ret i32 %select +} + + +define i1 @any_of_reduction_i1_epilog(i64 %N, i32 %a) { +; CHECK-LABEL: define i1 @any_of_reduction_i1_epilog( +; CHECK-SAME: i64 [[N:%.*]], i32 [[A:%.*]]) { +; CHECK-NEXT: iter.check: +; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[N]], 1 +; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; CHECK: vector.main.loop.iter.check: +; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP0]], 4 +; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; CHECK: vector.ph: +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]] +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[IND_END:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] +; CHECK: vector.body: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i1> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ , [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]] +; CHECK-NEXT: [[TMP3]] = select <4 x i1> [[TMP1]], <4 x i1> [[VEC_PHI]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], +; CHECK-NEXT: 
[[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[RDX_SELECT_CMP:%.*]] = icmp ne <4 x i1> [[TMP3]], zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> [[RDX_SELECT_CMP]]) +; CHECK-NEXT: [[RDX_SELECT:%.*]] = select i1 [[TMP6]], i1 false, i1 false +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; CHECK: vec.epilog.iter.check: +; CHECK-NEXT: [[IND_END6:%.*]] = trunc i64 [[N_VEC]] to i32 +; CHECK-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[TMP0]], [[N_VEC]] +; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 4 +; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; CHECK: vec.epilog.ph: +; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i1 [ false, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ] +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; CHECK-NEXT: [[N_MOD_VF2:%.*]] = urem i64 [[TMP0]], 4 +; CHECK-NEXT: [[N_VEC3:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF2]] +; CHECK-NEXT: [[IND_END5:%.*]] = trunc i64 [[N_VEC3]] to i32 +; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[BC_MERGE_RDX]], i64 0 +; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x i1> [[MINMAX_IDENT_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[BC_RESUME_VAL]], i64 0 +; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], +; CHECK-NEXT: [[BROADCAST_SPLATINSERT13:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT14:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT13]], <4 x i32> poison, <4 x i32> zeroinitializer +; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; CHECK: vec.epilog.vector.body: +; CHECK-NEXT: [[INDEX9:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI10:%.*]] = phi <4 x i1> [ [[MINMAX_IDENT_SPLAT]], [[VEC_EPILOG_PH]] ], [ [[TMP10:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_IND11:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VEC_EPILOG_PH]] ], [ [[VEC_IND_NEXT12:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <4 x i32> [[VEC_IND11]], [[BROADCAST_SPLAT14]] +; CHECK-NEXT: [[TMP10]] = select <4 x i1> [[TMP8]], <4 x i1> [[VEC_PHI10]], <4 x i1> zeroinitializer +; CHECK-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX9]], 4 +; CHECK-NEXT: [[VEC_IND_NEXT12]] = add <4 x i32> [[VEC_IND11]], +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] +; CHECK: vec.epilog.middle.block: +; CHECK-NEXT: [[RDX_SELECT_CMP16:%.*]] = icmp ne <4 x i1> [[TMP10]], zeroinitializer +; CHECK-NEXT: [[TMP13:%.*]] = call i1 @llvm.vector.reduce.or.v4i1(<4 x i1> 
[[RDX_SELECT_CMP16]]) +; CHECK-NEXT: [[RDX_SELECT16:%.*]] = select i1 [[TMP13]], i1 false, i1 false +; CHECK-NEXT: [[CMP_N8:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC3]] +; CHECK-NEXT: br i1 [[CMP_N8]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]] +; CHECK: vec.epilog.scalar.ph: +; CHECK-NEXT: [[BC_RESUME_VAL4:%.*]] = phi i64 [ [[N_VEC3]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi i32 [ [[IND_END5]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END6]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK]] ] +; CHECK-NEXT: [[BC_MERGE_RDX17:%.*]] = phi i1 [ false, [[ITER_CHECK]] ], [ [[RDX_SELECT]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL4]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[RED_I1:%.*]] = phi i1 [ [[BC_MERGE_RDX17]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[SEL:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL7]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[CMP_1:%.*]] = icmp eq i32 [[IV_2]], [[A]] +; CHECK-NEXT: [[SEL]] = select i1 [[CMP_1]], i1 [[RED_I1]], i1 false +; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 +; CHECK-NEXT: [[IV_2_NEXT]] = add i32 [[IV_2]], 1 +; CHECK-NEXT: [[CMP_2:%.*]] = icmp eq i64 [[IV]], [[N]] +; CHECK-NEXT: br i1 [[CMP_2]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP7:![0-9]+]] +; CHECK: exit: +; CHECK-NEXT: [[SEL_LCSSA:%.*]] = phi i1 [ [[SEL]], [[LOOP]] ], [ [[RDX_SELECT]], [[MIDDLE_BLOCK]] ], [ [[RDX_SELECT16]], [[VEC_EPILOG_MIDDLE_BLOCK]] ] +; CHECK-NEXT: ret i1 [[SEL_LCSSA]] +; +entry: + br label %loop + +loop: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ] + %red.i1 = phi i1 [ false, %entry ], [ %sel, %loop ] + %iv.2 = phi i32 [ 0, %entry ], [ %iv.2.next, %loop ] + %cmp.1 = icmp eq i32 %iv.2, %a + %sel = select i1 %cmp.1, i1 %red.i1, i1 false + %iv.next = add i64 %iv, 1 + %iv.2.next = add i32 %iv.2, 1 + %cmp.2 = icmp eq i64 %iv, %N + br i1 %cmp.2, label %exit, label %loop + +exit: + ret i1 %sel + +; uselistorder directives + uselistorder i1 %sel, { 1, 0 } +} +;. +; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]} +; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1} +; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"} +; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]} +; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META2]], [[META1]]} +; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]], [[META2]]} +; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]} +; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]} +;. -- cgit v1.1 From 2bf7ddf06f773277fcfef58a3cd8c32a161ce36a Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 3 Apr 2024 13:31:44 +0100 Subject: [X86] Add vector truncation tests for nsw/nuw flags Based off #85592 - our truncation -> PACKSS/PACKUS folds should be able to use the nsw/nuw flags to recognise when we don't need to mask/sext_inreg prior to the PACKSS/PACKUS nodes. 
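Illustrative sketch, not part of the committed patch (the function name below is made up for the example): trunc nuw guarantees the truncated-away high bits of each element are already zero, so a case like the following should eventually lower on SSE4.1 to a bare packusdw with no pblendw/pand pre-masking; likewise trunc nsw should allow packssdw without the pslld/psrad sext_inreg sequence.

define <8 x i16> @trunc_nuw_sketch(<8 x i32> %a) {
  ; nuw: each i32 element already fits in 16 unsigned bits, so the
  ; unsigned-saturating packusdw yields the exact truncation result
  ; without masking the inputs first.
  %t = trunc nuw <8 x i32> %a to <8 x i16>
  ret <8 x i16> %t
}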
--- llvm/test/CodeGen/X86/vector-trunc-nowrap.ll | 2213 ++++++++++++++++++++++++++ 1 file changed, 2213 insertions(+) create mode 100644 llvm/test/CodeGen/X86/vector-trunc-nowrap.ll diff --git a/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll new file mode 100644 index 0000000..32c7e82 --- /dev/null +++ b/llvm/test/CodeGen/X86/vector-trunc-nowrap.ll @@ -0,0 +1,2213 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSE2 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSE2-SSSE3,SSSE3 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-SLOW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-ALL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2,AVX2-FAST-PERLANE +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512F +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512VL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BW +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL +; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX512,AVX512BWVL + +define <8 x i32> @trunc8i64_8i32_nsw(<8 x i64> %a) { +; SSE-LABEL: trunc8i64_8i32_nsw: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE-NEXT: movaps %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc8i64_8i32_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc8i64_8i32_nsw: +; AVX2-SLOW: # %bb.0: # %entry +; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_nsw: +; AVX2-FAST-ALL: # %bb.0: # %entry +; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0 +; 
AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-ALL-NEXT: retq +; +; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_nsw: +; AVX2-FAST-PERLANE: # %bb.0: # %entry +; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-FAST-PERLANE-NEXT: retq +; +; AVX512-LABEL: trunc8i64_8i32_nsw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512-NEXT: retq +entry: + %0 = trunc nsw <8 x i64> %a to <8 x i32> + ret <8 x i32> %0 +} + +define <8 x i32> @trunc8i64_8i32_nuw(<8 x i64> %a) { +; SSE-LABEL: trunc8i64_8i32_nuw: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE-NEXT: movaps %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc8i64_8i32_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc8i64_8i32_nuw: +; AVX2-SLOW: # %bb.0: # %entry +; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-ALL-LABEL: trunc8i64_8i32_nuw: +; AVX2-FAST-ALL: # %bb.0: # %entry +; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0 +; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-ALL-NEXT: retq +; +; AVX2-FAST-PERLANE-LABEL: trunc8i64_8i32_nuw: +; AVX2-FAST-PERLANE: # %bb.0: # %entry +; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-FAST-PERLANE-NEXT: retq +; +; AVX512-LABEL: trunc8i64_8i32_nuw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <8 x i64> %a to <8 x i32> + ret <8 x i32> %0 +} + +define <8 x i16> @trunc8i64_8i16_nsw(<8 x i64> %a) { +; SSE2-SSSE3-LABEL: trunc8i64_8i16_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm2 +; SSE2-SSSE3-NEXT: psrad $16, %xmm2 +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm0 +; SSE2-SSSE3-NEXT: psrad $16, %xmm0 +; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i64_8i16_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packusdw %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i64_8i16_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = 
[65535,65535,65535,65535] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i64_8i16_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15] +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc8i64_8i16_nsw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nsw <8 x i64> %a to <8 x i16> + ret <8 x i16> %0 +} + +define <8 x i16> @trunc8i64_8i16_nuw(<8 x i64> %a) { +; SSE2-SSSE3-LABEL: trunc8i64_8i16_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm2 +; SSE2-SSSE3-NEXT: psrad $16, %xmm2 +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm0 +; SSE2-SSSE3-NEXT: psrad $16, %xmm0 +; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i64_8i16_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packusdw %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i64_8i16_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [65535,65535,65535,65535] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i64_8i16_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15] +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc8i64_8i16_nuw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <8 x i64> %a to <8 x i16> + ret <8 x i16> %0 +} + +define void @trunc8i64_8i8_nsw(<8 x i64> %a) { +; 
SSE2-SSSE3-LABEL: trunc8i64_8i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0 +; SSE2-SSSE3-NEXT: movq %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i64_8i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = [255,255] +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packusdw %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm0, %xmm0 +; SSE41-NEXT: movq %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i64_8i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovq %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i64_8i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vmovq %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc8i64_8i8_nsw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovqb %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nsw <8 x i64> %a to <8 x i8> + store <8 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc8i64_8i8_nuw(<8 x i64> %a) { +; SSE2-SSSE3-LABEL: trunc8i64_8i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0 +; SSE2-SSSE3-NEXT: movq %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i64_8i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = [255,255] +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packusdw %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm0, %xmm0 +; SSE41-NEXT: movq %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i64_8i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastsd {{.*#+}} ymm2 = [255,255,255,255] +; AVX1-NEXT: vandps %ymm2, 
%ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX1-NEXT: vmovq %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i64_8i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastq {{.*#+}} ymm2 = [255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0,255,0,0,0,0,0,0,0] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX2-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX2-NEXT: vmovq %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc8i64_8i8_nuw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovqb %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <8 x i64> %a to <8 x i8> + store <8 x i8> %0, ptr undef, align 4 + ret void +} + +define <8 x i16> @trunc8i32_8i16_nsw(<8 x i32> %a) { +; SSE2-LABEL: trunc8i32_8i16_nsw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc8i32_8i16_nsw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm2, %xmm1 +; SSSE3-NEXT: pshufb %xmm2, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i32_8i16_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i32_8i16_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i32_8i16_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc8i32_8i16_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc8i32_8i16_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc8i32_8i16_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc8i32_8i16_nsw: +; 
AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <8 x i32> %a to <8 x i16> + ret <8 x i16> %0 +} + +define <8 x i16> @trunc8i32_8i16_nuw(<8 x i32> %a) { +; SSE2-LABEL: trunc8i32_8i16_nuw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc8i32_8i16_nuw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm2, %xmm1 +; SSSE3-NEXT: pshufb %xmm2, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i32_8i16_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i32_8i16_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i32_8i16_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,1,4,5,8,9,12,13,u,u,u,u,u,u,u,u,16,17,20,21,24,25,28,29,u,u,u,u,u,u,u,u] +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,2,3] +; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc8i32_8i16_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc8i32_8i16_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc8i32_8i16_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc8i32_8i16_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <8 x i32> %a to <8 x i16> + ret <8 x i16> %0 +} + +define void @trunc8i32_8i8_nsw(<8 x i32> %a) { +; SSE2-SSSE3-LABEL: trunc8i32_8i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0 +; SSE2-SSSE3-NEXT: movq %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i32_8i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm0, %xmm0 +; SSE41-NEXT: movq %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i32_8i8_nsw: +; AVX1: # %bb.0: # %entry +; 
AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX1-NEXT: vmovq %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i32_8i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-NEXT: vmovq %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc8i32_8i8_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc8i32_8i8_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovdb %ymm0, (%rax) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc8i32_8i8_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc8i32_8i8_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rax) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <8 x i32> %a to <8 x i8> + store <8 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc8i32_8i8_nuw(<8 x i32> %a) { +; SSE2-SSSE3-LABEL: trunc8i32_8i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm0, %xmm0 +; SSE2-SSSE3-NEXT: movq %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i32_8i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = [255,255,255,255] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm0, %xmm0 +; SSE41-NEXT: movq %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc8i32_8i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX1-NEXT: vmovq %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc8i32_8i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vmovd {{.*#+}} xmm2 = [0,4,8,12,0,0,0,0,0,0,0,0,0,0,0,0] +; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX2-NEXT: vmovq %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc8i32_8i8_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vmovq %xmm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; 
AVX512VL-LABEL: trunc8i32_8i8_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovdb %ymm0, (%rax) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc8i32_8i8_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vmovq %xmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc8i32_8i8_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovdb %ymm0, (%rax) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <8 x i32> %a to <8 x i8> + store <8 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc16i32_16i16_nsw(<16 x i32> %a) { +; SSE2-LABEL: trunc16i32_16i16_nsw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: pslld $16, %xmm3 +; SSE2-NEXT: psrad $16, %xmm3 +; SSE2-NEXT: pslld $16, %xmm2 +; SSE2-NEXT: psrad $16, %xmm2 +; SSE2-NEXT: packssdw %xmm3, %xmm2 +; SSE2-NEXT: movdqu %xmm2, (%rax) +; SSE2-NEXT: movdqu %xmm0, (%rax) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc16i32_16i16_nsw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm4, %xmm1 +; SSSE3-NEXT: pshufb %xmm4, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: pshufb %xmm4, %xmm3 +; SSSE3-NEXT: pshufb %xmm4, %xmm2 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSSE3-NEXT: movdqu %xmm2, (%rax) +; SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i32_16i16_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7] +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: movdqu %xmm2, (%rax) +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i32_16i16_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535] +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqu %xmm1, (%rax) +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc16i32_16i16_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7],ymm1[8],ymm2[9],ymm1[10],ymm2[11],ymm1[12],ymm2[13],ymm1[14],ymm2[15] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vmovdqu %ymm0, (%rax) +; AVX2-NEXT: vzeroupper +; 
AVX2-NEXT: retq +; +; AVX512-LABEL: trunc16i32_16i16_nsw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovdw %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nsw <16 x i32> %a to <16 x i16> + store <16 x i16> %0, ptr undef, align 4 + ret void +} + +define void @trunc16i32_16i16_nuw(<16 x i32> %a) { +; SSE2-LABEL: trunc16i32_16i16_nuw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: pslld $16, %xmm3 +; SSE2-NEXT: psrad $16, %xmm3 +; SSE2-NEXT: pslld $16, %xmm2 +; SSE2-NEXT: psrad $16, %xmm2 +; SSE2-NEXT: packssdw %xmm3, %xmm2 +; SSE2-NEXT: movdqu %xmm2, (%rax) +; SSE2-NEXT: movdqu %xmm0, (%rax) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc16i32_16i16_nuw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm4, %xmm1 +; SSSE3-NEXT: pshufb %xmm4, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: pshufb %xmm4, %xmm3 +; SSSE3-NEXT: pshufb %xmm4, %xmm2 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] +; SSSE3-NEXT: movdqu %xmm2, (%rax) +; SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i32_16i16_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1],xmm1[2],xmm4[3],xmm1[4],xmm4[5],xmm1[6],xmm4[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1],xmm0[2],xmm4[3],xmm0[4],xmm4[5],xmm0[6],xmm4[7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1],xmm3[2],xmm4[3],xmm3[4],xmm4[5],xmm3[6],xmm4[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1],xmm2[2],xmm4[3],xmm2[4],xmm4[5],xmm2[6],xmm4[7] +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: movdqu %xmm2, (%rax) +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i32_16i16_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [65535,65535,65535,65535,65535,65535,65535,65535] +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqu %xmm1, (%rax) +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc16i32_16i16_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1],ymm1[2],ymm2[3],ymm1[4],ymm2[5],ymm1[6],ymm2[7],ymm1[8],ymm2[9],ymm1[10],ymm2[11],ymm1[12],ymm2[13],ymm1[14],ymm2[15] +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1],ymm0[2],ymm2[3],ymm0[4],ymm2[5],ymm0[6],ymm2[7],ymm0[8],ymm2[9],ymm0[10],ymm2[11],ymm0[12],ymm2[13],ymm0[14],ymm2[15] +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vmovdqu %ymm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc16i32_16i16_nuw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovdw %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <16 x i32> %a to <16 x i16> + store <16 x i16> %0, ptr undef, align 4 + ret void +} + +define void @trunc16i32_16i8_nsw(<16 x i32> %a) { +; SSE2-SSSE3-LABEL: trunc16i32_16i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; 
SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i32_16i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = [255,255,255,255] +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm2, %xmm0 +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i32_16i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc16i32_16i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX2-NEXT: vmovdqu %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc16i32_16i8_nsw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovdb %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <16 x i32> %a to <16 x i8> + store <16 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc16i32_16i8_nuw(<16 x i32> %a) { +; SSE2-SSSE3-LABEL: trunc16i32_16i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i32_16i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = [255,255,255,255] +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: packuswb %xmm2, %xmm0 +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i32_16i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 +; AVX1-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, 
%xmm0 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc16i32_16i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastd {{.*#+}} ymm2 = [255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0,255,0,0,0] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackusdw %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3] +; AVX2-NEXT: vmovdqu %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512-LABEL: trunc16i32_16i8_nuw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: vpmovdb %zmm0, (%rax) +; AVX512-NEXT: vzeroupper +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <16 x i32> %a to <16 x i8> + store <16 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc16i16_16i8_nsw(<16 x i16> %a) { +; SSE2-SSSE3-LABEL: trunc16i16_16i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i16_16i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i16_16i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc16i16_16i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovdqu %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc16i16_16i8_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpmovdb %zmm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc16i16_16i8_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpmovdb %zmm0, (%rax) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc16i16_16i8_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: vmovdqu %xmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc16i16_16i8_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rax) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <16 x i16> %a 
to <16 x i8> + store <16 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc16i16_16i8_nuw(<16 x i16> %a) { +; SSE2-SSSE3-LABEL: trunc16i16_16i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i16_16i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i16_16i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc16i16_16i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vmovdqu %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc16i16_16i8_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpmovdb %zmm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc16i16_16i8_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpmovdb %zmm0, (%rax) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc16i16_16i8_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: vmovdqu %xmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc16i16_16i8_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovwb %ymm0, (%rax) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <16 x i16> %a to <16 x i8> + store <16 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc32i16_32i8_nsw(<32 x i16> %a) { +; SSE2-SSSE3-LABEL: trunc32i16_32i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: movdqu %xmm2, (%rax) +; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc32i16_32i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: 
packuswb %xmm1, %xmm0 +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: packuswb %xmm3, %xmm2 +; SSE41-NEXT: movdqu %xmm2, (%rax) +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc32i16_32i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqu %xmm1, (%rax) +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc32i16_32i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vmovdqu %ymm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc32i16_32i8_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512F-NEXT: vpmovdb %zmm1, (%rax) +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpmovdb %zmm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc32i16_32i8_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpmovdb %zmm1, (%rax) +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpmovdb %zmm0, (%rax) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc32i16_32i8_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vpmovwb %zmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc32i16_32i8_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rax) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <32 x i16> %a to <32 x i8> + store <32 x i8> %0, ptr undef, align 4 + ret void +} + +define void @trunc32i16_32i8_nuw(<32 x i16> %a) { +; SSE2-SSSE3-LABEL: trunc32i16_32i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; 
SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm2 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm2 +; SSE2-SSSE3-NEXT: movdqu %xmm2, (%rax) +; SSE2-SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc32i16_32i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm4, %xmm2 +; SSE41-NEXT: packuswb %xmm3, %xmm2 +; SSE41-NEXT: movdqu %xmm2, (%rax) +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc32i16_32i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vmovdqu %xmm1, (%rax) +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc32i16_32i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: vmovdqu %ymm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc32i16_32i8_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512F-NEXT: vpmovdb %zmm1, (%rax) +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpmovdb %zmm0, (%rax) +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc32i16_32i8_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpmovdb %zmm1, (%rax) +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpmovdb %zmm0, (%rax) +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc32i16_32i8_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vpmovwb %zmm0, (%rax) +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc32i16_32i8_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovwb %zmm0, (%rax) +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq 
+entry: + %0 = trunc nsw <32 x i16> %a to <32 x i8> + store <32 x i8> %0, ptr undef, align 4 + ret void +} + +define <8 x i32> @trunc2x4i64_8i32_nsw(<4 x i64> %a, <4 x i64> %b) { +; SSE-LABEL: trunc2x4i64_8i32_nsw: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE-NEXT: movaps %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc2x4i64_8i32_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc2x4i64_8i32_nsw: +; AVX2-SLOW: # %bb.0: # %entry +; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32_nsw: +; AVX2-FAST-ALL: # %bb.0: # %entry +; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0 +; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-ALL-NEXT: retq +; +; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32_nsw: +; AVX2-FAST-PERLANE: # %bb.0: # %entry +; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-FAST-PERLANE-NEXT: retq +; +; AVX512-LABEL: trunc2x4i64_8i32_nsw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512-NEXT: retq +entry: + %0 = trunc nsw <4 x i64> %a to <4 x i32> + %1 = trunc nsw <4 x i64> %b to <4 x i32> + %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> + ret <8 x i32> %2 +} + +define <8 x i32> @trunc2x4i64_8i32_nuw(<4 x i64> %a, <4 x i64> %b) { +; SSE-LABEL: trunc2x4i64_8i32_nuw: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE-NEXT: movaps %xmm2, %xmm1 +; SSE-NEXT: retq +; +; AVX1-LABEL: trunc2x4i64_8i32_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX1-NEXT: retq +; +; AVX2-SLOW-LABEL: trunc2x4i64_8i32_nuw: +; AVX2-SLOW: # %bb.0: # %entry +; AVX2-SLOW-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-SLOW-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-SLOW-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-SLOW-NEXT: retq +; +; AVX2-FAST-ALL-LABEL: trunc2x4i64_8i32_nuw: +; AVX2-FAST-ALL: # %bb.0: # %entry +; AVX2-FAST-ALL-NEXT: vmovaps {{.*#+}} ymm2 = [0,2,4,6,4,6,6,7] +; AVX2-FAST-ALL-NEXT: vpermps %ymm0, %ymm2, %ymm0 +; AVX2-FAST-ALL-NEXT: vpermps %ymm1, %ymm2, %ymm1 +; AVX2-FAST-ALL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX2-FAST-ALL-NEXT: retq +; +; AVX2-FAST-PERLANE-LABEL: trunc2x4i64_8i32_nuw: +; AVX2-FAST-PERLANE: # %bb.0: # %entry +; AVX2-FAST-PERLANE-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3],ymm1[2,3] +; AVX2-FAST-PERLANE-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; 
AVX2-FAST-PERLANE-NEXT: vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm2[0,2],ymm0[4,6],ymm2[4,6] +; AVX2-FAST-PERLANE-NEXT: retq +; +; AVX512-LABEL: trunc2x4i64_8i32_nuw: +; AVX512: # %bb.0: # %entry +; AVX512-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512-NEXT: retq +entry: + %0 = trunc nuw <4 x i64> %a to <4 x i32> + %1 = trunc nuw <4 x i64> %b to <4 x i32> + %2 = shufflevector <4 x i32> %0, <4 x i32> %1, <8 x i32> + ret <8 x i32> %2 +} + +define <8 x i16> @trunc2x4i64_8i16_nsw(<4 x i64> %a, <4 x i64> %b) { +; SSE2-SSSE3-LABEL: trunc2x4i64_8i16_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm0 +; SSE2-SSSE3-NEXT: psrad $16, %xmm0 +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm2 +; SSE2-SSSE3-NEXT: psrad $16, %xmm2 +; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x4i64_8i16_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: packusdw %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc2x4i64_8i16_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7] +; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7] +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc2x4i64_8i16_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15] +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15] +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc2x4i64_8i16_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpmovqw %zmm1, %xmm1 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x4i64_8i16_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0 +; AVX512VL-NEXT: vpmovqw %ymm1, %xmm1 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: 
trunc2x4i64_8i16_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x4i64_8i16_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vpmovqw %ymm1, %xmm1 +; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <4 x i64> %a to <4 x i16> + %1 = trunc nsw <4 x i64> %b to <4 x i16> + %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> + ret <8 x i16> %2 +} + +define <8 x i16> @trunc2x4i64_8i16_nuw(<4 x i64> %a, <4 x i64> %b) { +; SSE2-SSSE3-LABEL: trunc2x4i64_8i16_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm0 +; SSE2-SSSE3-NEXT: psrad $16, %xmm0 +; SSE2-SSSE3-NEXT: shufps {{.*#+}} xmm2 = xmm2[0,2],xmm3[0,2] +; SSE2-SSSE3-NEXT: pslld $16, %xmm2 +; SSE2-SSSE3-NEXT: psrad $16, %xmm2 +; SSE2-SSSE3-NEXT: packssdw %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x4i64_8i16_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm4, %xmm4 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm4[1,2,3],xmm1[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm4[1,2,3],xmm0[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: pblendw {{.*#+}} xmm3 = xmm3[0],xmm4[1,2,3],xmm3[4],xmm4[5,6,7] +; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm2[0],xmm4[1,2,3],xmm2[4],xmm4[5,6,7] +; SSE41-NEXT: packusdw %xmm3, %xmm2 +; SSE41-NEXT: packusdw %xmm2, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc2x4i64_8i16_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm3[1,2,3],xmm1[4],xmm3[5,6,7] +; AVX1-NEXT: vpackusdw %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 +; AVX1-NEXT: vpblendw {{.*#+}} xmm2 = xmm2[0],xmm3[1,2,3],xmm2[4],xmm3[5,6,7] +; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm3[1,2,3],xmm0[4],xmm3[5,6,7] +; AVX1-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc2x4i64_8i16_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX2-NEXT: vpblendw {{.*#+}} ymm1 = ymm1[0],ymm2[1,2,3],ymm1[4],ymm2[5,6,7],ymm1[8],ymm2[9,10,11],ymm1[12],ymm2[13,14,15] +; AVX2-NEXT: vextracti128 $1, %ymm1, %xmm3 +; AVX2-NEXT: vpackusdw %xmm3, %xmm1, %xmm1 +; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3],ymm0[4],ymm2[5,6,7],ymm0[8],ymm2[9,10,11],ymm0[12],ymm2[13,14,15] +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX2-NEXT: vpackusdw %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc2x4i64_8i16_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpmovqw %zmm1, %xmm1 +; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; 
AVX512VL-LABEL: trunc2x4i64_8i16_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovqw %ymm0, %xmm0 +; AVX512VL-NEXT: vpmovqw %ymm1, %xmm1 +; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x4i64_8i16_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovqw %zmm1, %xmm1 +; AVX512BW-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x4i64_8i16_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovqw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vpmovqw %ymm1, %xmm1 +; AVX512BWVL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <4 x i64> %a to <4 x i16> + %1 = trunc nuw <4 x i64> %b to <4 x i16> + %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> + ret <8 x i16> %2 +} + +define <4 x i32> @trunc2x2i64_4i32_nsw(<2 x i64> %a, <2 x i64> %b) { +; SSE-LABEL: trunc2x2i64_4i32_nsw: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: retq +; +; AVX-LABEL: trunc2x2i64_4i32_nsw: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc2x2i64_4i32_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x2i64_4i32_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x2i64_4i32_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x2i64_4i32_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <2 x i64> %a to <2 x i32> + %1 = trunc nsw <2 x i64> %b to <2 x i32> + %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> + ret <4 x i32> %2 +} + +define <4 x i32> @trunc2x2i64_4i32_nuw(<2 x i64> %a, <2 x i64> %b) { +; SSE-LABEL: trunc2x2i64_4i32_nuw: +; SSE: # %bb.0: # %entry +; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; SSE-NEXT: retq +; +; AVX-LABEL: trunc2x2i64_4i32_nuw: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc2x2i64_4i32_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x2i64_4i32_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovqd %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x2i64_4i32_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[0,2] +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x2i64_4i32_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: 
# kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovqd %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <2 x i64> %a to <2 x i32> + %1 = trunc nuw <2 x i64> %b to <2 x i32> + %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> + ret <4 x i32> %2 +} + +define <8 x i16> @trunc2x4i32_8i16_nsw(<4 x i32> %a, <4 x i32> %b) { +; SSE2-LABEL: trunc2x4i32_8i16_nsw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc2x4i32_8i16_nsw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm2, %xmm1 +; SSSE3-NEXT: pshufb %xmm2, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x4i32_8i16_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: trunc2x4i32_8i16_nsw: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc2x4i32_8i16_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x4i32_8i16_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x4i32_8i16_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x4i32_8i16_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <4 x i32> %a to <4 x i16> + %1 = trunc nsw <4 x i32> %b to <4 x i16> + %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> + ret <8 x i16> %2 +} + +define <8 x i16> @trunc2x4i32_8i16_nuw(<4 x i32> %a, <4 x i32> %b) { +; SSE2-LABEL: trunc2x4i32_8i16_nuw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pslld $16, %xmm1 +; SSE2-NEXT: psrad $16, %xmm1 +; SSE2-NEXT: pslld $16, %xmm0 +; SSE2-NEXT: psrad $16, %xmm0 +; SSE2-NEXT: packssdw %xmm1, %xmm0 +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc2x4i32_8i16_nuw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} 
xmm2 = [0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] +; SSSE3-NEXT: pshufb %xmm2, %xmm1 +; SSSE3-NEXT: pshufb %xmm2, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x4i32_8i16_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pxor %xmm2, %xmm2 +; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; SSE41-NEXT: packusdw %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX-LABEL: trunc2x4i32_8i16_nuw: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vpxor %xmm2, %xmm2, %xmm2 +; AVX-NEXT: vpblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] +; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] +; AVX-NEXT: vpackusdw %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc2x4i32_8i16_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512F-NEXT: vzeroupper +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x4i32_8i16_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512VL-NEXT: vzeroupper +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x4i32_8i16_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x4i32_8i16_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovdw %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <4 x i32> %a to <4 x i16> + %1 = trunc nuw <4 x i32> %b to <4 x i16> + %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> + ret <8 x i16> %2 +} + +define <32 x i8> @trunc2x16i16_32i8_nsw(<16 x i16> %a, <16 x i16> %b) { +; SSE2-SSSE3-LABEL: trunc2x16i16_32i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm4 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm4 +; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x16i16_32i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand %xmm4, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm2, %xmm4 +; SSE41-NEXT: packuswb %xmm3, %xmm4 +; SSE41-NEXT: movdqa %xmm4, %xmm1 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc2x16i16_32i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, 
%ymm0, %xmm3 +; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc2x16i16_32i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc2x16i16_32i8_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x16i16_32i8_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x16i16_32i8_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x16i16_32i8_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <16 x i16> %a to <16 x i8> + %1 = trunc nsw <16 x i16> %b to <16 x i8> + %2 = shufflevector <16 x i8> %0, <16 x i8> %1, <32 x i32> + ret <32 x i8> %2 +} + +define <32 x i8> @trunc2x16i16_32i8_nuw(<16 x i16> %a, <16 x i16> %b) { +; SSE2-SSSE3-LABEL: trunc2x16i16_32i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: pand %xmm4, %xmm3 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm4 +; SSE2-SSSE3-NEXT: packuswb %xmm3, %xmm4 +; SSE2-SSSE3-NEXT: movdqa %xmm4, %xmm1 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x16i16_32i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm4 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm4, %xmm1 +; SSE41-NEXT: pand 
%xmm4, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: pand %xmm4, %xmm3 +; SSE41-NEXT: pand %xmm2, %xmm4 +; SSE41-NEXT: packuswb %xmm3, %xmm4 +; SSE41-NEXT: movdqa %xmm4, %xmm1 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc2x16i16_32i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX1-NEXT: vandps %ymm2, %ymm0, %ymm0 +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm3 +; AVX1-NEXT: vpackuswb %xmm3, %xmm0, %xmm0 +; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1 +; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 +; AVX1-NEXT: vpackuswb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc2x16i16_32i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastw {{.*#+}} ymm2 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255] +; AVX2-NEXT: vpand %ymm2, %ymm0, %ymm0 +; AVX2-NEXT: vpand %ymm2, %ymm1, %ymm1 +; AVX2-NEXT: vpackuswb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3] +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc2x16i16_32i8_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x16i16_32i8_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero +; AVX512VL-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512VL-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero +; AVX512VL-NEXT: vpmovdb %zmm1, %xmm1 +; AVX512VL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x16i16_32i8_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x16i16_32i8_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BWVL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 +; AVX512BWVL-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <16 x i16> %a to <16 x i8> + %1 = trunc nuw <16 x i16> %b to <16 x i8> + %2 = shufflevector <16 x i8> %0, <16 x i8> %1, <32 x i32> + ret <32 x i8> %2 +} + +define <16 x i8> @trunc2x8i16_16i8_nsw(<8 x i16> %a, <8 x i16> %b) { +; SSE2-SSSE3-LABEL: trunc2x8i16_16i8_nsw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: pand 
%xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x8i16_16i8_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc2x8i16_16i8_nsw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc2x8i16_16i8_nsw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc2x8i16_16i8_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x8i16_16i8_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x8i16_16i8_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x8i16_16i8_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <8 x i16> %a to <8 x i8> + %1 = trunc nsw <8 x i16> %b to <8 x i8> + %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> + ret <16 x i8> %2 +} + +define <16 x i8> @trunc2x8i16_16i8_nuw(<8 x i16> %a, <8 x i16> %b) { +; SSE2-SSSE3-LABEL: trunc2x8i16_16i8_nuw: +; SSE2-SSSE3: # %bb.0: # %entry +; SSE2-SSSE3-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm1 +; SSE2-SSSE3-NEXT: pand %xmm2, %xmm0 +; SSE2-SSSE3-NEXT: packuswb %xmm1, %xmm0 +; SSE2-SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc2x8i16_16i8_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE41-NEXT: pand %xmm2, %xmm1 +; SSE41-NEXT: pand %xmm2, %xmm0 +; SSE41-NEXT: packuswb %xmm1, %xmm0 +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc2x8i16_16i8_nuw: +; AVX1: # %bb.0: # %entry +; AVX1-NEXT: vbroadcastss {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc2x8i16_16i8_nuw: +; AVX2: # %bb.0: # %entry +; AVX2-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: 
retq +; +; AVX512F-LABEL: trunc2x8i16_16i8_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpbroadcastw {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX512F-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512F-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc2x8i16_16i8_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpbroadcastd {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; AVX512VL-NEXT: vpand %xmm2, %xmm1, %xmm1 +; AVX512VL-NEXT: vpand %xmm2, %xmm0, %xmm0 +; AVX512VL-NEXT: vpackuswb %xmm1, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc2x8i16_16i8_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BW-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc2x8i16_16i8_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: # kill: def $xmm0 killed $xmm0 def $ymm0 +; AVX512BWVL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 +; AVX512BWVL-NEXT: vpmovwb %ymm0, %xmm0 +; AVX512BWVL-NEXT: vzeroupper +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <8 x i16> %a to <8 x i8> + %1 = trunc nuw <8 x i16> %b to <8 x i8> + %2 = shufflevector <8 x i8> %0, <8 x i8> %1, <16 x i32> + ret <16 x i8> %2 +} + +define i64 @trunc8i16_i64_nsw(<8 x i16> %inval) { +; SSE2-LABEL: trunc8i16_i64_nsw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: movq %xmm0, %rax +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc8i16_i64_nsw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movq %xmm0, %rax +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i16_i64_nsw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSE41-NEXT: movq %xmm0, %rax +; SSE41-NEXT: retq +; +; AVX-LABEL: trunc8i16_i64_nsw: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX-NEXT: vmovq %xmm0, %rax +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc8i16_i64_nsw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc8i16_i64_nsw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc8i16_i64_nsw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vmovq %xmm0, %rax +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc8i16_i64_nsw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovwb %xmm0, %xmm0 +; AVX512BWVL-NEXT: vmovq %xmm0, %rax +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nsw <8 x i16> %inval to <8 x i8> + %1 = bitcast <8 x i8> %0 to i64 + ret i64 %1 +} + +define i64 @trunc8i16_i64_nuw(<8 x i16> %inval) { +; SSE2-LABEL: trunc8i16_i64_nuw: +; SSE2: # %bb.0: # %entry +; SSE2-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 +; SSE2-NEXT: packuswb %xmm0, %xmm0 +; SSE2-NEXT: movq %xmm0, %rax +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc8i16_i64_nuw: +; SSSE3: # %bb.0: # %entry +; SSSE3-NEXT: pshufb {{.*#+}} xmm0 
= xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSSE3-NEXT: movq %xmm0, %rax +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc8i16_i64_nuw: +; SSE41: # %bb.0: # %entry +; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; SSE41-NEXT: movq %xmm0, %rax +; SSE41-NEXT: retq +; +; AVX-LABEL: trunc8i16_i64_nuw: +; AVX: # %bb.0: # %entry +; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX-NEXT: vmovq %xmm0, %rax +; AVX-NEXT: retq +; +; AVX512F-LABEL: trunc8i16_i64_nuw: +; AVX512F: # %bb.0: # %entry +; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vmovq %xmm0, %rax +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc8i16_i64_nuw: +; AVX512VL: # %bb.0: # %entry +; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: vmovq %xmm0, %rax +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc8i16_i64_nuw: +; AVX512BW: # %bb.0: # %entry +; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vmovq %xmm0, %rax +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc8i16_i64_nuw: +; AVX512BWVL: # %bb.0: # %entry +; AVX512BWVL-NEXT: vpmovwb %xmm0, %xmm0 +; AVX512BWVL-NEXT: vmovq %xmm0, %rax +; AVX512BWVL-NEXT: retq +entry: + %0 = trunc nuw <8 x i16> %inval to <8 x i8> + %1 = bitcast <8 x i8> %0 to i64 + ret i64 %1 +} -- cgit v1.1 From 7ec87c473936245ea11f8bb64c936e5112f25e6a Mon Sep 17 00:00:00 2001 From: Daniel Chen Date: Wed, 3 Apr 2024 08:51:14 -0400 Subject: [Flang] Support for procedure pointer component default initialization. (#87356) This PR is to address `TODO(loc, "procedure pointer component default initialization");`. It handles default init for procedure pointer components in a derived type that is 32 bytes or larger (Default init for smaller size type has already been handled). ``` interface subroutine sub() end end interface type dt real :: r1 = 5.0 procedure(real), pointer, nopass :: pp1 => null() real, pointer :: rp1 => null() procedure(), pointer, nopass :: pp2 => sub end type type(dt) :: dd1 end ``` --- flang/lib/Lower/ConvertVariable.cpp | 13 +++++-- .../procedure-pointer-component-default-init.f90 | 41 ++++++++++++++++++++++ 2 files changed, 51 insertions(+), 3 deletions(-) create mode 100644 flang/test/Lower/HLFIR/procedure-pointer-component-default-init.f90 diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp index e07ae42..f59c784 100644 --- a/flang/lib/Lower/ConvertVariable.cpp +++ b/flang/lib/Lower/ConvertVariable.cpp @@ -358,9 +358,16 @@ static mlir::Value genComponentDefaultInit( } else if (const auto *proc{ component .detailsIf()}) { - if (proc->init().has_value()) - TODO(loc, "procedure pointer component default initialization"); - else + if (proc->init().has_value()) { + auto sym{*proc->init()}; + if (sym) // Has a procedure target. + componentValue = + Fortran::lower::convertProcedureDesignatorInitialTarget(converter, + loc, *sym); + else // Has NULL() target. + componentValue = + fir::factory::createNullBoxProc(builder, loc, componentTy); + } else componentValue = builder.create(loc, componentTy); } assert(componentValue && "must have been computed"); diff --git a/flang/test/Lower/HLFIR/procedure-pointer-component-default-init.f90 b/flang/test/Lower/HLFIR/procedure-pointer-component-default-init.f90 new file mode 100644 index 0000000..8593126 --- /dev/null +++ b/flang/test/Lower/HLFIR/procedure-pointer-component-default-init.f90 @@ -0,0 +1,41 @@ +! 
Test procedure pointer component default initialization when the size +! of the derived type is 32 bytes and larger. +! RUN: bbc -emit-hlfir -o - %s | FileCheck %s + + interface + subroutine sub() + end + end interface + type dt + real :: r1 = 5.0 + procedure(real), pointer, nopass :: pp1 => null() + real, pointer :: rp1 => null() + procedure(), pointer, nopass :: pp2 => sub + end type + type(dt) :: dd1 + end + +! CHECK-LABEL: func.func @_QQmain() { +! CHECK: %[[VAL_14:.*]] = fir.address_of(@_QFEdd1) : !fir.ref f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}>> +! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFEdd1"} : (!fir.ref f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}>>) -> (!fir.ref f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}>>, !fir.ref f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}>>) +! CHECK: } + +! CHECK-LABEL: fir.global internal @_QFEdd1 : !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}> { +! CHECK: %[[VAL_0:.*]] = fir.undefined !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}> +! CHECK: %cst = arith.constant 5.000000e+00 : f32 +! CHECK: %[[VAL_1:.*]] = fir.field_index r1, !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}> +! CHECK: %[[VAL_2:.*]] = fir.insert_value %[[VAL_0]], %cst, ["r1", !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}>] : (!fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}>, f32) -> !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}> +! CHECK: %[[VAL_3:.*]] = fir.zero_bits () -> f32 +! CHECK: %[[VAL_4:.*]] = fir.emboxproc %[[VAL_3]] : (() -> f32) -> !fir.boxproc<() -> f32> +! CHECK: %[[VAL_5:.*]] = fir.field_index pp1, !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}> +! CHECK: %[[VAL_6:.*]] = fir.insert_value %[[VAL_2]], %[[VAL_4]], ["pp1", !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}>] : (!fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}>, !fir.boxproc<() -> f32>) -> !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}> +! CHECK: %[[VAL_7:.*]] = fir.zero_bits !fir.ptr +! CHECK: %[[VAL_8:.*]] = fir.embox %[[VAL_7]] : (!fir.ptr) -> !fir.box> +! CHECK: %[[VAL_9:.*]] = fir.field_index rp1, !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}> +! CHECK: %[[VAL_10:.*]] = fir.insert_value %[[VAL_6]], %[[VAL_8]], ["rp1", !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}>] : (!fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}>, !fir.box>) -> !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}> +! CHECK: %[[VAL_11:.*]] = fir.address_of(@_QPsub) : () -> () +! CHECK: %[[VAL_12:.*]] = fir.emboxproc %[[VAL_11]] : (() -> ()) -> !fir.boxproc<() -> ()> +! CHECK: %[[VAL_13:.*]] = fir.field_index pp2, !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}> +! 
CHECK: %[[VAL_14:.*]] = fir.insert_value %[[VAL_10]], %[[VAL_12]], ["pp2", !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}>] : (!fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}>, !fir.boxproc<() -> ()>) -> !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}> +! CHECK: fir.has_value %[[VAL_14]] : !fir.type<_QFTdt{r1:f32,pp1:!fir.boxproc<() -> f32>,rp1:!fir.box>,pp2:!fir.boxproc<() -> ()>}> +! CHECK: } -- cgit v1.1 From a2acf3132334e3131ec584c2c54ec5ba2214e074 Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Wed, 3 Apr 2024 14:54:29 +0200 Subject: [MLIR] Add endianness accessors to the data layout (#87347) This commit extends the data layout subsystem with accessors for the endianness. The implementation follows the structure implemented for alloca, global, and program memory spaces. --- mlir/include/mlir/Dialect/DLTI/DLTI.h | 3 +++ .../include/mlir/Interfaces/DataLayoutInterfaces.h | 9 ++++++++ .../mlir/Interfaces/DataLayoutInterfaces.td | 18 ++++++++++++++++ mlir/lib/Dialect/DLTI/DLTI.cpp | 5 +++++ mlir/lib/Interfaces/DataLayoutInterfaces.cpp | 25 ++++++++++++++++++++++ mlir/test/Dialect/LLVMIR/layout.mlir | 8 +++++++ mlir/test/lib/Dialect/DLTI/TestDataLayoutQuery.cpp | 4 ++++ .../Interfaces/DataLayoutInterfacesTest.cpp | 18 ++++++++++++++++ 8 files changed, 90 insertions(+) diff --git a/mlir/include/mlir/Dialect/DLTI/DLTI.h b/mlir/include/mlir/Dialect/DLTI/DLTI.h index bf23aa2..5ac7c11 100644 --- a/mlir/include/mlir/Dialect/DLTI/DLTI.h +++ b/mlir/include/mlir/Dialect/DLTI/DLTI.h @@ -100,6 +100,9 @@ public: /// Returns the list of entries. DataLayoutEntryListRef getEntries() const; + /// Returns the endiannes identifier. + StringAttr getEndiannessIdentifier(MLIRContext *context) const; + /// Returns the alloca memory space identifier. StringAttr getAllocaMemorySpaceIdentifier(MLIRContext *context) const; diff --git a/mlir/include/mlir/Interfaces/DataLayoutInterfaces.h b/mlir/include/mlir/Interfaces/DataLayoutInterfaces.h index 0463546..76bf33e 100644 --- a/mlir/include/mlir/Interfaces/DataLayoutInterfaces.h +++ b/mlir/include/mlir/Interfaces/DataLayoutInterfaces.h @@ -64,6 +64,10 @@ std::optional getDefaultIndexBitwidth(Type type, const DataLayout &dataLayout, ArrayRef params); +/// Default handler for endianness request. Dispatches to the +/// DataLayoutInterface if specified, otherwise returns the default. +Attribute getDefaultEndianness(DataLayoutEntryInterface entry); + /// Default handler for alloca memory space request. Dispatches to the /// DataLayoutInterface if specified, otherwise returns the default. Attribute getDefaultAllocaMemorySpace(DataLayoutEntryInterface entry); @@ -192,6 +196,9 @@ public: /// type is not a pointer-like type, it returns std::nullopt. std::optional getTypeIndexBitwidth(Type t) const; + /// Returns the specified endianness. + Attribute getEndianness() const; + /// Returns the memory space used for AllocaOps. Attribute getAllocaMemorySpace() const; @@ -230,6 +237,8 @@ private: mutable DenseMap preferredAlignments; mutable DenseMap> indexBitwidths; + /// Cache for the endianness. + mutable std::optional endianness; /// Cache for alloca, global, and program memory spaces. 
mutable std::optional allocaMemorySpace; mutable std::optional programMemorySpace; diff --git a/mlir/include/mlir/Interfaces/DataLayoutInterfaces.td b/mlir/include/mlir/Interfaces/DataLayoutInterfaces.td index 0ee7a11..9edc885 100644 --- a/mlir/include/mlir/Interfaces/DataLayoutInterfaces.td +++ b/mlir/include/mlir/Interfaces/DataLayoutInterfaces.td @@ -107,6 +107,12 @@ def DataLayoutSpecInterface : AttrInterface<"DataLayoutSpecInterface"> { /*args=*/(ins) >, InterfaceMethod< + /*description=*/"Returns the endianness identifier.", + /*retTy=*/"::mlir::StringAttr", + /*methodName=*/"getEndiannessIdentifier", + /*args=*/(ins "::mlir::MLIRContext *":$context) + >, + InterfaceMethod< /*description=*/"Returns the alloca memory space identifier.", /*retTy=*/"::mlir::StringAttr", /*methodName=*/"getAllocaMemorySpaceIdentifier", @@ -297,6 +303,18 @@ def DataLayoutOpInterface : OpInterface<"DataLayoutOpInterface"> { }] >, StaticInterfaceMethod< + /*description=*/"Returns the endianness used by the ABI computed " + "using the relevant entries. The data layout object " + "can be used for recursive queries.", + /*retTy=*/"::mlir::Attribute", + /*methodName=*/"getEndianness", + /*args=*/(ins "::mlir::DataLayoutEntryInterface":$entry), + /*methodBody=*/"", + /*defaultImplementation=*/[{ + return ::mlir::detail::getDefaultEndianness(entry); + }] + >, + StaticInterfaceMethod< /*description=*/"Returns the memory space used by the ABI computed " "using the relevant entries. The data layout object " "can be used for recursive queries.", diff --git a/mlir/lib/Dialect/DLTI/DLTI.cpp b/mlir/lib/Dialect/DLTI/DLTI.cpp index daef234..98a8865 100644 --- a/mlir/lib/Dialect/DLTI/DLTI.cpp +++ b/mlir/lib/Dialect/DLTI/DLTI.cpp @@ -282,6 +282,11 @@ DataLayoutEntryListRef DataLayoutSpecAttr::getEntries() const { } StringAttr +DataLayoutSpecAttr::getEndiannessIdentifier(MLIRContext *context) const { + return Builder(context).getStringAttr(DLTIDialect::kDataLayoutEndiannessKey); +} + +StringAttr DataLayoutSpecAttr::getAllocaMemorySpaceIdentifier(MLIRContext *context) const { return Builder(context).getStringAttr( DLTIDialect::kDataLayoutAllocaMemorySpaceKey); diff --git a/mlir/lib/Interfaces/DataLayoutInterfaces.cpp b/mlir/lib/Interfaces/DataLayoutInterfaces.cpp index b5b7d78..e93a9ef 100644 --- a/mlir/lib/Interfaces/DataLayoutInterfaces.cpp +++ b/mlir/lib/Interfaces/DataLayoutInterfaces.cpp @@ -234,6 +234,15 @@ std::optional mlir::detail::getDefaultIndexBitwidth( return std::nullopt; } +// Returns the endianness if specified in the given entry. If the entry is empty +// the default endianness represented by an empty attribute is returned. +Attribute mlir::detail::getDefaultEndianness(DataLayoutEntryInterface entry) { + if (entry == DataLayoutEntryInterface()) + return Attribute(); + + return entry.getValue(); +} + // Returns the memory space used for alloca operations if specified in the // given entry. If the entry is empty the default memory space represented by // an empty attribute is returned. 
@@ -548,6 +557,22 @@ std::optional mlir::DataLayout::getTypeIndexBitwidth(Type t) const { }); } +mlir::Attribute mlir::DataLayout::getEndianness() const { + checkValid(); + if (endianness) + return *endianness; + DataLayoutEntryInterface entry; + if (originalLayout) + entry = originalLayout.getSpecForIdentifier( + originalLayout.getEndiannessIdentifier(originalLayout.getContext())); + + if (auto iface = dyn_cast_or_null(scope)) + endianness = iface.getEndianness(entry); + else + endianness = detail::getDefaultEndianness(entry); + return *endianness; +} + mlir::Attribute mlir::DataLayout::getAllocaMemorySpace() const { checkValid(); if (allocaMemorySpace) diff --git a/mlir/test/Dialect/LLVMIR/layout.mlir b/mlir/test/Dialect/LLVMIR/layout.mlir index a78fb77..4813089 100644 --- a/mlir/test/Dialect/LLVMIR/layout.mlir +++ b/mlir/test/Dialect/LLVMIR/layout.mlir @@ -6,6 +6,7 @@ module { // CHECK: alignment = 8 // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 + // CHECK: endianness = "" // CHECK: global_memory_space = 0 // CHECK: index = 64 // CHECK: preferred = 8 @@ -16,6 +17,7 @@ module { // CHECK: alignment = 8 // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 + // CHECK: endianness = "" // CHECK: global_memory_space = 0 // CHECK: index = 64 // CHECK: preferred = 8 @@ -26,6 +28,7 @@ module { // CHECK: alignment = 8 // CHECK: alloca_memory_space = 0 // CHECK: bitsize = 64 + // CHECK: endianness = "" // CHECK: global_memory_space = 0 // CHECK: index = 64 // CHECK: preferred = 8 @@ -43,6 +46,7 @@ module attributes { dlti.dl_spec = #dlti.dl_spec< #dlti.dl_entry : vector<3xi64>>, #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi64>>, #dlti.dl_entry, dense<[32, 64, 64, 24]> : vector<4xi64>>, + #dlti.dl_entry<"dlti.endianness", "little">, #dlti.dl_entry<"dlti.alloca_memory_space", 5 : ui64>, #dlti.dl_entry<"dlti.global_memory_space", 2 : ui64>, #dlti.dl_entry<"dlti.program_memory_space", 3 : ui64>, @@ -53,6 +57,7 @@ module attributes { dlti.dl_spec = #dlti.dl_spec< // CHECK: alignment = 4 // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 32 + // CHECK: endianness = "little" // CHECK: global_memory_space = 2 // CHECK: index = 32 // CHECK: preferred = 8 @@ -63,6 +68,7 @@ module attributes { dlti.dl_spec = #dlti.dl_spec< // CHECK: alignment = 4 // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 32 + // CHECK: endianness = "little" // CHECK: global_memory_space = 2 // CHECK: index = 32 // CHECK: preferred = 8 @@ -73,6 +79,7 @@ module attributes { dlti.dl_spec = #dlti.dl_spec< // CHECK: alignment = 8 // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 64 + // CHECK: endianness = "little" // CHECK: global_memory_space = 2 // CHECK: index = 64 // CHECK: preferred = 8 @@ -83,6 +90,7 @@ module attributes { dlti.dl_spec = #dlti.dl_spec< // CHECK: alignment = 8 // CHECK: alloca_memory_space = 5 // CHECK: bitsize = 32 + // CHECK: endianness = "little" // CHECK: global_memory_space = 2 // CHECK: index = 24 // CHECK: preferred = 8 diff --git a/mlir/test/lib/Dialect/DLTI/TestDataLayoutQuery.cpp b/mlir/test/lib/Dialect/DLTI/TestDataLayoutQuery.cpp index 3da48ff..a4464bb 100644 --- a/mlir/test/lib/Dialect/DLTI/TestDataLayoutQuery.cpp +++ b/mlir/test/lib/Dialect/DLTI/TestDataLayoutQuery.cpp @@ -41,6 +41,7 @@ struct TestDataLayoutQuery uint64_t alignment = layout.getTypeABIAlignment(op.getType()); uint64_t preferred = layout.getTypePreferredAlignment(op.getType()); uint64_t index = layout.getTypeIndexBitwidth(op.getType()).value_or(0); + Attribute endianness = layout.getEndianness(); Attribute 
allocaMemorySpace = layout.getAllocaMemorySpace(); Attribute programMemorySpace = layout.getProgramMemorySpace(); Attribute globalMemorySpace = layout.getGlobalMemorySpace(); @@ -51,6 +52,9 @@ struct TestDataLayoutQuery builder.getNamedAttr("alignment", builder.getIndexAttr(alignment)), builder.getNamedAttr("preferred", builder.getIndexAttr(preferred)), builder.getNamedAttr("index", builder.getIndexAttr(index)), + builder.getNamedAttr("endianness", endianness == Attribute() + ? builder.getStringAttr("") + : endianness), builder.getNamedAttr("alloca_memory_space", allocaMemorySpace == Attribute() ? builder.getUI32IntegerAttr(0) diff --git a/mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp b/mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp index d6b8d73..5f48429 100644 --- a/mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp +++ b/mlir/unittests/Interfaces/DataLayoutInterfacesTest.cpp @@ -22,6 +22,7 @@ using namespace mlir; namespace { constexpr static llvm::StringLiteral kAttrName = "dltest.layout"; +constexpr static llvm::StringLiteral kEndiannesKeyName = "dltest.endianness"; constexpr static llvm::StringLiteral kAllocaKeyName = "dltest.alloca_memory_space"; constexpr static llvm::StringLiteral kProgramKeyName = @@ -73,6 +74,9 @@ struct CustomDataLayoutSpec } DataLayoutEntryListRef getEntries() const { return getImpl()->entries; } LogicalResult verifySpec(Location loc) { return success(); } + StringAttr getEndiannessIdentifier(MLIRContext *context) const { + return Builder(context).getStringAttr(kEndiannesKeyName); + } StringAttr getAllocaMemorySpaceIdentifier(MLIRContext *context) const { return Builder(context).getStringAttr(kAllocaKeyName); } @@ -130,6 +134,15 @@ struct SingleQueryType return 4; } + Attribute getEndianness(DataLayoutEntryInterface entry) { + static bool executed = false; + if (executed) + llvm::report_fatal_error("repeated call"); + + executed = true; + return Attribute(); + } + Attribute getAllocaMemorySpace(DataLayoutEntryInterface entry) { static bool executed = false; if (executed) @@ -317,6 +330,7 @@ module {} EXPECT_EQ(layout.getTypePreferredAlignment(IntegerType::get(&ctx, 42)), 8u); EXPECT_EQ(layout.getTypePreferredAlignment(Float16Type::get(&ctx)), 2u); + EXPECT_EQ(layout.getEndianness(), Attribute()); EXPECT_EQ(layout.getAllocaMemorySpace(), Attribute()); EXPECT_EQ(layout.getProgramMemorySpace(), Attribute()); EXPECT_EQ(layout.getGlobalMemorySpace(), Attribute()); @@ -348,6 +362,7 @@ TEST(DataLayout, NullSpec) { EXPECT_EQ(layout.getTypeIndexBitwidth(Float16Type::get(&ctx)), std::nullopt); EXPECT_EQ(layout.getTypeIndexBitwidth(IndexType::get(&ctx)), 64u); + EXPECT_EQ(layout.getEndianness(), Attribute()); EXPECT_EQ(layout.getAllocaMemorySpace(), Attribute()); EXPECT_EQ(layout.getProgramMemorySpace(), Attribute()); EXPECT_EQ(layout.getGlobalMemorySpace(), Attribute()); @@ -378,6 +393,7 @@ TEST(DataLayout, EmptySpec) { EXPECT_EQ(layout.getTypeIndexBitwidth(Float16Type::get(&ctx)), std::nullopt); EXPECT_EQ(layout.getTypeIndexBitwidth(IndexType::get(&ctx)), 64u); + EXPECT_EQ(layout.getEndianness(), Attribute()); EXPECT_EQ(layout.getAllocaMemorySpace(), Attribute()); EXPECT_EQ(layout.getProgramMemorySpace(), Attribute()); EXPECT_EQ(layout.getGlobalMemorySpace(), Attribute()); @@ -390,6 +406,7 @@ TEST(DataLayout, SpecWithEntries) { #dlti.dl_entry, #dlti.dl_entry, #dlti.dl_entry, + #dlti.dl_entry<"dltest.endianness", "little">, #dlti.dl_entry<"dltest.alloca_memory_space", 5 : i32>, #dlti.dl_entry<"dltest.program_memory_space", 3 : i32>, 
#dlti.dl_entry<"dltest.global_memory_space", 2 : i32>, @@ -425,6 +442,7 @@ TEST(DataLayout, SpecWithEntries) { EXPECT_EQ(layout.getTypePreferredAlignment(IntegerType::get(&ctx, 32)), 64u); EXPECT_EQ(layout.getTypePreferredAlignment(Float32Type::get(&ctx)), 64u); + EXPECT_EQ(layout.getEndianness(), Builder(&ctx).getStringAttr("little")); EXPECT_EQ(layout.getAllocaMemorySpace(), Builder(&ctx).getI32IntegerAttr(5)); EXPECT_EQ(layout.getProgramMemorySpace(), Builder(&ctx).getI32IntegerAttr(3)); EXPECT_EQ(layout.getGlobalMemorySpace(), Builder(&ctx).getI32IntegerAttr(2)); -- cgit v1.1 From 450f1952aced87584a53485d1ba1c2f77c3835a1 Mon Sep 17 00:00:00 2001 From: Axel Lundberg <19574357+Zonotora@users.noreply.github.com> Date: Wed, 3 Apr 2024 14:55:03 +0200 Subject: [clang][UBSan] Add implicit conversion check for bitfields (#75481) This patch implements the implicit truncation and implicit sign change checks for bitfields using UBSan. E.g., `-fsanitize=implicit-bitfield-truncation` and `-fsanitize=implicit-bitfield-sign-change`. --- clang/docs/ReleaseNotes.rst | 7 + clang/docs/UndefinedBehaviorSanitizer.rst | 19 +- clang/include/clang/Basic/Sanitizers.def | 20 +- clang/lib/CodeGen/CGExpr.cpp | 37 ++- clang/lib/CodeGen/CGExprScalar.cpp | 257 ++++++++++++++++++--- clang/lib/CodeGen/CodeGenFunction.h | 15 ++ clang/test/CodeGen/ubsan-bitfield-conversion.c | 61 +++++ .../test/CodeGenCXX/ubsan-bitfield-conversion.cpp | 94 ++++++++ clang/test/Driver/fsanitize.c | 28 +-- compiler-rt/lib/ubsan/ubsan_handlers.cpp | 27 ++- compiler-rt/lib/ubsan/ubsan_handlers.h | 1 + 11 files changed, 493 insertions(+), 73 deletions(-) create mode 100644 clang/test/CodeGen/ubsan-bitfield-conversion.c create mode 100644 clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 3237842..096376a 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -195,6 +195,10 @@ Non-comprehensive list of changes in this release New Compiler Flags ------------------ +- ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and + sign change. +- ``-fsanitize=implicit-integer-conversion`` a group that replaces the previous + group ``-fsanitize=implicit-conversion``. - ``-Wmissing-designated-field-initializers``, grouped under ``-Wmissing-field-initializers``. This diagnostic can be disabled to make ``-Wmissing-field-initializers`` behave @@ -208,6 +212,9 @@ Modified Compiler Flags - Added a new diagnostic flag ``-Wreturn-mismatch`` which is grouped under ``-Wreturn-type``, and moved some of the diagnostics previously controlled by ``-Wreturn-type`` under this new flag. Fixes #GH72116. +- ``-fsanitize=implicit-conversion`` is now a group for both + ``-fsanitize=implicit-integer-conversion`` and + ``-fsanitize=implicit-bitfield-conversion``. - Added ``-Wcast-function-type-mismatch`` under the ``-Wcast-function-type`` warning group. Moved the diagnostic previously controlled by diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst index 8f58c92..531d56e 100644 --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -148,6 +148,11 @@ Available checks are: Issues caught by this sanitizer are not undefined behavior, but are often unintentional. - ``-fsanitize=integer-divide-by-zero``: Integer division by zero. 
+ - ``-fsanitize=implicit-bitfield-conversion``: Implicit conversion from + integer of larger bit width to smaller bitfield, if that results in data + loss. This includes unsigned/signed truncations and sign changes, similarly + to how the ``-fsanitize=implicit-integer-conversion`` group works, but + explicitly for bitfields. - ``-fsanitize=nonnull-attribute``: Passing null pointer as a function parameter which is declared to never be null. - ``-fsanitize=null``: Use of a null pointer or creation of a null @@ -193,8 +198,8 @@ Available checks are: signed division overflow (``INT_MIN/-1``). Note that checks are still added even when ``-fwrapv`` is enabled. This sanitizer does not check for lossy implicit conversions performed before the computation (see - ``-fsanitize=implicit-conversion``). Both of these two issues are handled - by ``-fsanitize=implicit-conversion`` group of checks. + ``-fsanitize=implicit-integer-conversion``). Both of these two issues are handled + by ``-fsanitize=implicit-integer-conversion`` group of checks. - ``-fsanitize=unreachable``: If control flow reaches an unreachable program point. - ``-fsanitize=unsigned-integer-overflow``: Unsigned integer overflow, where @@ -202,7 +207,7 @@ Available checks are: type. Unlike signed integer overflow, this is not undefined behavior, but it is often unintentional. This sanitizer does not check for lossy implicit conversions performed before such a computation - (see ``-fsanitize=implicit-conversion``). + (see ``-fsanitize=implicit-integer-conversion``). - ``-fsanitize=vla-bound``: A variable-length array whose bound does not evaluate to a positive value. - ``-fsanitize=vptr``: Use of an object whose vptr indicates that it is of @@ -224,11 +229,15 @@ You can also use the following check groups: - ``-fsanitize=implicit-integer-arithmetic-value-change``: Catches implicit conversions that change the arithmetic value of the integer. Enables ``implicit-signed-integer-truncation`` and ``implicit-integer-sign-change``. - - ``-fsanitize=implicit-conversion``: Checks for suspicious - behavior of implicit conversions. Enables + - ``-fsanitize=implicit-integer-conversion``: Checks for suspicious + behavior of implicit integer conversions. Enables ``implicit-unsigned-integer-truncation``, ``implicit-signed-integer-truncation``, and ``implicit-integer-sign-change``. + - ``-fsanitize=implicit-conversion``: Checks for suspicious + behavior of implicit conversions. Enables + ``implicit-integer-conversion``, and + ``implicit-bitfield-conversion``. - ``-fsanitize=integer``: Checks for undefined or suspicious integer behavior (e.g. unsigned integer overflow). 
Enables ``signed-integer-overflow``, ``unsigned-integer-overflow``, diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def index c2137e3..b228ffd07 100644 --- a/clang/include/clang/Basic/Sanitizers.def +++ b/clang/include/clang/Basic/Sanitizers.def @@ -163,24 +163,24 @@ SANITIZER_GROUP("implicit-integer-arithmetic-value-change", ImplicitIntegerArithmeticValueChange, ImplicitIntegerSignChange | ImplicitSignedIntegerTruncation) -SANITIZER("objc-cast", ObjCCast) +SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion, + ImplicitIntegerArithmeticValueChange | + ImplicitUnsignedIntegerTruncation) -// FIXME: -//SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion, -// ImplicitIntegerArithmeticValueChange | -// ImplicitUnsignedIntegerTruncation) -//SANITIZER_GROUP("implicit-conversion", ImplicitConversion, -// ImplicitIntegerConversion) +// Implicit bitfield sanitizers +SANITIZER("implicit-bitfield-conversion", ImplicitBitfieldConversion) SANITIZER_GROUP("implicit-conversion", ImplicitConversion, - ImplicitIntegerArithmeticValueChange | - ImplicitUnsignedIntegerTruncation) + ImplicitIntegerConversion | + ImplicitBitfieldConversion) SANITIZER_GROUP("integer", Integer, - ImplicitConversion | IntegerDivideByZero | Shift | + ImplicitIntegerConversion | IntegerDivideByZero | Shift | SignedIntegerOverflow | UnsignedIntegerOverflow | UnsignedShiftBase) +SANITIZER("objc-cast", ObjCCast) + SANITIZER("local-bounds", LocalBounds) SANITIZER_GROUP("bounds", Bounds, ArrayBounds | LocalBounds) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 5443235..f70324d 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5580,11 +5580,44 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { break; } - RValue RV = EmitAnyExpr(E->getRHS()); + // TODO: Can we de-duplicate this code with the corresponding code in + // CGExprScalar, similar to the way EmitCompoundAssignmentLValue works? + RValue RV; + llvm::Value *Previous = nullptr; + QualType SrcType = E->getRHS()->getType(); + // Check if LHS is a bitfield, if RHS contains an implicit cast expression + // we want to extract that value and potentially (if the bitfield sanitizer + // is enabled) use it to check for an implicit conversion. + if (E->getLHS()->refersToBitField()) { + llvm::Value *RHS = + EmitWithOriginalRHSBitfieldAssignment(E, Previous, &SrcType); + RV = RValue::get(RHS); + } else + RV = EmitAnyExpr(E->getRHS()); + LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store); + if (RV.isScalar()) EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc()); - EmitStoreThroughLValue(RV, LV); + + if (LV.isBitField()) { + llvm::Value *Result; + // If bitfield sanitizers are enabled we want to use the result + // to check whether a truncation or sign change has occurred. + if (SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) + EmitStoreThroughBitfieldLValue(RV, LV, &Result); + else + EmitStoreThroughBitfieldLValue(RV, LV); + + // If the expression contained an implicit conversion, make sure + // to use the value before the scalar conversion. + llvm::Value *Src = Previous ? 
Previous : RV.getScalarVal(); + QualType DstType = E->getLHS()->getType(); + EmitBitfieldConversionCheck(Src, SrcType, Result, DstType, + LV.getBitFieldInfo(), E->getExprLoc()); + } else + EmitStoreThroughLValue(RV, LV); + if (getLangOpts().OpenMP) CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this, E->getLHS()); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 397b497..a4ab8a11 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -15,6 +15,7 @@ #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" +#include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" @@ -308,6 +309,7 @@ public: llvm::Type *DstTy, SourceLocation Loc); /// Known implicit conversion check kinds. + /// This is used for bitfield conversion checks as well. /// Keep in sync with the enum of the same name in ubsan_handlers.h enum ImplicitConversionCheckKind : unsigned char { ICCK_IntegerTruncation = 0, // Legacy, was only used by clang 7. @@ -1103,6 +1105,21 @@ void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, {Src, Dst}); } +static llvm::Value *EmitIsNegativeTestHelper(Value *V, QualType VType, + const char *Name, + CGBuilderTy &Builder) { + bool VSigned = VType->isSignedIntegerOrEnumerationType(); + llvm::Type *VTy = V->getType(); + if (!VSigned) { + // If the value is unsigned, then it is never negative. + return llvm::ConstantInt::getFalse(VTy->getContext()); + } + llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0); + return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero, + llvm::Twine(Name) + "." + V->getName() + + ".negativitycheck"); +} + // Should be called within CodeGenFunction::SanitizerScope RAII scope. // Returns 'i1 false' when the conversion Src -> Dst changed the sign. static std::pair Value * { - // Is this value a signed type? - bool VSigned = VType->isSignedIntegerOrEnumerationType(); - llvm::Type *VTy = V->getType(); - if (!VSigned) { - // If the value is unsigned, then it is never negative. - // FIXME: can we encounter non-scalar VTy here? - return llvm::ConstantInt::getFalse(VTy->getContext()); - } - // Get the zero of the same type with which we will be comparing. - llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0); - // %V.isnegative = icmp slt %V, 0 - // I.e is %V *strictly* less than zero, does it have negative value? - return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero, - llvm::Twine(Name) + "." + V->getName() + - ".negativitycheck"); - }; - // 1. Was the old Value negative? - llvm::Value *SrcIsNegative = EmitIsNegativeTest(Src, SrcType, "src"); + llvm::Value *SrcIsNegative = + EmitIsNegativeTestHelper(Src, SrcType, "src", Builder); // 2. Is the new Value negative? - llvm::Value *DstIsNegative = EmitIsNegativeTest(Dst, DstType, "dst"); + llvm::Value *DstIsNegative = + EmitIsNegativeTestHelper(Dst, DstType, "dst", Builder); // 3. Now, was the 'negativity status' preserved during the conversion? // NOTE: conversion from negative to zero is considered to change the sign. // (We want to get 'false' when the conversion changed the sign) @@ -1245,6 +1244,136 @@ void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, {Src, Dst}); } +// Should be called within CodeGenFunction::SanitizerScope RAII scope. +// Returns 'i1 false' when the truncation Src -> Dst was lossy. 
+static std::pair> +EmitBitfieldTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst, + QualType DstType, CGBuilderTy &Builder) { + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + + ScalarExprEmitter::ImplicitConversionCheckKind Kind; + if (!SrcSigned && !DstSigned) + Kind = ScalarExprEmitter::ICCK_UnsignedIntegerTruncation; + else + Kind = ScalarExprEmitter::ICCK_SignedIntegerTruncation; + + llvm::Value *Check = nullptr; + // 1. Extend the truncated value back to the same width as the Src. + Check = Builder.CreateIntCast(Dst, Src->getType(), DstSigned, "bf.anyext"); + // 2. Equality-compare with the original source value + Check = Builder.CreateICmpEQ(Check, Src, "bf.truncheck"); + // If the comparison result is 'i1 false', then the truncation was lossy. + + return std::make_pair( + Kind, std::make_pair(Check, SanitizerKind::ImplicitBitfieldConversion)); +} + +// Should be called within CodeGenFunction::SanitizerScope RAII scope. +// Returns 'i1 false' when the conversion Src -> Dst changed the sign. +static std::pair> +EmitBitfieldSignChangeCheckHelper(Value *Src, QualType SrcType, Value *Dst, + QualType DstType, CGBuilderTy &Builder) { + // 1. Was the old Value negative? + llvm::Value *SrcIsNegative = + EmitIsNegativeTestHelper(Src, SrcType, "bf.src", Builder); + // 2. Is the new Value negative? + llvm::Value *DstIsNegative = + EmitIsNegativeTestHelper(Dst, DstType, "bf.dst", Builder); + // 3. Now, was the 'negativity status' preserved during the conversion? + // NOTE: conversion from negative to zero is considered to change the sign. + // (We want to get 'false' when the conversion changed the sign) + // So we should just equality-compare the negativity statuses. + llvm::Value *Check = nullptr; + Check = + Builder.CreateICmpEQ(SrcIsNegative, DstIsNegative, "bf.signchangecheck"); + // If the comparison result is 'false', then the conversion changed the sign. + return std::make_pair( + ScalarExprEmitter::ICCK_IntegerSignChange, + std::make_pair(Check, SanitizerKind::ImplicitBitfieldConversion)); +} + +void CodeGenFunction::EmitBitfieldConversionCheck(Value *Src, QualType SrcType, + Value *Dst, QualType DstType, + const CGBitFieldInfo &Info, + SourceLocation Loc) { + + if (!SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) + return; + + // We only care about int->int conversions here. + // We ignore conversions to/from pointer and/or bool. + if (!PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(SrcType, + DstType)) + return; + + if (DstType->isBooleanType() || SrcType->isBooleanType()) + return; + + // This should be truncation of integral types. + assert(isa(Src->getType()) && + isa(Dst->getType()) && "non-integer llvm type"); + + // TODO: Calculate src width to avoid emitting code + // for unecessary cases. + unsigned SrcBits = ConvertType(SrcType)->getScalarSizeInBits(); + unsigned DstBits = Info.Size; + + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + + CodeGenFunction::SanitizerScope SanScope(this); + + std::pair> + Check; + + // Truncation + bool EmitTruncation = DstBits < SrcBits; + // If Dst is signed and Src unsigned, we want to be more specific + // about the CheckKind we emit, in this case we want to emit + // ICCK_SignedIntegerTruncationOrSignChange. 
+ bool EmitTruncationFromUnsignedToSigned = + EmitTruncation && DstSigned && !SrcSigned; + // Sign change + bool SameTypeSameSize = SrcSigned == DstSigned && SrcBits == DstBits; + bool BothUnsigned = !SrcSigned && !DstSigned; + bool LargerSigned = (DstBits > SrcBits) && DstSigned; + // We can avoid emitting sign change checks in some obvious cases + // 1. If Src and Dst have the same signedness and size + // 2. If both are unsigned sign check is unecessary! + // 3. If Dst is signed and bigger than Src, either + // sign-extension or zero-extension will make sure + // the sign remains. + bool EmitSignChange = !SameTypeSameSize && !BothUnsigned && !LargerSigned; + + if (EmitTruncation) + Check = + EmitBitfieldTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder); + else if (EmitSignChange) { + assert(((SrcBits != DstBits) || (SrcSigned != DstSigned)) && + "either the widths should be different, or the signednesses."); + Check = + EmitBitfieldSignChangeCheckHelper(Src, SrcType, Dst, DstType, Builder); + } else + return; + + ScalarExprEmitter::ImplicitConversionCheckKind CheckKind = Check.first; + if (EmitTruncationFromUnsignedToSigned) + CheckKind = ScalarExprEmitter::ICCK_SignedIntegerTruncationOrSignChange; + + llvm::Constant *StaticArgs[] = { + EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(SrcType), + EmitCheckTypeDescriptor(DstType), + llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind), + llvm::ConstantInt::get(Builder.getInt32Ty(), Info.Size)}; + + EmitCheck(Check.second, SanitizerHandler::ImplicitConversion, StaticArgs, + {Src, Dst}); +} + Value *ScalarExprEmitter::EmitScalarCast(Value *Src, QualType SrcType, QualType DstType, llvm::Type *SrcTy, llvm::Type *DstTy, @@ -2620,6 +2749,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::PHINode *atomicPHI = nullptr; llvm::Value *value; llvm::Value *input; + llvm::Value *Previous = nullptr; + QualType SrcType = E->getType(); int amount = (isInc ? 1 : -1); bool isSubtraction = !isInc; @@ -2708,7 +2839,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, "base or promoted) will be signed, or the bitwidths will match."); } if (CGF.SanOpts.hasOneOf( - SanitizerKind::ImplicitIntegerArithmeticValueChange) && + SanitizerKind::ImplicitIntegerArithmeticValueChange | + SanitizerKind::ImplicitBitfieldConversion) && canPerformLossyDemotionCheck) { // While `x += 1` (for `x` with width less than int) is modeled as // promotion+arithmetics+demotion, and we can catch lossy demotion with @@ -2719,13 +2851,26 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // the increment/decrement in the wider type, and finally // perform the demotion. This will catch lossy demotions. + // We have a special case for bitfields defined using all the bits of the + // type. In this case we need to do the same trick as for the integer + // sanitizer checks, i.e., promotion -> increment/decrement -> demotion. + value = EmitScalarConversion(value, type, promotedType, E->getExprLoc()); Value *amt = llvm::ConstantInt::get(value->getType(), amount, true); value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec"); // Do pass non-default ScalarConversionOpts so that sanitizer check is - // emitted. + // emitted if LV is not a bitfield, otherwise the bitfield sanitizer + // checks will take care of the conversion. 
+ ScalarConversionOpts Opts; + if (!LV.isBitField()) + Opts = ScalarConversionOpts(CGF.SanOpts); + else if (CGF.SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) { + Previous = value; + SrcType = promotedType; + } + value = EmitScalarConversion(value, promotedType, type, E->getExprLoc(), - ScalarConversionOpts(CGF.SanOpts)); + Opts); // Note that signed integer inc/dec with width less than int can't // overflow because of promotion rules; we're just eliding a few steps @@ -2910,9 +3055,12 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } // Store the updated result through the lvalue. - if (LV.isBitField()) + if (LV.isBitField()) { + Value *Src = Previous ? Previous : value; CGF.EmitStoreThroughBitfieldLValue(RValue::get(value), LV, &value); - else + CGF.EmitBitfieldConversionCheck(Src, SrcType, value, E->getType(), + LV.getBitFieldInfo(), E->getExprLoc()); + } else CGF.EmitStoreThroughLValue(RValue::get(value), LV); // If this is a postinc, return the value read from memory, otherwise use the @@ -3417,8 +3565,15 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // Convert the result back to the LHS type, // potentially with Implicit Conversion sanitizer check. - Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, - ScalarConversionOpts(CGF.SanOpts)); + // If LHSLV is a bitfield, use default ScalarConversionOpts + // to avoid emit any implicit integer checks. + Value *Previous = nullptr; + if (LHSLV.isBitField()) { + Previous = Result; + Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc); + } else + Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, + ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); @@ -3437,9 +3592,14 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // specially because the result is altered by the store, i.e., [C99 6.5.16p1] // 'An assignment expression has the value of the left operand after the // assignment...'. - if (LHSLV.isBitField()) + if (LHSLV.isBitField()) { + Value *Src = Previous ? Previous : Result; + QualType SrcType = E->getRHS()->getType(); + QualType DstType = E->getLHS()->getType(); CGF.EmitStoreThroughBitfieldLValue(RValue::get(Result), LHSLV, &Result); - else + CGF.EmitBitfieldConversionCheck(Src, SrcType, Result, DstType, + LHSLV.getBitFieldInfo(), E->getExprLoc()); + } else CGF.EmitStoreThroughLValue(RValue::get(Result), LHSLV); if (CGF.getLangOpts().OpenMP) @@ -4551,6 +4711,24 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, E->getExprLoc()); } +llvm::Value *CodeGenFunction::EmitWithOriginalRHSBitfieldAssignment( + const BinaryOperator *E, Value *Previous, QualType *SrcType) { + // In case we have the integer or bitfield sanitizer checks enabled + // we want to get the expression before scalar conversion. + if (auto *ICE = dyn_cast(E->getRHS())) { + CastKind Kind = ICE->getCastKind(); + if (Kind == CK_IntegralCast) { + *SrcType = ICE->getSubExpr()->getType(); + Previous = EmitScalarExpr(ICE->getSubExpr()); + // Pass default ScalarConversionOpts to avoid emitting + // integer sanitizer checks as E refers to bitfield. 
+ return EmitScalarConversion(Previous, *SrcType, ICE->getType(), + ICE->getExprLoc()); + } + } + return EmitScalarExpr(E->getRHS()); +} + Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { bool Ignore = TestAndClearIgnoreResultAssign(); @@ -4579,7 +4757,16 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { case Qualifiers::OCL_None: // __block variables need to have the rhs evaluated first, plus // this should improve codegen just a little. - RHS = Visit(E->getRHS()); + Value *Previous = nullptr; + QualType SrcType = E->getRHS()->getType(); + // Check if LHS is a bitfield, if RHS contains an implicit cast expression + // we want to extract that value and potentially (if the bitfield sanitizer + // is enabled) use it to check for an implicit conversion. + if (E->getLHS()->refersToBitField()) + RHS = CGF.EmitWithOriginalRHSBitfieldAssignment(E, Previous, &SrcType); + else + RHS = Visit(E->getRHS()); + LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); // Store the value into the LHS. Bit-fields are handled specially @@ -4588,6 +4775,12 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { // the assignment...'. if (LHS.isBitField()) { CGF.EmitStoreThroughBitfieldLValue(RValue::get(RHS), LHS, &RHS); + // If the expression contained an implicit conversion, make sure + // to use the value before the scalar conversion. + Value *Src = Previous ? Previous : RHS; + QualType DstType = E->getLHS()->getType(); + CGF.EmitBitfieldConversionCheck(Src, SrcType, RHS, DstType, + LHS.getBitFieldInfo(), E->getExprLoc()); } else { CGF.EmitNullabilityCheck(LHS, RHS, E->getExprLoc()); CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index e2a7e28..99a7f51 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2786,6 +2786,21 @@ public: /// expression and compare the result against zero, returning an Int1Ty value. llvm::Value *EvaluateExprAsBool(const Expr *E); + /// Retrieve the implicit cast expression of the rhs in a binary operator + /// expression by passing pointers to Value and QualType + /// This is used for implicit bitfield conversion checks, which + /// must compare with the value before potential truncation. + llvm::Value *EmitWithOriginalRHSBitfieldAssignment(const BinaryOperator *E, + llvm::Value *Previous, + QualType *SrcType); + + /// Emit a check that an [implicit] conversion of a bitfield. It is not UB, + /// so we use the value after conversion. + void EmitBitfieldConversionCheck(llvm::Value *Src, QualType SrcType, + llvm::Value *Dst, QualType DstType, + const CGBitFieldInfo &Info, + SourceLocation Loc); + /// EmitIgnoredExpr - Emit an expression in a context which ignores the result. 
void EmitIgnoredExpr(const Expr *E); diff --git a/clang/test/CodeGen/ubsan-bitfield-conversion.c b/clang/test/CodeGen/ubsan-bitfield-conversion.c new file mode 100644 index 0000000..ea9bdd7 --- /dev/null +++ b/clang/test/CodeGen/ubsan-bitfield-conversion.c @@ -0,0 +1,61 @@ +// RUN: %clang -fsanitize=implicit-bitfield-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION +// RUN: %clang -fsanitize=implicit-integer-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK +// RUN: %clang -fsanitize=implicit-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION + +typedef struct _xx { + int x1:3; + char x2:2; +} xx, *pxx; + +xx vxx; + +// CHECK-LABEL: define{{.*}} void @foo1 +void foo1(int x) { + vxx.x1 = x; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @foo2 +void foo2(int x) { + vxx.x2 = x; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @foo3 +void foo3() { + vxx.x1++; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @foo4 +void foo4(int x) { + vxx.x1 += x; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} \ No newline at end of file diff --git a/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp b/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp new file mode 100644 index 0000000..92f6e24 --- /dev/null +++ b/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp @@ -0,0 +1,94 @@ +// RUN: %clang -x c++ -fsanitize=implicit-bitfield-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION +// RUN: %clang -x c++ -fsanitize=implicit-integer-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION + +struct S { + int a:3; + char b:2; +}; + +class C : public S { + public: + short c:3; +}; + +S s; +C c; + +// CHECK-LABEL: define{{.*}} void @{{.*foo1.*}} +void foo1(int x) { + s.a = x; + // CHECK: store i8 %{{.*}} + // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + c.a = x; + // CHECK: store i8 %{{.*}} + // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @{{.*foo2.*}} +void foo2(int x) { + s.b = x; + // CHECK: store i8 %{{.*}} + // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + c.b = x; + // CHECK: store i8 %{{.*}} + // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @{{.*foo3.*}} +void foo3() { + s.a++; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + c.a++; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @{{.*foo4.*}} +void foo4(int x) { + s.a += x; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + c.a += x; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 
[[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} \ No newline at end of file diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 1671825..571f79a 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -35,20 +35,20 @@ // RUN: %clang --target=%itanium_abi_triple -fsanitize=integer %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-INTEGER -implicit-check-not="-fsanitize-address-use-after-scope" // CHECK-INTEGER: "-fsanitize={{((signed-integer-overflow|unsigned-integer-overflow|integer-divide-by-zero|shift-base|shift-exponent|implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change|unsigned-shift-base),?){9}"}} -// RUN: %clang -fsanitize=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-RECOVER -// RUN: %clang -fsanitize=implicit-conversion -fsanitize-recover=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-RECOVER -// RUN: %clang -fsanitize=implicit-conversion -fno-sanitize-recover=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-NORECOVER -// RUN: %clang -fsanitize=implicit-conversion -fsanitize-trap=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-TRAP -// CHECK-implicit-conversion: "-fsanitize={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-RECOVER: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-RECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-RECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-NORECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // ??? 
-// CHECK-implicit-conversion-NORECOVER-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-NORECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-TRAP: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-TRAP-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-TRAP-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// RUN: %clang -fsanitize=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-RECOVER +// RUN: %clang -fsanitize=implicit-integer-conversion -fsanitize-recover=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-RECOVER +// RUN: %clang -fsanitize=implicit-integer-conversion -fno-sanitize-recover=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-NORECOVER +// RUN: %clang -fsanitize=implicit-integer-conversion -fsanitize-trap=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-TRAP +// CHECK-implicit-integer-conversion: "-fsanitize={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-RECOVER: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-RECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-RECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // ??? 
+// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-TRAP: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-TRAP-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-TRAP-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // RUN: %clang -fsanitize=implicit-integer-arithmetic-value-change %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-arithmetic-value-change,CHECK-implicit-integer-arithmetic-value-change-RECOVER // RUN: %clang -fsanitize=implicit-integer-arithmetic-value-change -fsanitize-recover=implicit-integer-arithmetic-value-change %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-arithmetic-value-change,CHECK-implicit-integer-arithmetic-value-change-RECOVER diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp index 0f16507..27d0165 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp +++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp @@ -555,13 +555,11 @@ static void handleImplicitConversion(ImplicitConversionData *Data, ReportOptions Opts, ValueHandle Src, ValueHandle Dst) { SourceLocation Loc = Data->Loc.acquire(); - ErrorType ET = ErrorType::GenericUB; - const TypeDescriptor &SrcTy = Data->FromType; const TypeDescriptor &DstTy = Data->ToType; - bool SrcSigned = SrcTy.isSignedIntegerTy(); bool DstSigned = DstTy.isSignedIntegerTy(); + ErrorType ET = ErrorType::GenericUB; switch (Data->Kind) { case ICCK_IntegerTruncation: { // Legacy, no longer used. @@ -594,14 +592,23 @@ static void handleImplicitConversion(ImplicitConversionData *Data, ScopedReport R(Opts, Loc, ET); + // In the case we have a bitfield, we want to explicitly say so in the + // error message. // FIXME: is it possible to dump the values as hex with fixed width? - - Diag(Loc, DL_Error, ET, - "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " - "type %4 changed the value to %5 (%6-bit, %7signed)") - << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() - << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) - << DstTy.getIntegerBitWidth() << (DstSigned ? "" : "un"); + if (Data->BitfieldBits) + Diag(Loc, DL_Error, ET, + "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " + "type %4 changed the value to %5 (%6-bit bitfield, %7signed)") + << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() + << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) + << Data->BitfieldBits << (DstSigned ? "" : "un"); + else + Diag(Loc, DL_Error, ET, + "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " + "type %4 changed the value to %5 (%6-bit, %7signed)") + << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() + << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) + << DstTy.getIntegerBitWidth() << (DstSigned ? 
"" : "un"); } void __ubsan::__ubsan_handle_implicit_conversion(ImplicitConversionData *Data, diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.h b/compiler-rt/lib/ubsan/ubsan_handlers.h index 3bd5046..bae661a 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.h +++ b/compiler-rt/lib/ubsan/ubsan_handlers.h @@ -147,6 +147,7 @@ struct ImplicitConversionData { const TypeDescriptor &FromType; const TypeDescriptor &ToType; /* ImplicitConversionCheckKind */ unsigned char Kind; + unsigned int BitfieldBits; }; /// \brief Implict conversion that changed the value. -- cgit v1.1 From 5ac22600ed7caf907b740932fac191778d67a9d0 Mon Sep 17 00:00:00 2001 From: Dominik Adamski Date: Wed, 3 Apr 2024 15:15:02 +0200 Subject: [Flang][AMDGPU] Change default AMDHSA Code Object version to 5 (#87464) This is a follow-up of PR: https://github.com/llvm/llvm-project/pull/79038 --- flang/include/flang/Frontend/CodeGenOptions.h | 2 +- flang/test/Lower/AMD/code-object-version.f90 | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/flang/include/flang/Frontend/CodeGenOptions.h b/flang/include/flang/Frontend/CodeGenOptions.h index b0bbace..918192a 100644 --- a/flang/include/flang/Frontend/CodeGenOptions.h +++ b/flang/include/flang/Frontend/CodeGenOptions.h @@ -87,7 +87,7 @@ public: /// \brief Code object version for AMDGPU. llvm::CodeObjectVersionKind CodeObjectVersion = - llvm::CodeObjectVersionKind::COV_4; + llvm::CodeObjectVersionKind::COV_5; /// Optimization remark with an optional regular expression pattern. struct OptRemark { diff --git a/flang/test/Lower/AMD/code-object-version.f90 b/flang/test/Lower/AMD/code-object-version.f90 index 455f454..4380734 100644 --- a/flang/test/Lower/AMD/code-object-version.f90 +++ b/flang/test/Lower/AMD/code-object-version.f90 @@ -5,8 +5,8 @@ !RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=5 %s -o - | FileCheck --check-prefix=COV_5 %s !RUN: %flang_fc1 -emit-hlfir -triple amdgcn-amd-amdhsa -target-cpu gfx908 -mcode-object-version=6 %s -o - | FileCheck --check-prefix=COV_6 %s -!COV_DEFAULT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32 -!COV_NONE-NOT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32 +!COV_DEFAULT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32 +!COV_NONE-NOT: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32 !COV_4: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(400 : i32) {addr_space = 4 : i32} : i32 !COV_5: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(500 : i32) {addr_space = 4 : i32} : i32 !COV_6: llvm.mlir.global weak_odr hidden local_unnamed_addr constant @__oclc_ABI_version(600 : i32) {addr_space = 4 : i32} : i32 -- cgit v1.1 From 95f9b083d083c4873d9f2c62271518c0fcd1ce52 Mon Sep 17 00:00:00 2001 From: Simon Camphausen Date: Wed, 3 Apr 2024 15:22:15 +0200 Subject: [mlir][EmitC] Fix examples in op descriptions (#87478) - Remove trailing type from value attributes as emitc.opaque attributes are untyped. - Replace invalid trailing * in opaque type by wrapping it into an !emitc.ptr. 
--- mlir/include/mlir/Dialect/EmitC/IR/EmitC.td | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td index 090dae8..e611fd2 100644 --- a/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td +++ b/mlir/include/mlir/Dialect/EmitC/IR/EmitC.td @@ -347,9 +347,8 @@ def EmitC_ConstantOp : EmitC_Op<"constant", [ConstantLike]> { %0 = "emitc.constant"(){value = 42 : i32} : () -> i32 // Constant emitted as `char = CHAR_MIN;` - %1 = "emitc.constant"() - {value = #emitc.opaque<"CHAR_MIN"> : !emitc.opaque<"char">} - : () -> !emitc.opaque<"char"> + %1 = "emitc.constant"() {value = #emitc.opaque<"CHAR_MIN">} + : () -> !emitc.opaque<"char"> ``` }]; @@ -992,9 +991,8 @@ def EmitC_VariableOp : EmitC_Op<"variable", []> { %0 = "emitc.variable"(){value = 42 : i32} : () -> i32 // Variable emitted as `int32_t* = NULL;` - %1 = "emitc.variable"() - {value = #emitc.opaque<"NULL"> : !emitc.opaque<"int32_t*">} - : () -> !emitc.opaque<"int32_t*"> + %1 = "emitc.variable"() {value = #emitc.opaque<"NULL">} + : () -> !emitc.ptr> ``` Since folding is not supported, it can be used with pointers. -- cgit v1.1 From e329b68413cd63e03780e1e170ffe53c5edaeea3 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Wed, 3 Apr 2024 14:22:40 +0100 Subject: [VPlan] Factor out logic to check if recipe is dead (NFCI). In preparation to use the helper in more places. --- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 38 +++++++++++++---------- 1 file changed, 22 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 957c97cd..3753060 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -472,6 +472,26 @@ static void removeRedundantCanonicalIVs(VPlan &Plan) { } } +/// Returns true if \p R is dead and can be removed. +static bool isDeadRecipe(VPRecipeBase &R) { + using namespace llvm::PatternMatch; + // Do remove conditional assume instructions as their conditions may be + // flattened. + auto *RepR = dyn_cast(&R); + bool IsConditionalAssume = + RepR && RepR->isPredicated() && + match(RepR->getUnderlyingInstr(), m_Intrinsic()); + if (IsConditionalAssume) + return true; + + if (R.mayHaveSideEffects()) + return false; + + // Recipe is dead if no user keeps the recipe alive. + return all_of(R.definedValues(), + [](VPValue *V) { return V->getNumUsers() == 0; }); +} + static void removeDeadRecipes(VPlan &Plan) { ReversePostOrderTraversal> RPOT( Plan.getEntry()); @@ -480,22 +500,8 @@ static void removeDeadRecipes(VPlan &Plan) { // The recipes in the block are processed in reverse order, to catch chains // of dead recipes. for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) { - // A user keeps R alive: - if (any_of(R.definedValues(), - [](VPValue *V) { return V->getNumUsers(); })) - continue; - - using namespace llvm::PatternMatch; - // Having side effects keeps R alive, but do remove conditional assume - // instructions as their conditions may be flattened. 
- auto *RepR = dyn_cast(&R); - bool IsConditionalAssume = - RepR && RepR->isPredicated() && - match(RepR->getUnderlyingInstr(), m_Intrinsic()); - if (R.mayHaveSideEffects() && !IsConditionalAssume) - continue; - - R.eraseFromParent(); + if (isDeadRecipe(R)) + R.eraseFromParent(); } } } -- cgit v1.1 From 4d34b3295f005f739e431f379ef02da7eac75688 Mon Sep 17 00:00:00 2001 From: Fanbo Meng Date: Wed, 3 Apr 2024 09:44:59 -0400 Subject: [SystemZ][z/OS] Remove COMPILER_IBMXL macro for z/OS (#87493) This copies the change made in google benchmark (https://github.com/google/benchmark/commit/70916cbf71f50b9e1e6f13559e10d6dbb92beb32) to remove COMPILER_IBMXL for z/OS. --- third-party/benchmark/src/cycleclock.h | 5 +++-- third-party/benchmark/src/internal_macros.h | 6 +----- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/third-party/benchmark/src/cycleclock.h b/third-party/benchmark/src/cycleclock.h index eff563e..d4f1330 100644 --- a/third-party/benchmark/src/cycleclock.h +++ b/third-party/benchmark/src/cycleclock.h @@ -181,10 +181,11 @@ inline BENCHMARK_ALWAYS_INLINE int64_t Now() { #elif defined(__s390__) // Covers both s390 and s390x. // Return the CPU clock. uint64_t tsc; -#if defined(BENCHMARK_OS_ZOS) && defined(COMPILER_IBMXL) - // z/OS XL compiler HLASM syntax. +#if defined(BENCHMARK_OS_ZOS) + // z/OS HLASM syntax. asm(" stck %0" : "=m"(tsc) : : "cc"); #else + // Linux on Z syntax. asm("stck %0" : "=Q"(tsc) : : "cc"); #endif return tsc; diff --git a/third-party/benchmark/src/internal_macros.h b/third-party/benchmark/src/internal_macros.h index 8dd7d0c..f4894ba 100644 --- a/third-party/benchmark/src/internal_macros.h +++ b/third-party/benchmark/src/internal_macros.h @@ -11,11 +11,7 @@ #endif #if defined(__clang__) - #if defined(__ibmxl__) - #if !defined(COMPILER_IBMXL) - #define COMPILER_IBMXL - #endif - #elif !defined(COMPILER_CLANG) + #if !defined(COMPILER_CLANG) #define COMPILER_CLANG #endif #elif defined(_MSC_VER) -- cgit v1.1 From 250b467f7c8f06350a64d1a17e3ac7e3e390d4b1 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 3 Apr 2024 06:26:36 -0700 Subject: [SLP][NFC]Simplify common analysis of instructions in BoUpSLP::collectValuesToDemote by outlining common code, NFC. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 185 ++++++++++-------------- 1 file changed, 78 insertions(+), 107 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index db052ce..cb55992 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -14097,25 +14097,52 @@ bool BoUpSLP::collectValuesToDemote( } return false; }; - bool NeedToExit = false; + auto TryProcessInstruction = + [&](Instruction *I, const TreeEntry &ITE, unsigned &BitWidth, + ArrayRef Operands = std::nullopt, + function_ref Checker = {}) { + if (Operands.empty()) { + if (!IsTruncRoot) + MaxDepthLevel = 1; + (void)IsPotentiallyTruncated(V, BitWidth); + } else { + // Several vectorized uses? Check if we can truncate it, otherwise - + // exit. + if (ITE.UserTreeIndices.size() > 1 && + !IsPotentiallyTruncated(I, BitWidth)) + return false; + bool NeedToExit = false; + if (Checker && !AttemptCheckBitwidth(Checker, NeedToExit)) + return false; + if (NeedToExit) + return true; + if (!ProcessOperands(Operands, NeedToExit)) + return false; + if (NeedToExit) + return true; + } + + ++MaxDepthLevel; + // Gather demoted constant operands. 
+ for (unsigned Idx : seq(Start, End)) + if (isa(I->getOperand(Idx))) + DemotedConsts.try_emplace(I).first->getSecond().push_back(Idx); + // Record the value that we can demote. + ToDemote.push_back(V); + return IsProfitableToDemote; + }; switch (I->getOpcode()) { // We can always demote truncations and extensions. Since truncations can // seed additional demotion, we save the truncated value. case Instruction::Trunc: - if (!IsTruncRoot) - MaxDepthLevel = 1; if (IsProfitableToDemoteRoot) IsProfitableToDemote = true; - (void)IsPotentiallyTruncated(V, BitWidth); - break; + return TryProcessInstruction(I, *ITE, BitWidth); case Instruction::ZExt: case Instruction::SExt: - if (!IsTruncRoot) - MaxDepthLevel = 1; IsProfitableToDemote = true; - (void)IsPotentiallyTruncated(V, BitWidth); - break; + return TryProcessInstruction(I, *ITE, BitWidth); // We can demote certain binary operations if we can demote both of their // operands. @@ -14125,140 +14152,83 @@ bool BoUpSLP::collectValuesToDemote( case Instruction::And: case Instruction::Or: case Instruction::Xor: { - if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth)) - return false; - if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit)) - return false; - break; + return TryProcessInstruction(I, *ITE, BitWidth, + {I->getOperand(0), I->getOperand(1)}); } case Instruction::Shl: { - // Several vectorized uses? Check if we can truncate it, otherwise - exit. - if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth)) - return false; // If we are truncating the result of this SHL, and if it's a shift of an // inrange amount, we can always perform a SHL in a smaller type. - if (!AttemptCheckBitwidth( - [&](unsigned BitWidth, unsigned) { - KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); - return AmtKnownBits.getMaxValue().ult(BitWidth); - }, - NeedToExit)) - return false; - if (NeedToExit) - return true; - if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit)) - return false; - break; + auto ShlChecker = [&](unsigned BitWidth, unsigned) { + KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); + return AmtKnownBits.getMaxValue().ult(BitWidth); + }; + return TryProcessInstruction( + I, *ITE, BitWidth, {I->getOperand(0), I->getOperand(1)}, ShlChecker); } case Instruction::LShr: { - // Several vectorized uses? Check if we can truncate it, otherwise - exit. - if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth)) - return false; // If this is a truncate of a logical shr, we can truncate it to a smaller // lshr iff we know that the bits we would otherwise be shifting in are // already zeros. 
- if (!AttemptCheckBitwidth( - [&](unsigned BitWidth, unsigned OrigBitWidth) { - KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); - APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); - return AmtKnownBits.getMaxValue().ult(BitWidth) && - MaskedValueIsZero(I->getOperand(0), ShiftedBits, - SimplifyQuery(*DL)); - }, - NeedToExit)) - return false; - if (NeedToExit) - return true; - if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit)) - return false; - break; + auto LShrChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { + KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); + APInt ShiftedBits = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + return AmtKnownBits.getMaxValue().ult(BitWidth) && + MaskedValueIsZero(I->getOperand(0), ShiftedBits, + SimplifyQuery(*DL)); + }; + return TryProcessInstruction( + I, *ITE, BitWidth, {I->getOperand(0), I->getOperand(1)}, LShrChecker); } case Instruction::AShr: { - // Several vectorized uses? Check if we can truncate it, otherwise - exit. - if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth)) - return false; // If this is a truncate of an arithmetic shr, we can truncate it to a // smaller ashr iff we know that all the bits from the sign bit of the // original type and the sign bit of the truncate type are similar. - if (!AttemptCheckBitwidth( - [&](unsigned BitWidth, unsigned OrigBitWidth) { - KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); - unsigned ShiftedBits = OrigBitWidth - BitWidth; - return AmtKnownBits.getMaxValue().ult(BitWidth) && - ShiftedBits < ComputeNumSignBits(I->getOperand(0), *DL, 0, - AC, nullptr, DT); - }, - NeedToExit)) - return false; - if (NeedToExit) - return true; - if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit)) - return false; - break; + auto AShrChecker = [&](unsigned BitWidth, unsigned OrigBitWidth) { + KnownBits AmtKnownBits = computeKnownBits(I->getOperand(1), *DL); + unsigned ShiftedBits = OrigBitWidth - BitWidth; + return AmtKnownBits.getMaxValue().ult(BitWidth) && + ShiftedBits < + ComputeNumSignBits(I->getOperand(0), *DL, 0, AC, nullptr, DT); + }; + return TryProcessInstruction( + I, *ITE, BitWidth, {I->getOperand(0), I->getOperand(1)}, AShrChecker); } case Instruction::UDiv: case Instruction::URem: { - if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth)) - return false; // UDiv and URem can be truncated if all the truncated bits are zero. - if (!AttemptCheckBitwidth( - [&](unsigned BitWidth, unsigned OrigBitWidth) { - assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); - APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); - return MaskedValueIsZero(I->getOperand(0), Mask, - SimplifyQuery(*DL)) && - MaskedValueIsZero(I->getOperand(1), Mask, - SimplifyQuery(*DL)); - }, - NeedToExit)) - return false; - if (NeedToExit) - return true; - if (!ProcessOperands({I->getOperand(0), I->getOperand(1)}, NeedToExit)) - return false; - break; + auto Checker = [&](unsigned BitWidth, unsigned OrigBitWidth) { + assert(BitWidth <= OrigBitWidth && "Unexpected bitwidths!"); + APInt Mask = APInt::getBitsSetFrom(OrigBitWidth, BitWidth); + return MaskedValueIsZero(I->getOperand(0), Mask, SimplifyQuery(*DL)) && + MaskedValueIsZero(I->getOperand(1), Mask, SimplifyQuery(*DL)); + }; + return TryProcessInstruction(I, *ITE, BitWidth, + {I->getOperand(0), I->getOperand(1)}, Checker); } // We can demote selects if we can demote their true and false values. 
case Instruction::Select: { - if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth)) - return false; Start = 1; auto *SI = cast(I); - if (!ProcessOperands({SI->getTrueValue(), SI->getFalseValue()}, NeedToExit)) - return false; - break; + return TryProcessInstruction(I, *ITE, BitWidth, + {SI->getTrueValue(), SI->getFalseValue()}); } // We can demote phis if we can demote all their incoming operands. Note that // we don't need to worry about cycles since we ensure single use above. case Instruction::PHI: { PHINode *PN = cast(I); - if (ITE->UserTreeIndices.size() > 1 && !IsPotentiallyTruncated(I, BitWidth)) - return false; SmallVector Ops(PN->incoming_values().begin(), PN->incoming_values().end()); - if (!ProcessOperands(Ops, NeedToExit)) - return false; - break; + return TryProcessInstruction(I, *ITE, BitWidth, Ops); } // Otherwise, conservatively give up. default: - MaxDepthLevel = 1; - return FinalAnalysis(); + break; } - if (NeedToExit) - return true; - - ++MaxDepthLevel; - // Gather demoted constant operands. - for (unsigned Idx : seq(Start, End)) - if (isa(I->getOperand(Idx))) - DemotedConsts.try_emplace(I).first->getSecond().push_back(Idx); - // Record the value that we can demote. - ToDemote.push_back(V); - return IsProfitableToDemote; + MaxDepthLevel = 1; + return FinalAnalysis(); } void BoUpSLP::computeMinimumValueSizes() { @@ -14309,7 +14279,8 @@ void BoUpSLP::computeMinimumValueSizes() { DenseMap> DemotedConsts; auto ComputeMaxBitWidth = [&](ArrayRef TreeRoot, unsigned VF, bool IsTopRoot, bool IsProfitableToDemoteRoot, - unsigned Opcode, unsigned Limit, bool IsTruncRoot) { + unsigned Opcode, unsigned Limit, + bool IsTruncRoot) { ToDemote.clear(); auto *TreeRootIT = dyn_cast(TreeRoot[0]->getType()); if (!TreeRootIT || !Opcode) -- cgit v1.1 From d650fcd6bf1323513213dd69eacbb2b08c870618 Mon Sep 17 00:00:00 2001 From: aniplcc Date: Wed, 3 Apr 2024 19:30:50 +0530 Subject: [DAG] SimplifyDemandedVectorElts - add ISD::AVGCEILS/AVGCEILU/AVGFLOORS/AVGFLOORU nodes (#86284) Fixes #84768 --- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 4 ++ llvm/test/CodeGen/AArch64/hadd-combine.ll | 54 +++++++++++++++++++++++- llvm/test/CodeGen/X86/combine-pavg.ll | 9 ++-- 3 files changed, 60 insertions(+), 7 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 962f0d9..8bb9541 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -3524,6 +3524,10 @@ bool TargetLowering::SimplifyDemandedVectorElts( } [[fallthrough]]; } + case ISD::AVGCEILS: + case ISD::AVGCEILU: + case ISD::AVGFLOORS: + case ISD::AVGFLOORU: case ISD::OR: case ISD::XOR: case ISD::SUB: diff --git a/llvm/test/CodeGen/AArch64/hadd-combine.ll b/llvm/test/CodeGen/AArch64/hadd-combine.ll index 491bf40..c0f7678 100644 --- a/llvm/test/CodeGen/AArch64/hadd-combine.ll +++ b/llvm/test/CodeGen/AArch64/hadd-combine.ll @@ -903,6 +903,58 @@ define <8 x i16> @shadd_fixedwidth_v8i16(<8 x i16> %a0, <8 x i16> %a1) { ret <8 x i16> %res } +define <8 x i16> @shadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: shadd_demandedelts: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: shadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer + %op = call <8 x i16> @llvm.aarch64.neon.shadd.v8i16(<8 x i16> %s0, <8 x i16> %a1) + %r0 = shufflevector <8 x i16> %op, <8 x i16> 
undef, <8 x i32> zeroinitializer + ret <8 x i16> %r0 +} + +define <8 x i16> @srhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: srhadd_demandedelts: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: srhadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer + %op = call <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1) + %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r0 +} + +define <8 x i16> @uhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: uhadd_demandedelts: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: uhadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer + %op = call <8 x i16> @llvm.aarch64.neon.uhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1) + %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r0 +} + +define <8 x i16> @urhadd_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: urhadd_demandedelts: +; CHECK: // %bb.0: +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: urhadd v0.8h, v0.8h, v1.8h +; CHECK-NEXT: dup v0.8h, v0.h[0] +; CHECK-NEXT: ret + %s0 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> zeroinitializer + %op = call <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16> %s0, <8 x i16> %a1) + %r0 = shufflevector <8 x i16> %op, <8 x i16> undef, <8 x i32> zeroinitializer + ret <8 x i16> %r0 +} + declare <8 x i8> @llvm.aarch64.neon.shadd.v8i8(<8 x i8>, <8 x i8>) declare <4 x i16> @llvm.aarch64.neon.shadd.v4i16(<4 x i16>, <4 x i16>) declare <2 x i32> @llvm.aarch64.neon.shadd.v2i32(<2 x i32>, <2 x i32>) @@ -927,4 +979,4 @@ declare <8 x i16> @llvm.aarch64.neon.srhadd.v8i16(<8 x i16>, <8 x i16>) declare <4 x i32> @llvm.aarch64.neon.srhadd.v4i32(<4 x i32>, <4 x i32>) declare <16 x i8> @llvm.aarch64.neon.urhadd.v16i8(<16 x i8>, <16 x i8>) declare <8 x i16> @llvm.aarch64.neon.urhadd.v8i16(<8 x i16>, <8 x i16>) -declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>) +declare <4 x i32> @llvm.aarch64.neon.urhadd.v4i32(<4 x i32>, <4 x i32>) \ No newline at end of file diff --git a/llvm/test/CodeGen/X86/combine-pavg.ll b/llvm/test/CodeGen/X86/combine-pavg.ll index 7a8ddf5..cb2d426 100644 --- a/llvm/test/CodeGen/X86/combine-pavg.ll +++ b/llvm/test/CodeGen/X86/combine-pavg.ll @@ -84,25 +84,22 @@ define <16 x i8> @combine_pavgw_knownbits(<8 x i16> %a0, <8 x i16> %a1, <8 x i16 define <8 x i16> @combine_pavgw_demandedelts(<8 x i16> %a0, <8 x i16> %a1) { ; SSE-LABEL: combine_pavgw_demandedelts: ; SSE: # %bb.0: -; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] -; SSE-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13] ; SSE-NEXT: pavgw %xmm1, %xmm0 +; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] ; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; SSE-NEXT: retq ; ; AVX1-LABEL: combine_pavgw_demandedelts: ; AVX1: # %bb.0: -; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] -; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,0,1,0,1,0,1,8,9,8,9,12,13,12,13] ; AVX1-NEXT: vpavgw %xmm1, %xmm0, %xmm0 +; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] ; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] ; AVX1-NEXT: retq ; ; AVX2-LABEL: combine_pavgw_demandedelts: ; AVX2: # %bb.0: -; AVX2-NEXT: vpbroadcastw %xmm1, %xmm1 -; AVX2-NEXT: 
vpbroadcastw %xmm0, %xmm0 ; AVX2-NEXT: vpavgw %xmm1, %xmm0, %xmm0 +; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0 ; AVX2-NEXT: retq %s0 = shufflevector <8 x i16> %a0, <8 x i16> poison, <8 x i32> %avg = tail call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %s0, <8 x i16> %a1) -- cgit v1.1 From 1f7c3d609b01d0cf2a0b973cc17a9b0bca8e56b5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 3 Apr 2024 15:06:54 +0100 Subject: [X86] getEffectiveX86CodeModel - take a Triple argument instead of just a Is64Bit flag. NFC. (#87479) Matches what most other targets do and makes it easier to specify code model based off other triple settings in the future. --- llvm/lib/Target/X86/X86TargetMachine.cpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetMachine.cpp b/llvm/lib/Target/X86/X86TargetMachine.cpp index 276bc7f..86b4560 100644 --- a/llvm/lib/Target/X86/X86TargetMachine.cpp +++ b/llvm/lib/Target/X86/X86TargetMachine.cpp @@ -211,8 +211,9 @@ static Reloc::Model getEffectiveRelocModel(const Triple &TT, bool JIT, } static CodeModel::Model -getEffectiveX86CodeModel(std::optional CM, bool JIT, - bool Is64Bit) { +getEffectiveX86CodeModel(const Triple &TT, std::optional CM, + bool JIT) { + bool Is64Bit = TT.getArch() == Triple::x86_64; if (CM) { if (*CM == CodeModel::Tiny) report_fatal_error("Target does not support the tiny CodeModel", false); @@ -234,7 +235,7 @@ X86TargetMachine::X86TargetMachine(const Target &T, const Triple &TT, : LLVMTargetMachine( T, computeDataLayout(TT), TT, CPU, FS, Options, getEffectiveRelocModel(TT, JIT, RM), - getEffectiveX86CodeModel(CM, JIT, TT.getArch() == Triple::x86_64), + getEffectiveX86CodeModel(TT, CM, JIT), OL), TLOF(createTLOF(getTargetTriple())), IsJIT(JIT) { // On PS4/PS5, the "return address" of a 'noreturn' call must still be within -- cgit v1.1 From 269d0aaec1801000a39122b1c5792d9c096b33ec Mon Sep 17 00:00:00 2001 From: Adrian Kuegel Date: Wed, 3 Apr 2024 14:12:26 +0000 Subject: [mlir] Apply ClangTidy findings. modernize-use-override ClangTidy check. This warning appears on overridden virtual functions not marked with override or final keywords or marked with more than one of virtual, override, final. --- mlir/include/mlir/Pass/Pass.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/mlir/include/mlir/Pass/Pass.h b/mlir/include/mlir/Pass/Pass.h index 0f50f30..e71c49a 100644 --- a/mlir/include/mlir/Pass/Pass.h +++ b/mlir/include/mlir/Pass/Pass.h @@ -355,7 +355,7 @@ private: template class OperationPass : public Pass { public: - ~OperationPass() = default; + ~OperationPass() override = default; protected: OperationPass(TypeID passID) : Pass(passID, OpT::getOperationName()) {} @@ -400,7 +400,7 @@ protected: template <> class OperationPass : public Pass { public: - ~OperationPass() = default; + ~OperationPass() override = default; protected: OperationPass(TypeID passID) : Pass(passID) {} @@ -461,7 +461,7 @@ public: static bool classof(const Pass *pass) { return pass->getTypeID() == TypeID::get(); } - ~PassWrapper() = default; + ~PassWrapper() override = default; protected: PassWrapper() : BaseT(TypeID::get()) {} -- cgit v1.1 From 39eedfded4b990132888b93e3bbf168be8af2038 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 3 Apr 2024 15:37:27 +0100 Subject: [DAG] visitADDLikeCommutative - convert (add x, shl(0 - y, n)) fold to SDPatternMatch. NFC. 
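The rewrite relies on the algebraic identity x + ((0 - y) << n) == x - (y << n) under wrapping fixed-width arithmetic, which is what the existing fold already assumed. A minimal standalone C++ check of that identity (not part of the patch, names chosen arbitrarily) could look like:

#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Samples[] = {0, 1, 7, 0x8000000000000000ULL,
                              0xFFFFFFFFFFFFFFFFULL};
  for (uint64_t X : Samples)
    for (uint64_t Y : Samples)
      for (unsigned N = 0; N < 64; ++N)
        // Unsigned arithmetic wraps modulo 2^64, matching the DAG's
        // fixed-width integer semantics.
        assert(X + ((0 - Y) << N) == X - (Y << N));
  return 0;
}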
--- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index b889e4f..28fe069 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3053,17 +3053,15 @@ static SDValue foldAddSubMasked1(bool IsAdd, SDValue N0, SDValue N1, /// Helper for doing combines based on N0 and N1 being added to each other. SDValue DAGCombiner::visitADDLikeCommutative(SDValue N0, SDValue N1, - SDNode *LocReference) { + SDNode *LocReference) { EVT VT = N0.getValueType(); SDLoc DL(LocReference); // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) - if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && - isNullOrNullSplat(N1.getOperand(0).getOperand(0))) + SDValue Y, N; + if (sd_match(N1, m_Shl(m_Neg(m_Value(Y)), m_Value(N)))) return DAG.getNode(ISD::SUB, DL, VT, N0, - DAG.getNode(ISD::SHL, DL, VT, - N1.getOperand(0).getOperand(1), - N1.getOperand(1))); + DAG.getNode(ISD::SHL, DL, VT, Y, N)); if (SDValue V = foldAddSubMasked1(true, N0, N1, DAG, DL)) return V; -- cgit v1.1 From 0f5f931a9b32208a4894da57ea5c7428ead9df8d Mon Sep 17 00:00:00 2001 From: Weining Lu Date: Wed, 3 Apr 2024 21:09:04 +0800 Subject: [CodeGen] Fix test after #86049 --- llvm/test/CodeGen/Generic/allow-check.ll | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/test/CodeGen/Generic/allow-check.ll b/llvm/test/CodeGen/Generic/allow-check.ll index 43dab68..a084889 100644 --- a/llvm/test/CodeGen/Generic/allow-check.ll +++ b/llvm/test/CodeGen/Generic/allow-check.ll @@ -2,6 +2,7 @@ ; REQUIRES: host-byteorder-little-endian ; -global-isel=1 is unsupported. +; XFAIL: target=loongarch{{.*}} ; XFAIL: target=nvptx{{.*}} ; XFAIL: target=sparc{{.*}} ; XFAIL: target=hexagon-{{.*}} -- cgit v1.1 From 7c178fdf0094afbf4757d71b792bc159ddcac72f Mon Sep 17 00:00:00 2001 From: David Spickett Date: Wed, 3 Apr 2024 14:46:27 +0000 Subject: [lldb] Correct byte order check for 128 bit integer registers Size was clearly not correct here. This call has been here since the initial reformat of all of lldb so it has likely always been incorrect. (although registers don't typically have an endian, they are just values, in the remote protocol register data is in target endian) This might have been a problem for Neon registers on big endian AArch64, but only if the debug server describes them as integers. lldb-server does not, they've always been vectors which doesn't take this code path. Not adding a test because the way I've mocked up a big endian target in the past is using s390x as the architecture. This apparently has some form of vector extension that may be 128 bit but lldb doesn't support it. 
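The root cause is that the condition compared a byte count (GetByteSize()) against a byte-order enumerator (eByteOrderBig); both are integral so it compiles, but whether the big-endian branch is taken then has nothing to do with the source data's actual byte order. A minimal standalone sketch of the intended logic (not lldb's actual types; which slot holds the high half is an assumption made for illustration only):

  #include <cstdint>

  enum ByteOrder { LittleEndian, BigEndian };
  struct U128 { uint64_t lo, hi; }; // assumed convention for this sketch

  // Two 64-bit words arrive in stream order: in big-endian data the first
  // word is the most-significant half, in little-endian the least.
  U128 assemble(uint64_t first, uint64_t second, ByteOrder order) {
    U128 v;
    if (order == BigEndian) {
      v.hi = first;
      v.lo = second;
    } else {
      v.lo = first;
      v.hi = second;
    }
    return v;
  }

For example, assemble(0x0011223344556677, 0x8899AABBCCDDEEFF, BigEndian) yields hi = 0x0011223344556677 and lo = 0x8899AABBCCDDEEFF, while the LittleEndian case swaps the two assignments.
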
--- lldb/source/Utility/RegisterValue.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lldb/source/Utility/RegisterValue.cpp b/lldb/source/Utility/RegisterValue.cpp index fa92ba8..cbf8402 100644 --- a/lldb/source/Utility/RegisterValue.cpp +++ b/lldb/source/Utility/RegisterValue.cpp @@ -199,7 +199,7 @@ Status RegisterValue::SetValueFromData(const RegisterInfo ®_info, else if (reg_info.byte_size <= 16) { uint64_t data1 = src.GetU64(&src_offset); uint64_t data2 = src.GetU64(&src_offset); - if (src.GetByteSize() == eByteOrderBig) { + if (src.GetByteOrder() == eByteOrderBig) { int128.x[0] = data1; int128.x[1] = data2; } else { -- cgit v1.1 From 9808279b0ec3663428fbf6294dfdd1d4f70b1cda Mon Sep 17 00:00:00 2001 From: Paul Robinson Date: Wed, 3 Apr 2024 08:11:27 -0700 Subject: [NFC] Bump DIAG_SIZE_FRONTEND (hit the limit downstream as of e05c1b46) --- clang/include/clang/Basic/DiagnosticIDs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/include/clang/Basic/DiagnosticIDs.h b/clang/include/clang/Basic/DiagnosticIDs.h index 5ff782c..bce7605 100644 --- a/clang/include/clang/Basic/DiagnosticIDs.h +++ b/clang/include/clang/Basic/DiagnosticIDs.h @@ -32,7 +32,7 @@ namespace clang { enum { DIAG_SIZE_COMMON = 300, DIAG_SIZE_DRIVER = 400, - DIAG_SIZE_FRONTEND = 150, + DIAG_SIZE_FRONTEND = 200, DIAG_SIZE_SERIALIZATION = 120, DIAG_SIZE_LEX = 400, DIAG_SIZE_PARSE = 700, -- cgit v1.1 From a6170d5b7e45d85ffdab124a4e2bd0f0e1d29f2c Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Wed, 3 Apr 2024 16:21:57 +0100 Subject: [SelectionDAG] Dump convergencectrl_glue DAG node (#87487) --- llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 20375a0..6691aa4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -456,6 +456,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::CONVERGENCECTRL_ANCHOR: return "convergencectrl_anchor"; case ISD::CONVERGENCECTRL_ENTRY: return "convergencectrl_entry"; case ISD::CONVERGENCECTRL_LOOP: return "convergencectrl_loop"; + case ISD::CONVERGENCECTRL_GLUE: return "convergencectrl_glue"; // Bit manipulation case ISD::ABS: return "abs"; -- cgit v1.1 From 1aedf949e0f6d5e0a6b15e28780be126730db023 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Amaury=20S=C3=A9chet?= Date: Tue, 9 Jan 2024 13:52:39 +0000 Subject: [NFC] Automatically generate indirect-branch-tracking-eh2.ll --- .../CodeGen/X86/indirect-branch-tracking-eh2.ll | 220 ++++++++++++++++----- 1 file changed, 172 insertions(+), 48 deletions(-) diff --git a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll index 64d44d9..0123431 100644 --- a/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll +++ b/llvm/test/CodeGen/X86/indirect-branch-tracking-eh2.ll @@ -1,59 +1,183 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple x86_64-unknown-unknown -exception-model sjlj -verify-machineinstrs=0 -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s --check-prefix=NUM ; RUN: llc -mtriple x86_64-unknown-unknown -exception-model sjlj -verify-machineinstrs=0 -simplifycfg-require-and-preserve-domtree=1 < %s | FileCheck %s --check-prefix=SJLJ -; NUM-COUNT-3: endbr64 - -;SJLJ: main: # @main -;SJLJ-NEXT: .Lfunc_begin0: -;SJLJ-NEXT: # 
%bb.0: # %entry -;SJLJ-NEXT: endbr64 -;SJLJ-NEXT: pushq %rbp -;SJLJ: callq _Unwind_SjLj_Register -;SJLJ-NEXT: .Ltmp0: -;SJLJ-NEXT: callq _Z3foov -;SJLJ-NEXT: .Ltmp1: -;SJLJ-NEXT: # %bb.1: # %invoke.cont -;SJLJ-NEXT: movl -;SJLJ-NEXT: .LBB0_7: # %return -;SJLJ: callq _Unwind_SjLj_Unregister -;SJLJ: retq -;SJLJ-NEXT: .LBB0_9: -;SJLJ-NEXT: endbr64 -;SJLJ-NEXT: movl -;SJLJ-NEXT: cmpl -;SJLJ-NEXT: jb .LBB0_10 -;SJLJ-NEXT: # %bb.11: -;SJLJ-NEXT: ud2 -;SJLJ-NEXT: .LBB0_10: -;SJLJ-NEXT: leaq .LJTI0_0(%rip), %rcx -;SJLJ-NEXT: jmpq *(%rcx,%rax,8) -;SJLJ-NEXT: .LBB0_2: # %lpad -;SJLJ-NEXT: .Ltmp2: -;SJLJ-NEXT: endbr64 -;SJLJ: jne .LBB0_4 -;SJLJ-NEXT: # %bb.3: # %catch3 -;SJLJ: callq __cxa_begin_catch -;SJLJ: jmp .LBB0_6 -;SJLJ-NEXT: .LBB0_4: # %catch.fallthrough -;SJLJ-NEXT: cmpl -;SJLJ-NEXT: jne .LBB0_8 -;SJLJ-NEXT: # %bb.5: # %catch -;SJLJ: callq __cxa_begin_catch -;SJLJ: cmpb -;SJLJ-NEXT: .LBB0_6: # %return -;SJLJ: callq __cxa_end_catch -;SJLJ-NEXT: jmp .LBB0_7 -;SJLJ-NEXT: .LBB0_8: # %eh.resume -;SJLJ-NEXT: movl -;SJLJ-NEXT: .Lfunc_end0: -;SJLJ: .LJTI0_0: -;SJLJ-NEXT: .quad .LBB0_2 - @_ZTIi = external dso_local constant ptr @_ZTIc = external dso_local constant ptr ; Function Attrs: noinline norecurse optnone uwtable define dso_local i32 @main() #0 personality ptr @__gxx_personality_sj0 { +; NUM-LABEL: main: +; NUM: # %bb.0: # %entry +; NUM-NEXT: endbr64 +; NUM-NEXT: pushq %rbp +; NUM-NEXT: movq %rsp, %rbp +; NUM-NEXT: pushq %r15 +; NUM-NEXT: pushq %r14 +; NUM-NEXT: pushq %r13 +; NUM-NEXT: pushq %r12 +; NUM-NEXT: pushq %rbx +; NUM-NEXT: subq $120, %rsp +; NUM-NEXT: movl $0, -44(%rbp) +; NUM-NEXT: movq $__gxx_personality_sj0, -120(%rbp) +; NUM-NEXT: movq $GCC_except_table0, -112(%rbp) +; NUM-NEXT: movq %rbp, -104(%rbp) +; NUM-NEXT: movq %rsp, -88(%rbp) +; NUM-NEXT: movq $.LBB0_9, -96(%rbp) +; NUM-NEXT: movl $1, -144(%rbp) +; NUM-NEXT: leaq -152(%rbp), %rdi +; NUM-NEXT: callq _Unwind_SjLj_Register@PLT +; NUM-NEXT: .Ltmp0: +; NUM-NEXT: callq _Z3foov +; NUM-NEXT: .Ltmp1: +; NUM-NEXT: # %bb.1: # %invoke.cont +; NUM-NEXT: movl $1, -44(%rbp) +; NUM-NEXT: .LBB0_7: # %return +; NUM-NEXT: movl -44(%rbp), %ebx +; NUM-NEXT: leaq -152(%rbp), %rdi +; NUM-NEXT: callq _Unwind_SjLj_Unregister@PLT +; NUM-NEXT: movl %ebx, %eax +; NUM-NEXT: addq $120, %rsp +; NUM-NEXT: popq %rbx +; NUM-NEXT: popq %r12 +; NUM-NEXT: popq %r13 +; NUM-NEXT: popq %r14 +; NUM-NEXT: popq %r15 +; NUM-NEXT: popq %rbp +; NUM-NEXT: retq +; NUM-NEXT: .LBB0_9: +; NUM-NEXT: endbr64 +; NUM-NEXT: movl -144(%rbp), %eax +; NUM-NEXT: cmpl $1, %eax +; NUM-NEXT: jb .LBB0_10 +; NUM-NEXT: # %bb.11: +; NUM-NEXT: ud2 +; NUM-NEXT: .LBB0_10: +; NUM-NEXT: leaq .LJTI0_0(%rip), %rcx +; NUM-NEXT: jmpq *(%rcx,%rax,8) +; NUM-NEXT: .LBB0_2: # %lpad +; NUM-NEXT: .Ltmp2: +; NUM-NEXT: endbr64 +; NUM-NEXT: movl -140(%rbp), %ecx +; NUM-NEXT: movl -136(%rbp), %eax +; NUM-NEXT: movq %rcx, -56(%rbp) +; NUM-NEXT: movl %eax, -64(%rbp) +; NUM-NEXT: cmpl $2, %eax +; NUM-NEXT: jne .LBB0_4 +; NUM-NEXT: # %bb.3: # %catch3 +; NUM-NEXT: movq -56(%rbp), %rdi +; NUM-NEXT: movl $-1, -144(%rbp) +; NUM-NEXT: callq __cxa_begin_catch +; NUM-NEXT: movl (%rax), %eax +; NUM-NEXT: movl %eax, -60(%rbp) +; NUM-NEXT: xorl %ecx, %ecx +; NUM-NEXT: cmpl $5, %eax +; NUM-NEXT: jmp .LBB0_6 +; NUM-NEXT: .LBB0_4: # %catch.fallthrough +; NUM-NEXT: cmpl $1, %eax +; NUM-NEXT: jne .LBB0_8 +; NUM-NEXT: # %bb.5: # %catch +; NUM-NEXT: movq -56(%rbp), %rdi +; NUM-NEXT: movl $-1, -144(%rbp) +; NUM-NEXT: callq __cxa_begin_catch +; NUM-NEXT: movzbl (%rax), %eax +; NUM-NEXT: movb %al, -45(%rbp) +; NUM-NEXT: xorl 
%ecx, %ecx +; NUM-NEXT: cmpb $3, %al +; NUM-NEXT: .LBB0_6: # %return +; NUM-NEXT: setne %cl +; NUM-NEXT: movl %ecx, -44(%rbp) +; NUM-NEXT: movl $-1, -144(%rbp) +; NUM-NEXT: callq __cxa_end_catch +; NUM-NEXT: jmp .LBB0_7 +; NUM-NEXT: .LBB0_8: # %eh.resume +; NUM-NEXT: movl $-1, -144(%rbp) +; +; SJLJ-LABEL: main: +; SJLJ: # %bb.0: # %entry +; SJLJ-NEXT: endbr64 +; SJLJ-NEXT: pushq %rbp +; SJLJ-NEXT: movq %rsp, %rbp +; SJLJ-NEXT: pushq %r15 +; SJLJ-NEXT: pushq %r14 +; SJLJ-NEXT: pushq %r13 +; SJLJ-NEXT: pushq %r12 +; SJLJ-NEXT: pushq %rbx +; SJLJ-NEXT: subq $120, %rsp +; SJLJ-NEXT: movl $0, -44(%rbp) +; SJLJ-NEXT: movq $__gxx_personality_sj0, -120(%rbp) +; SJLJ-NEXT: movq $GCC_except_table0, -112(%rbp) +; SJLJ-NEXT: movq %rbp, -104(%rbp) +; SJLJ-NEXT: movq %rsp, -88(%rbp) +; SJLJ-NEXT: movq $.LBB0_9, -96(%rbp) +; SJLJ-NEXT: movl $1, -144(%rbp) +; SJLJ-NEXT: leaq -152(%rbp), %rdi +; SJLJ-NEXT: callq _Unwind_SjLj_Register@PLT +; SJLJ-NEXT: .Ltmp0: +; SJLJ-NEXT: callq _Z3foov +; SJLJ-NEXT: .Ltmp1: +; SJLJ-NEXT: # %bb.1: # %invoke.cont +; SJLJ-NEXT: movl $1, -44(%rbp) +; SJLJ-NEXT: .LBB0_7: # %return +; SJLJ-NEXT: movl -44(%rbp), %ebx +; SJLJ-NEXT: leaq -152(%rbp), %rdi +; SJLJ-NEXT: callq _Unwind_SjLj_Unregister@PLT +; SJLJ-NEXT: movl %ebx, %eax +; SJLJ-NEXT: addq $120, %rsp +; SJLJ-NEXT: popq %rbx +; SJLJ-NEXT: popq %r12 +; SJLJ-NEXT: popq %r13 +; SJLJ-NEXT: popq %r14 +; SJLJ-NEXT: popq %r15 +; SJLJ-NEXT: popq %rbp +; SJLJ-NEXT: retq +; SJLJ-NEXT: .LBB0_9: +; SJLJ-NEXT: endbr64 +; SJLJ-NEXT: movl -144(%rbp), %eax +; SJLJ-NEXT: cmpl $1, %eax +; SJLJ-NEXT: jb .LBB0_10 +; SJLJ-NEXT: # %bb.11: +; SJLJ-NEXT: ud2 +; SJLJ-NEXT: .LBB0_10: +; SJLJ-NEXT: leaq .LJTI0_0(%rip), %rcx +; SJLJ-NEXT: jmpq *(%rcx,%rax,8) +; SJLJ-NEXT: .LBB0_2: # %lpad +; SJLJ-NEXT: .Ltmp2: +; SJLJ-NEXT: endbr64 +; SJLJ-NEXT: movl -140(%rbp), %ecx +; SJLJ-NEXT: movl -136(%rbp), %eax +; SJLJ-NEXT: movq %rcx, -56(%rbp) +; SJLJ-NEXT: movl %eax, -64(%rbp) +; SJLJ-NEXT: cmpl $2, %eax +; SJLJ-NEXT: jne .LBB0_4 +; SJLJ-NEXT: # %bb.3: # %catch3 +; SJLJ-NEXT: movq -56(%rbp), %rdi +; SJLJ-NEXT: movl $-1, -144(%rbp) +; SJLJ-NEXT: callq __cxa_begin_catch +; SJLJ-NEXT: movl (%rax), %eax +; SJLJ-NEXT: movl %eax, -60(%rbp) +; SJLJ-NEXT: xorl %ecx, %ecx +; SJLJ-NEXT: cmpl $5, %eax +; SJLJ-NEXT: jmp .LBB0_6 +; SJLJ-NEXT: .LBB0_4: # %catch.fallthrough +; SJLJ-NEXT: cmpl $1, %eax +; SJLJ-NEXT: jne .LBB0_8 +; SJLJ-NEXT: # %bb.5: # %catch +; SJLJ-NEXT: movq -56(%rbp), %rdi +; SJLJ-NEXT: movl $-1, -144(%rbp) +; SJLJ-NEXT: callq __cxa_begin_catch +; SJLJ-NEXT: movzbl (%rax), %eax +; SJLJ-NEXT: movb %al, -45(%rbp) +; SJLJ-NEXT: xorl %ecx, %ecx +; SJLJ-NEXT: cmpb $3, %al +; SJLJ-NEXT: .LBB0_6: # %return +; SJLJ-NEXT: setne %cl +; SJLJ-NEXT: movl %ecx, -44(%rbp) +; SJLJ-NEXT: movl $-1, -144(%rbp) +; SJLJ-NEXT: callq __cxa_end_catch +; SJLJ-NEXT: jmp .LBB0_7 +; SJLJ-NEXT: .LBB0_8: # %eh.resume +; SJLJ-NEXT: movl $-1, -144(%rbp) entry: %retval = alloca i32, align 4 %exn.slot = alloca ptr -- cgit v1.1 From 6a13bbf92f6f7f2f5d59dfda99ccca223c72eeef Mon Sep 17 00:00:00 2001 From: Joe Nash Date: Wed, 3 Apr 2024 11:34:12 -0400 Subject: =?UTF-8?q?[AMDGPU][MC]=20Enables=20sgpr=20or=20imm=20src1=20for?= =?UTF-8?q?=20float=20VOP3=20DPP,=20but=20excludi=E2=80=A6=20(#87382)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit …ng VOPC. Fixes support on GFX1150 and GFX12 where src1 of e64_dpp instructions should allow sgpr and imm operands. PR #67461 added support for this with int operands, but it was missing a piece for float. 
Changing VOPC e64_dpp will be in a different patch because there is a bug preventing that change. --- .../Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp | 13 +- llvm/lib/Target/AMDGPU/SIInstrInfo.td | 2 +- llvm/lib/Target/AMDGPU/VOP2Instructions.td | 19 +- llvm/lib/Target/AMDGPU/VOPCInstructions.td | 8 + llvm/test/MC/AMDGPU/gfx1150_asm_features.s | 10 + llvm/test/MC/AMDGPU/gfx11_asm_err.s | 8 +- llvm/test/MC/AMDGPU/gfx12_asm_features.s | 4 + llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s | 440 +++++++++++++++++++- llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s | 442 ++++++++++++++++++++- .../MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s | 210 ++++++++++ .../test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s | 241 +++++++++++ llvm/test/MC/AMDGPU/gfx12_err.s | 16 + llvm/test/MC/AMDGPU/vop_dpp.s | 4 +- .../Disassembler/AMDGPU/gfx1150_dasm_features.txt | 9 + .../MC/Disassembler/AMDGPU/gfx12_dasm_features.txt | 4 + .../Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt | 216 ++++++++++ .../Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt | 213 ++++++++++ .../AMDGPU/gfx12_dasm_vop3_from_vop2_dpp16.txt | 106 +++++ .../AMDGPU/gfx12_dasm_vop3_from_vop2_dpp8.txt | 109 +++++ 19 files changed, 2055 insertions(+), 19 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp index 294fc68..3866723 100644 --- a/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp +++ b/llvm/lib/Target/AMDGPU/AsmParser/AMDGPUAsmParser.cpp @@ -4627,10 +4627,15 @@ bool AMDGPUAsmParser::validateDPP(const MCInst &Inst, if (Src1Idx >= 0) { const MCOperand &Src1 = Inst.getOperand(Src1Idx); const MCRegisterInfo *TRI = getContext().getRegisterInfo(); - if (Src1.isImm() || - (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI))) { - AMDGPUOperand &Op = ((AMDGPUOperand &)*Operands[Src1Idx]); - Error(Op.getStartLoc(), "invalid operand for instruction"); + if (Src1.isReg() && isSGPR(mc2PseudoReg(Src1.getReg()), TRI)) { + auto Reg = mc2PseudoReg(Inst.getOperand(Src1Idx).getReg()); + SMLoc S = getRegLoc(Reg, Operands); + Error(S, "invalid operand for instruction"); + return false; + } + if (Src1.isImm()) { + Error(getInstLoc(Operands), + "src1 immediate operand invalid for instruction"); return false; } } diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td index 1694436..f1afbcc 100644 --- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td +++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td @@ -2268,7 +2268,7 @@ class VOPProfile _ArgVT, bit _EnableClamp = 0> { field Operand Src1ModDPP = getSrcModDPP.ret; field Operand Src2ModDPP = getSrcModDPP.ret; field Operand Src0ModVOP3DPP = getSrcModDPP.ret; - field Operand Src1ModVOP3DPP = getSrcModDPP.ret; + field Operand Src1ModVOP3DPP = getSrcModVOP3DPP.ret; field Operand Src2ModVOP3DPP = getSrcModVOP3DPP.ret; field Operand Src0ModSDWA = getSrcModSDWA.ret; field Operand Src1ModSDWA = getSrcModSDWA.ret; diff --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td index f136a43..c001c5d 100644 --- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td +++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td @@ -503,6 +503,7 @@ def VOP_MAC_F16_t16 : VOP_MAC { dpp8:$dpp8, Dpp8FI:$fi); let Src2Mod = FP32InputMods; // dummy unused modifiers let Src2RC64 = VGPRSrc_32; // stub argument + let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; } def VOP_MAC_F32 : VOP_MAC ; let HasExtDPP = 0, HasExt32BitDPP = 0 in @@ -618,7 +619,7 @@ class VOP2e_SGPR ArgVT> : VOPProfile { let AsmVOP3Base = "$vdst, $src0_modifiers, 
$src1_modifiers, $src2"; let Outs32 = (outs DstRC:$vdst); - let Outs64 = (outs DstRC:$vdst); + let Outs64 = (outs DstRC64:$vdst); // Suppress src2 implied by type since the 32-bit encoding uses an // implicit VCC use. @@ -652,7 +653,7 @@ class VOP2e_SGPR ArgVT> : VOPProfile { dpp8:$dpp8, Dpp8FI:$fi); let Src0ModVOP3DPP = FPVRegInputMods; - let Src1ModVOP3DPP = FPVRegInputMods; + let Src1ModVOP3DPP = FP32VCSrcInputMods; let HasExt = 1; let HasExtDPP = 1; @@ -662,7 +663,17 @@ class VOP2e_SGPR ArgVT> : VOPProfile { } def VOP2e_I32_I32_I32_I1 : VOP2e_SGPR<[i32, i32, i32, i1]>; -def VOP2e_I16_I16_I16_I1 : VOP2e_SGPR<[i16, i16, i16, i1]>; +def VOP2e_I16_I16_I16_I1_fake16 : VOP2e_SGPR<[i16, i16, i16, i1]> { + let IsTrue16 = 1; + let DstRC64 = getVALUDstForVT.ret; + + let Src0Mod = getSrcMod.ret; + let Src1Mod = getSrcMod.ret; + + let Src0VOP3DPP = VGPRSrc_32; + let Src1VOP3DPP = getVOP3DPPSrcForVT.ret; + let Src1ModVOP3DPP = getSrcModVOP3DPP.ret; +} def VOP_READLANE : VOPProfile<[i32, i32, i32, untyped]> { let Outs32 = (outs SReg_32:$vdst); @@ -703,7 +714,7 @@ def VOP_WRITELANE : VOPProfile<[i32, i32, i32, i32]> { //===----------------------------------------------------------------------===// let SubtargetPredicate = isGFX11Plus in -defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1>; +defm V_CNDMASK_B16 : VOP2eInst <"v_cndmask_b16", VOP2e_I16_I16_I16_I1_fake16>; defm V_CNDMASK_B32 : VOP2eInst_VOPD <"v_cndmask_b32", VOP2e_I32_I32_I32_I1, 0x9, "v_cndmask_b32">; let SubtargetPredicate = HasMadMacF32Insts, isReMaterializable = 1 in def V_MADMK_F32 : VOP2_Pseudo <"v_madmk_f32", VOP_MADMK_F32, []>; diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 022fb7c..16dd353 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -75,6 +75,8 @@ class VOPC_Profile sched, ValueType vt0, ValueType vt1 = vt let HasDst32 = 0; // VOPC disallows dst_sel and dst_unused as they have no effect on destination let EmitDstSel = 0; + // FIXME: work around AsmParser bug + let Src1ModVOP3DPP = getSrcModDPP.ret; let Outs64 = (outs VOPDstS64orS32:$sdst); let OutsVOP3DPP = Outs64; let OutsVOP3DPP8 = Outs64; @@ -112,6 +114,8 @@ class VOPC_NoSdst_Profile sched, ValueType vt0, "$src0, $src1"); let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let EmitDst = 0; + // FIXME: work around AsmParser bug + let Src1ModVOP3DPP = getSrcModDPP.ret; } multiclass VOPC_NoSdst_Profile_t16 sched, ValueType vt0, ValueType vt1 = vt0> { @@ -785,6 +789,8 @@ class VOPC_Class_Profile sched, ValueType src0VT, ValueType let HasSrc1Mods = 0; let HasClamp = 0; let HasOMod = 0; + // FIXME: work around AsmParser bug + let Src1ModVOP3DPP = getSrcModDPP.ret; } multiclass VOPC_Class_Profile_t16 sched> { @@ -812,6 +818,8 @@ class VOPC_Class_NoSdst_Profile sched, ValueType src0VT, Va let AsmVOP3Base = "$src0_modifiers, $src1"; let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let EmitDst = 0; + // FIXME: work around AsmParser bug + let Src1ModVOP3DPP = getSrcModDPP.ret; } multiclass VOPC_Class_NoSdst_Profile_t16 sched> { diff --git a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s index 056221f..336dd8b 100644 --- a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s @@ -23,3 +23,13 @@ v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0] // GFX1150: encoding: 
[0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v5, v1, s2 row_mirror +// GFX1150: encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] + +v_min3_f16 v5, v1, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf +// GFX1150: encoding: [0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff] + +// This is a regression test for potential changes in the future. +v_cmp_le_f32 vcc_lo, v1, v2 row_mirror +// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx11_asm_err.s b/llvm/test/MC/AMDGPU/gfx11_asm_err.s index da1989e..3ec3162 100644 --- a/llvm/test/MC/AMDGPU/gfx11_asm_err.s +++ b/llvm/test/MC/AMDGPU/gfx11_asm_err.s @@ -51,13 +51,13 @@ v_add3_u32_e64_dpp v5, v1, s1, v0 dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_add3_u32_e64_dpp v5, v1, 42, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction v_cvt_f32_i32_e64_dpp v5, s1 dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction @@ -135,7 +135,7 @@ v_fmac_f16_e64_dpp v5, s2, v3 quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_fmac_f16_e64_dpp v5, v2, 1.0 quad_perm:[3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction v_fmac_f32_e64_dpp v5, s2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction @@ -144,7 +144,7 @@ v_fmac_f32_e64_dpp v5, 0x1234, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_fmac_f32_e64_dpp v5, v2, 1 dpp8:[7,6,5,4,3,2,1,0] -// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: src1 immediate operand invalid for instruction v_fmac_f32_e64_dpp v5, -1.0, v3 quad_perm:[3,2,1,0] // GFX11: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_features.s b/llvm/test/MC/AMDGPU/gfx12_asm_features.s index bb911c6..f32b7da 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_features.s @@ -23,6 +23,10 @@ v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0] // GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// This is a regression test for potential changes in the future. 
+v_cmp_le_f32 vcc_lo, v1, v2 row_mirror +// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] + // // Elements of CPol operand can be given in any order // diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s index 88bdb7e..d0e309a 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s @@ -6,6 +6,12 @@ v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -57,6 +63,10 @@ v_add_co_u32_e64_dpp v5, s6, v1, v2 row_mirror // W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_co_u32_e64_dpp v5, s6, v1, s2 row_mirror +// W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_add_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror // W32: [0x05,0x06,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -113,6 +123,10 @@ v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror // W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror +// W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_add_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 // W64: [0x05,0x0c,0x00,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -155,6 +169,12 @@ v_add_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 bank v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_add_lshl_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_add_lshl_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -323,6 +343,12 @@ v_add_nc_u16_e64_dpp v255, v255, v255 clamp row_xmask:15 row_mask:0x3 bank_mask: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_alignbit_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_alignbit_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -365,6 +391,12 @@ v_alignbit_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_ v_alignbyte_b32_e64_dpp v5, 
v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_alignbyte_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_alignbyte_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -449,6 +481,12 @@ v_and_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_and_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_and_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -575,6 +613,12 @@ v_bcnt_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_bfe_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -617,6 +661,12 @@ v_bfe_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_bfe_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfe_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -659,6 +709,12 @@ v_bfe_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_bfi_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_bfi_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -752,6 +808,14 @@ v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_mirror // W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x40,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5, v1, s2, s3 row_mirror +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0c,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, 10, s3 row_mirror +// W32: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x0d,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + v_cndmask_b16_e64_dpp v5, v1, v2, s3 row_half_mirror // W32: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x0e,0x00,0x01,0x41,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -808,6 +872,14 @@ v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_half_mirror // W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x41,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5, v1, s2, s[6:7] row_half_mirror +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x18,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, 10, s[6:7] row_half_mirror +// W64: [0x05,0x00,0x5d,0xd6,0xfa,0x14,0x19,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b16_e64_dpp v5, v1, v2, s[6:7] row_shl:1 // W64: [0x05,0x00,0x5d,0xd6,0xfa,0x04,0x1a,0x00,0x01,0x01,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -850,6 +922,12 @@ v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null row_xmask:15 row_mask:0x3 ban v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_cubeid_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -892,6 +970,12 @@ v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_cubema_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubema_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -934,6 +1018,12 @@ v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_cubesc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -976,6 +1066,12 @@ v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_cubetc_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1378,6 
+1474,12 @@ v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1588,6 +1690,12 @@ v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 b v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_div_fixup_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x54,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1630,6 +1738,12 @@ v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 ro v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_fma_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x48,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1672,6 +1786,12 @@ v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_fma_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_fma_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1756,6 +1876,9 @@ v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 row_xmask:15 row_mask:0x3 ba v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_lerp_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1798,6 +1921,12 @@ v_lerp_u8_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_lshl_add_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_add_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x46,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: 
[0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1840,6 +1969,12 @@ v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_ v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_lshl_or_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_lshl_or_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -1966,6 +2101,12 @@ v_lshrrev_b16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 b v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2008,6 +2149,12 @@ v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_i32_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2050,6 +2197,12 @@ v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_i32_i24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_i32_i24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2092,6 +2245,12 @@ v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2134,6 +2293,12 @@ v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_u32_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + 
+v_mad_u32_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2176,6 +2341,12 @@ v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mad_u32_u24_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mad_u32_u24_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2218,6 +2389,12 @@ v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2260,6 +2437,12 @@ v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2302,6 +2485,12 @@ v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2344,6 +2533,12 @@ v_max3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2386,6 +2581,12 @@ v_max3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: 
[0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2428,6 +2629,12 @@ v_max3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_max3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_max3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2554,6 +2761,12 @@ v_max_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maxmin_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2596,6 +2809,12 @@ v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmas v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2638,6 +2857,12 @@ v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmas v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maxmin_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2680,6 +2905,12 @@ v_maxmin_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_ma v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maxmin_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maxmin_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: 
[0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2806,6 +3037,12 @@ v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask: v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2848,6 +3085,12 @@ v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2890,6 +3133,12 @@ v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2932,6 +3181,12 @@ v_med3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -2974,6 +3229,12 @@ v_med3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_med3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3016,6 +3277,12 @@ v_med3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_med3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + 
+v_med3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3058,6 +3325,12 @@ v_med3_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3100,6 +3373,12 @@ v_min3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3142,6 +3421,12 @@ v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_i16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3184,6 +3469,12 @@ v_min3_i16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3226,6 +3517,12 @@ v_min3_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3268,6 +3565,12 @@ v_min3_u16_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: 
[0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_min3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_min3_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3394,6 +3697,12 @@ v_min_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_num_f16_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3436,6 +3745,12 @@ v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmas v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_num_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3478,6 +3793,12 @@ v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmas v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_i32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_i32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3520,6 +3841,12 @@ v_minmax_i32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_ma v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minmax_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minmax_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3562,6 +3889,9 @@ v_minmax_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_ma v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_msad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3646,6 +3976,12 @@ v_mul_lo_u16_e64_dpp v255, v255, v255 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bo v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_mullit_f32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_mullit_f32_e64_dpp v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3688,6 +4024,12 @@ v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_or3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_or3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3814,6 +4156,12 @@ v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mas v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_perm_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_perm_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3856,6 +4204,9 @@ v_perm_b32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_hi_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3898,6 +4249,12 @@ v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 ba v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_u16_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_sad_u16_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3940,6 +4297,12 @@ v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp row_xmask:15 row_mask:0x3 bank v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_sad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -3982,6 +4345,9 @@ v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp 
row_xmask:15 row_mask:0x3 bank v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_sad_u8_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -4033,6 +4399,10 @@ v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_mirror // W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_co_u32_e64_dpp v5, s6, v1, s2 row_mirror +// W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_sub_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror // W32: [0x05,0x06,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4089,6 +4459,10 @@ v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror // W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror +// W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 // W64: [0x05,0x0c,0x01,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4266,6 +4640,10 @@ v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_mirror // W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x40,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_subrev_co_u32_e64_dpp v5, s6, v1, s2 row_mirror +// W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_subrev_co_u32_e64_dpp v5, s6, v1, v2 row_half_mirror // W32: [0x05,0x06,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4322,6 +4700,10 @@ v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_half_mirror // W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x41,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_subrev_co_u32_e64_dpp v5, s[12:13], v1, s2 row_half_mirror +// W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x00,0x00,0x01,0x41,0x01,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_subrev_co_u32_e64_dpp v5, s[12:13], v1, v2 row_shl:1 // W64: [0x05,0x0c,0x02,0xd7,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4364,6 +4746,12 @@ v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp row_xmask:15 row_mask:0x3 b v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xad_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_xad_u32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -4406,6 +4794,12 @@ 
v_xad_u32_e64_dpp v255, v255, v255, src_scc row_xmask:15 row_mask:0x3 bank_mask: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_xor3_b32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_xor3_b32_e64_dpp v5, v1, 10, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x14,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -4770,7 +5164,7 @@ v_dot2_f16_f16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: [0x00,0x00,0x66,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x04,0x00] v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 // GFX12: [0x00,0x60,0x66,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] @@ -4791,7 +5185,7 @@ v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_ma // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: [0x00,0x00,0x67,0xd6,0xfa,0x04,0x0c,0x04,0x01,0xe4,0x00,0x00] v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] quad_perm:[0,1,2,3] row_mask:0x0 bank_mask:0x0 fi:1 // GFX12: [0x00,0x60,0x67,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x04,0x00] @@ -4973,6 +5367,12 @@ v_maximum_f16 v255, -|v255|, -|v255| row_xmask:15 row_mask:0x3 bank_mask:0x0 bou v_minimum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minimum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -5015,6 +5415,12 @@ v_minimum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_m v_maximum3_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maximum3_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maximum3_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -5057,6 +5463,12 @@ v_maximum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 row_m v_minimum3_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimum3_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimum3_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minimum3_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: 
[0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -5099,6 +5511,12 @@ v_minimum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_mask:0x v_maximum3_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maximum3_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximum3_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maximum3_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -5180,6 +5598,12 @@ v_maximumminimum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f32 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f32 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minimummaximum_f32 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -5222,6 +5646,12 @@ v_minimummaximum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 row_xmask:15 v_maximumminimum_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_maximumminimum_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_maximumminimum_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_maximumminimum_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] @@ -5264,6 +5694,12 @@ v_maximumminimum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp row_xmask:15 row_m v_minimummaximum_f16 v5, v1, v2, v3 quad_perm:[3,2,1,0] // GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] +v_minimummaximum_f16 v5, v1, s2, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] + +v_minimummaximum_f16 v5, v1, 2.0, v3 quad_perm:[3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0xe8,0x0d,0x04,0x01,0x1b,0x00,0xff] + v_minimummaximum_f16 v5, v1, v2, v3 quad_perm:[0,1,2,3] // GFX12: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s index 0e84765..25b13ac 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s @@ -6,6 +6,12 @@ v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_add3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_add3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_add3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -47,6 +53,10 @@ v_add_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x69,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_co_u32_e64_dpp 
v5, s105, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x69,0x00,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_add_co_u32_e64_dpp v5, vcc_lo, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x6a,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -67,6 +77,10 @@ v_add_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x68,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_add_co_u32_e64_dpp v5, s[104:105], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x00,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_add_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x6a,0x00,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -81,6 +95,12 @@ v_add_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_add_lshl_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_add_lshl_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_add_lshl_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_add_lshl_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -144,6 +164,12 @@ v_add_nc_u16_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_alignbit_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_alignbit_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_alignbit_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -177,6 +203,12 @@ v_alignbit_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_alignbyte_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_alignbyte_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -219,6 +251,12 @@ v_and_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_and_or_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_and_or_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x57,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_and_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: 
[0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -273,6 +311,12 @@ v_bcnt_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_bfe_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_bfe_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_bfe_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_bfe_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -309,6 +353,12 @@ v_bfe_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_bfe_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_bfe_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_bfe_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_bfe_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -345,6 +395,12 @@ v_bfe_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_bfi_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_bfi_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_bfi_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_bfi_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -391,6 +447,14 @@ v_cndmask_b16_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b16_e64_dpp v5, v1, 10, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x5d,0xd6,0xe9,0x14,0x0d,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b16_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x5d,0xd6,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -423,12 +487,22 @@ v_cndmask_b16_e64_dpp v5, -v1, |v2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 // W64: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xea,0x21,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b16_e64_dpp v5, -v1, |s2|, ttmp[14:15] dpp8:[7,6,5,4,3,2,1,0] fi:1 +// W64: [0x05,0x02,0x5d,0xd6,0xea,0x04,0xe8,0x21,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b16_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0xff,0x03,0x5d,0xd6,0xe9,0xfe,0xf3,0x61,0xff,0x00,0x00,0x00] v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_cubeid_f32_e64_dpp v5, v1, s2, v3 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cubeid_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_cubeid_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -465,6 +539,12 @@ v_cubeid_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0, v_cubema_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_cubema_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cubema_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_cubema_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -501,6 +581,12 @@ v_cubema_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0, v_cubesc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_cubesc_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cubesc_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_cubesc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -537,6 +623,12 @@ v_cubesc_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0, v_cubetc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_cubetc_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cubetc_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_cubetc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -687,6 +779,12 @@ v_cvt_pk_u16_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_cvt_pk_u8_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_cvt_pk_u8_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -771,6 +869,12 @@ v_cvt_pk_norm_u16_f32_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_div_fixup_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_div_fixup_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x54,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: 
[0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -807,6 +911,12 @@ v_div_fixup_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0 v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_fma_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_fma_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x48,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_fma_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -843,6 +953,12 @@ v_fma_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0 v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_fma_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_fma_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_fma_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -891,6 +1007,9 @@ v_ldexp_f32_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_lerp_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_lerp_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + v_lerp_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -927,6 +1046,12 @@ v_lerp_u8_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_lshl_add_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_lshl_add_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_lshl_add_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_lshl_add_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -963,6 +1088,12 @@ v_lshl_add_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_lshl_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_lshl_or_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_lshl_or_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_lshl_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1017,6 +1148,12 @@ v_lshrrev_b16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mad_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x53,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1050,6 +1187,12 @@ v_mad_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_i32_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mad_i32_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mad_i32_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1086,6 +1229,12 @@ v_mad_i32_i16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_i32_i24_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mad_i32_i24_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mad_i32_i24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1122,6 +1271,12 @@ v_mad_i32_i24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi: v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mad_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1155,6 +1310,12 @@ v_mad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_u32_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mad_u32_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mad_u32_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1191,6 +1352,12 @@ v_mad_u32_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mad_u32_u24_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mad_u32_u24_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mad_u32_u24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1227,6 +1394,12 @@ v_mad_u32_u24_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi: v_max3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // 
GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_max3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1263,6 +1436,12 @@ v_max3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0, v_max3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_max3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1299,6 +1478,12 @@ v_max3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0, v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1332,6 +1517,12 @@ v_max3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_max3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_max3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1368,6 +1559,12 @@ v_max3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1401,6 +1598,12 @@ v_max3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_max3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_max3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_max3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_max3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // 
GFX12: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1455,6 +1658,12 @@ v_max_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maxmin_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1491,6 +1700,12 @@ v_maxmin_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0, v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maxmin_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x69,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x69,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1527,6 +1742,12 @@ v_maxmin_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0, v_maxmin_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maxmin_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maxmin_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1563,6 +1784,12 @@ v_maxmin_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_maxmin_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maxmin_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maxmin_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maxmin_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1617,6 +1844,12 @@ v_mbcnt_lo_u32_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_med3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_med3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1653,6 +1886,12 @@ v_med3_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0, v_med3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_num_f32_e64_dpp v5, v1, s2, v3 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_med3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x31,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1689,6 +1928,12 @@ v_med3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0, v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1722,6 +1967,12 @@ v_med3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_med3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_med3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1758,6 +2009,12 @@ v_med3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1791,6 +2048,12 @@ v_med3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_med3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_med3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_med3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_med3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1827,6 +2090,12 @@ v_med3_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_min3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_min3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1863,6 +2132,12 @@ v_min3_num_f16_e64_dpp 
v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0, v_min3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x29,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_min3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x29,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1899,6 +2174,12 @@ v_min3_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0, v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_i16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_i16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1932,6 +2213,12 @@ v_min3_i16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_min3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_min3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -1968,6 +2255,12 @@ v_min3_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2001,6 +2294,12 @@ v_min3_u16_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_min3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_min3_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_min3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_min3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2055,6 +2354,12 @@ v_min_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_minmax_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minmax_num_f16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f16_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x6a,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minmax_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2091,6 +2396,12 @@ v_minmax_num_f16_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0, v_minmax_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minmax_num_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_num_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x68,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minmax_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x68,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2127,6 +2438,12 @@ v_minmax_num_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0, v_minmax_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minmax_i32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_i32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minmax_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2163,6 +2480,12 @@ v_minmax_i32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_minmax_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minmax_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minmax_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minmax_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2199,6 +2522,9 @@ v_minmax_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_msad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_msad_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + v_msad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2244,6 +2570,12 @@ v_mul_lo_u16_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mullit_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_mullit_f32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_mullit_f32_e64_dpp v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_mullit_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2280,6 +2612,12 @@ v_mullit_f32_e64_dpp v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0, v_or3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_or3_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x58,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_or3_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_or3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2337,6 +2675,12 @@ v_pack_b32_f16_e64_dpp v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_perm_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_perm_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_perm_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2373,6 +2717,9 @@ v_perm_b32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_sad_hi_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_sad_hi_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + v_sad_hi_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2409,6 +2756,12 @@ v_sad_hi_u8_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_sad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_sad_u16_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_sad_u16_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_sad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2445,6 +2798,12 @@ v_sad_u16_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_sad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_sad_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_sad_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_sad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2481,6 +2840,9 @@ v_sad_u32_e64_dpp v255, v255, v255, src_scc clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_sad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_sad_u8_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + v_sad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2518,6 +2880,10 @@ v_sub_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_co_u32_e64_dpp v5, s6, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x01,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_sub_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x69,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2538,6 +2904,10 @@ v_sub_co_u32_e64_dpp v5, s[12:13], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_sub_co_u32_e64_dpp v5, s[12:13], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x0c,0x01,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_sub_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x68,0x01,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2584,6 +2954,10 @@ v_subrev_co_u32_e64_dpp v5, s6, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_subrev_co_u32_e64_dpp v5, s6, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x06,0x02,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_subrev_co_u32_e64_dpp v5, s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x69,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2608,6 +2982,10 @@ v_subrev_co_u32_e64_dpp v5, s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x68,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_subrev_co_u32_e64_dpp v5, s[104:105], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x68,0x02,0xd7,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_subrev_co_u32_e64_dpp v5, vcc, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x6a,0x02,0xd7,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2622,6 +3000,12 @@ v_subrev_co_u32_e64_dpp v255, null, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_xad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_xad_u32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_xad_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_xad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2658,6 +3042,12 @@ v_xad_u32_e64_dpp v255, v255, v255, src_scc dpp8:[0,0,0,0,0,0,0,0] fi:0 v_xor3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_xor3_b32_e64_dpp v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_xor3_b32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05] + v_xor3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -2983,7 +3373,7 @@ v_dot2_f16_f16_e64_dpp v0, 
s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_dot2_f16_f16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: [0x00,0x00,0x66,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x88,0x46,0x92] v_dot2_f16_f16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] // GFX12: [0x00,0x60,0x66,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] @@ -3004,7 +3394,7 @@ v_dot2_bf16_bf16_e64_dpp v0, s1, v2, v3 dpp8:[0,1,2,3,4,4,4,4] // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction v_dot2_bf16_bf16_e64_dpp v0, v1, s2, v3 dpp8:[0,1,2,3,4,4,4,4] -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: invalid operand for instruction +// GFX12: [0x00,0x00,0x67,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x88,0x46,0x92] v_dot2_bf16_bf16_e64_dpp v0, v1, v2, v3 op_sel:[0,0,1,1] dpp8:[0,1,2,3,4,4,4,4] // GFX12: [0x00,0x60,0x67,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x88,0x46,0x92] @@ -3066,6 +3456,12 @@ v_maximum_f16 v255, -|v255|, -|v255| dpp8:[0,0,0,0,0,0,0,0] fi:0 v_minimum3_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minimum3_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minimum3_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minimum3_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3102,6 +3498,12 @@ v_minimum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0, v_maximum3_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maximum3_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maximum3_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maximum3_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3138,6 +3540,12 @@ v_maximum3_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0,0,0,0, v_minimum3_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minimum3_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minimum3_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minimum3_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3174,6 +3582,12 @@ v_minimum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] f v_maximum3_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maximum3_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maximum3_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maximum3_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3210,6 +3624,12 @@ v_maximum3_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0,0,0] f 
v_maximumminimum_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maximumminimum_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maximumminimum_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maximumminimum_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3246,6 +3666,12 @@ v_maximumminimum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0, v_minimummaximum_f32 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minimummaximum_f32 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minimummaximum_f32 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minimummaximum_f32 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3282,6 +3708,12 @@ v_minimummaximum_f32 v255, -|v255|, -|v255|, -|src_scc| clamp div:2 dpp8:[0,0,0, v_maximumminimum_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_maximumminimum_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_maximumminimum_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_maximumminimum_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] @@ -3318,6 +3750,12 @@ v_maximumminimum_f16 v255, -|v255|, -|v255|, -|src_scc| clamp dpp8:[0,0,0,0,0,0, v_minimummaximum_f16 v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] +v_minimummaximum_f16 v5, v1, s2, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0c,0x04,0x01,0x77,0x39,0x05] + +v_minimummaximum_f16 v5, v1, 2.0, v3 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0xe8,0x0d,0x04,0x01,0x77,0x39,0x05] + v_minimummaximum_f16 v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s index ab88ec8..2b7830c 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp16.s @@ -128,6 +128,12 @@ v_add_f16_e64_dpp v5, v1, v2 row_shl:1 v_add_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_add_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_add_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x32,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_add_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -170,6 +176,12 @@ v_add_f32_e64_dpp v5, v1, v2 row_shl:1 v_add_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_add_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_add_f32_e64_dpp v5, v1, 
2.0 row_shl:15 +// GFX12: [0x05,0x00,0x03,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_add_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -212,6 +224,12 @@ v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:1 v_add_nc_u32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_add_nc_u32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_add_nc_u32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x25,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_add_nc_u32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -254,6 +272,12 @@ v_and_b32_e64_dpp v5, v1, v2 row_shl:1 v_and_b32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_and_b32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_and_b32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x1b,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_and_b32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -296,6 +320,12 @@ v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:1 v_ashrrev_i32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_ashrrev_i32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ashrrev_i32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x1a,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_ashrrev_i32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -445,6 +475,12 @@ v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -487,6 +523,12 @@ v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shl:1 v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -529,6 +571,12 @@ v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:1 v_ldexp_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_ldexp_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_ldexp_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x3b,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_ldexp_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -571,6 +619,12 @@ v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:1 v_lshlrev_b32_e64_dpp v5, v1, v2 row_shl:15 // 
GFX12: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_lshlrev_b32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_lshlrev_b32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x18,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_lshlrev_b32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -613,6 +667,12 @@ v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:1 v_lshrrev_b32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_lshrrev_b32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_lshrrev_b32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x19,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_lshrrev_b32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -655,6 +715,12 @@ v_max_num_f16_e64_dpp v5, v1, v2 row_shl:1 v_max_num_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_max_num_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_max_num_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x31,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_max_num_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -697,6 +763,12 @@ v_max_num_f32_e64_dpp v5, v1, v2 row_shl:1 v_max_num_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_max_num_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x16,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_max_num_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x16,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_max_num_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -739,6 +811,12 @@ v_max_i32_e64_dpp v5, v1, v2 row_shl:1 v_max_i32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_max_i32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_max_i32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x12,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_max_i32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -781,6 +859,12 @@ v_max_u32_e64_dpp v5, v1, v2 row_shl:1 v_max_u32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_max_u32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_max_u32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x14,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_max_u32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -823,6 +907,12 @@ v_min_num_f16_e64_dpp v5, v1, v2 row_shl:1 v_min_num_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min_num_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_min_num_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x30,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_min_num_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: 
[0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -865,6 +955,12 @@ v_min_num_f32_e64_dpp v5, v1, v2 row_shl:1 v_min_num_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min_num_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x15,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_min_num_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x15,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_min_num_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -907,6 +1003,12 @@ v_min_i32_e64_dpp v5, v1, v2 row_shl:1 v_min_i32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min_i32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_min_i32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x11,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_min_i32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -949,6 +1051,12 @@ v_min_u32_e64_dpp v5, v1, v2 row_shl:1 v_min_u32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_min_u32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_min_u32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x13,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_min_u32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -991,6 +1099,12 @@ v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:1 v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_dx9_zero_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x07,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1033,6 +1147,12 @@ v_mul_f16_e64_dpp v5, v1, v2 row_shl:1 v_mul_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x35,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1075,6 +1195,12 @@ v_mul_f32_e64_dpp v5, v1, v2 row_shl:1 v_mul_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x08,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1117,6 +1243,12 @@ v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:1 v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_hi_i32_i24_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + 
+v_mul_hi_i32_i24_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x0a,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_hi_i32_i24_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1159,6 +1291,12 @@ v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:1 v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_hi_u32_u24_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_hi_u32_u24_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x0c,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_hi_u32_u24_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1201,6 +1339,12 @@ v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:1 v_mul_i32_i24_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_i32_i24_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_i32_i24_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x09,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_i32_i24_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1243,6 +1387,12 @@ v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shl:1 v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_legacy_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_legacy_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x07,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_legacy_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1285,6 +1435,12 @@ v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:1 v_mul_u32_u24_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_mul_u32_u24_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_mul_u32_u24_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x0b,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_mul_u32_u24_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1327,6 +1483,12 @@ v_or_b32_e64_dpp v5, v1, v2 row_shl:1 v_or_b32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_or_b32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_or_b32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x1c,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_or_b32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1476,6 +1638,12 @@ v_sub_f16_e64_dpp v5, v1, v2 row_shl:1 v_sub_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sub_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x33,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_sub_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1518,6 +1686,12 @@ v_sub_f32_e64_dpp v5, v1, v2 row_shl:1 v_sub_f32_e64_dpp v5, v1, v2 row_shl:15 // 
GFX12: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sub_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x04,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_sub_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1560,6 +1734,12 @@ v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:1 v_sub_nc_u32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_sub_nc_u32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_sub_nc_u32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x26,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_sub_nc_u32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1709,6 +1889,12 @@ v_subrev_f16_e64_dpp v5, v1, v2 row_shl:1 v_subrev_f16_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_subrev_f16_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_subrev_f16_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x34,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_subrev_f16_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1751,6 +1937,12 @@ v_subrev_f32_e64_dpp v5, v1, v2 row_shl:1 v_subrev_f32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_subrev_f32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_subrev_f32_e64_dpp v5, v1, 2.0 row_shl:15 +// GFX12: [0x05,0x00,0x05,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x0f,0x01,0xff] + v_subrev_f32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1793,6 +1985,12 @@ v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:1 v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_subrev_nc_u32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_subrev_nc_u32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x27,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_subrev_nc_u32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1835,6 +2033,12 @@ v_xnor_b32_e64_dpp v5, v1, v2 row_shl:1 v_xnor_b32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_xnor_b32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_xnor_b32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x1e,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_xnor_b32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] @@ -1877,6 +2081,12 @@ v_xor_b32_e64_dpp v5, v1, v2 row_shl:1 v_xor_b32_e64_dpp v5, v1, v2 row_shl:15 // GFX12: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] +v_xor_b32_e64_dpp v5, v1, s2 row_shl:15 +// GFX12: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_xor_b32_e64_dpp v5, v1, 10 row_shl:15 +// GFX12: [0x05,0x00,0x1d,0xd5,0xfa,0x14,0x01,0x00,0x01,0x0f,0x01,0xff] + v_xor_b32_e64_dpp v5, v1, v2 row_shr:1 // GFX12: 
[0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x11,0x01,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s index dc151d66..b18029d 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3_from_vop2_dpp8.s @@ -45,6 +45,12 @@ v_add_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0 v_add_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_add_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_add_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x32,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -57,6 +63,12 @@ v_add_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_add_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_add_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x03,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -69,6 +81,12 @@ v_add_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_add_nc_u32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_add_nc_u32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x25,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x25,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -78,6 +96,12 @@ v_add_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_and_b32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_and_b32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1b,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x1b,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -87,6 +111,12 @@ v_and_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_ashrrev_i32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_ashrrev_i32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1a,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x1a,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -97,14 +127,30 @@ v_cndmask_b32_e64_dpp v5, v1, v2, s3 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x0e,0x00,0x01,0x77,0x39,0x05] // W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, 10, s3 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x14,0x0d,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b32_e64_dpp v5, v1, v2, s105 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa6,0x01,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b32_e64_dpp v5, v1, s2, s105 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa4,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b32_e64_dpp v5, v1, s2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xac,0x01,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc_lo dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x01,0x01,0xd5,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -117,10 +163,22 @@ v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b32_e64_dpp v5, v1, s2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x18,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cndmask_b32_e64_dpp v5, v1, s2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa0,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cndmask_b32_e64_dpp v5, v1, 10, s[104:105] dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x05,0x00,0x01,0xd5,0xe9,0x14,0xa1,0x01,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cndmask_b32_e64_dpp v5, |v1|, -v2, vcc dpp8:[7,6,5,4,3,2,1,0] // W64: [0x05,0x01,0x01,0xd5,0xe9,0x04,0xaa,0x41,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -135,6 +193,12 @@ v_cndmask_b32_e64_dpp v255, -|v255|, -|v255|, null dpp8:[0,0,0,0,0,0,0,0] fi:0 v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] @@ -147,6 +211,12 @@ 
v_cvt_pk_rtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cvt_pkrtz_f16_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x2f,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_cvt_pkrtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] @@ -159,9 +229,18 @@ v_cvt_pkrtz_f16_f32_e64_dpp v255, -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] v_ldexp_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_ldexp_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] +v_ldexp_f16_e64_dpp v5, v1, s2 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x00,0x08,0x01,0x77,0x39,0x05] + +v_ldexp_f16_e64_dpp v5, v1, 2.0 mul:2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x3b,0xd5,0xe9,0xe8,0x01,0x08,0x01,0x77,0x39,0x05] + v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x3b,0xd5,0xea,0x04,0x02,0x10,0x01,0x77,0x39,0x05] @@ -171,6 +250,12 @@ v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_lshlrev_b32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_lshlrev_b32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x18,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x18,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -180,6 +265,12 @@ v_lshlrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_lshrrev_b32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_lshrrev_b32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x19,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x19,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -189,6 +280,12 @@ v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_max_num_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_num_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_max_num_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x31,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_max_num_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x31,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -201,6 +298,12 @@ v_max_num_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] v_max_num_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: 
[0x05,0x00,0x16,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_num_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_max_num_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x16,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_max_num_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x16,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -213,6 +316,12 @@ v_max_num_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_i32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_max_i32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x12,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x12,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -222,6 +331,12 @@ v_max_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_max_u32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_max_u32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x14,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x14,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -231,6 +346,12 @@ v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_min_num_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_min_num_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_min_num_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x30,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_min_num_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x30,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -243,6 +364,12 @@ v_min_num_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] v_min_num_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x15,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_min_num_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x15,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_min_num_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x15,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_min_num_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x15,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -255,6 +382,12 @@ v_min_num_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_min_i32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_min_i32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x11,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x11,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -264,6 +397,12 @@ 
v_min_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_min_u32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_min_u32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x13,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x13,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -273,6 +412,12 @@ v_min_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_dx9_zero_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_dx9_zero_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x07,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -285,6 +430,12 @@ v_mul_dx9_zero_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0, v_mul_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x35,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -297,6 +448,12 @@ v_mul_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mul_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x08,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -309,6 +466,12 @@ v_mul_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_hi_i32_i24_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_hi_i32_i24_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0a,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x0a,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -318,6 +481,12 @@ v_mul_hi_i32_i24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_hi_u32_u24_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_hi_u32_u24_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: 
[0x05,0x00,0x0c,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x0c,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -327,6 +496,12 @@ v_mul_hi_u32_u24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_i32_i24_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x09,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_i32_i24_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x09,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x09,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -336,6 +511,12 @@ v_mul_i32_i24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_mul_legacy_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_legacy_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_legacy_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x07,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_legacy_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -348,6 +529,12 @@ v_mul_legacy_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0, v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_mul_u32_u24_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_mul_u32_u24_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x0b,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x0b,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -357,6 +544,12 @@ v_mul_u32_u24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_or_b32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_or_b32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1c,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x1c,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -405,6 +598,12 @@ v_sub_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0,0,0 v_sub_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_sub_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sub_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x33,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -417,6 +616,12 @@ v_sub_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_sub_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_sub_f32_e64_dpp v5, v1, s2 
dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sub_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x04,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -429,6 +634,12 @@ v_sub_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:0 v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_sub_nc_u32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_sub_nc_u32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x26,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x26,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -477,6 +688,12 @@ v_subrev_co_ci_u32_e64_dpp v255, null, v255, v255, null clamp dpp8:[0,0,0,0,0,0, v_subrev_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_subrev_f16_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_subrev_f16_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x34,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -489,6 +706,12 @@ v_subrev_f16_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] f v_subrev_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_subrev_f32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_subrev_f32_e64_dpp v5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x05,0xd5,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] @@ -501,6 +724,12 @@ v_subrev_f32_e64_dpp v255, -|v255|, -|v255| clamp div:2 dpp8:[0,0,0,0,0,0,0,0] f v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_subrev_nc_u32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_subrev_nc_u32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x27,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x27,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -510,6 +739,12 @@ v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_xnor_b32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_xnor_b32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1e,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x1e,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] @@ -519,6 +754,12 @@ v_xnor_b32_e64_dpp v255, v255, v255 
dpp8:[0,0,0,0,0,0,0,0] fi:0 v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // GFX12: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_xor_b32_e64_dpp v5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_xor_b32_e64_dpp v5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// GFX12: [0x05,0x00,0x1d,0xd5,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x05,0x00,0x1d,0xd5,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] diff --git a/llvm/test/MC/AMDGPU/gfx12_err.s b/llvm/test/MC/AMDGPU/gfx12_err.s index 8b2565c..245ca5f 100644 --- a/llvm/test/MC/AMDGPU/gfx12_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_err.s @@ -127,3 +127,19 @@ s_prefetch_inst s[14:15], 0xffffff, m0, 7 // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a 24-bit signed offset // GFX12-ERR: s_prefetch_inst s[14:15], 0xffffff, m0, 7 // GFX12-ERR: ^ + +v_cmp_le_f32 vcc_lo, v1, s2 row_mirror +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: +// FIXME add test when VOPC e64_dpp src1 asm is fixed + +v_cmp_le_f32 vcc_lo, v1, s2 quad_perm:[1,1,1,1] +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: +// FIXME add test when VOPC e64_dpp src1 asm is fixed + +v_cmpx_gt_u16 v1, s2 op_sel:[1,1] quad_perm:[1,1,1,1] +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: +// FIXME add test when VOPC e64_dpp src1 asm is fixed + +v_cmpx_class_f16_u16 v1, 2.0 quad_perm:[1,1,1,1] +// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: +// FIXME add test when VOPC e64_dpp src1 asm is fixed diff --git a/llvm/test/MC/AMDGPU/vop_dpp.s b/llvm/test/MC/AMDGPU/vop_dpp.s index b2251f5..a15a48e 100644 --- a/llvm/test/MC/AMDGPU/vop_dpp.s +++ b/llvm/test/MC/AMDGPU/vop_dpp.s @@ -648,8 +648,8 @@ v_mov_b32 v0, s1 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 v_and_b32 v0, s42, v1 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 // NOSICI: :[[@LINE+3]]:{{[0-9]+}}: error: not a valid operand. 
-// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: invalid operand for instruction -// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: invalid operand for instruction +// NOVI: :[[@LINE+2]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode +// NOGFX9: :[[@LINE+1]]:{{[0-9]+}}: error: operands are not valid for this GPU or mode v_add_f32 v0, v1, s45 row_shl:1 row_mask:0xa bank_mask:0x1 bound_ctrl:0 //===----------------------------------------------------------------------===// diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt index 6ab3e08..52426d3 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx1150_dasm_features.txt @@ -17,3 +17,12 @@ # GFX1150: v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] 0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX1150: v_add_f32_e64_dpp v5, v1, s2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff + +# GFX1150: v_min3_f16_e64_dpp v5, v1, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff] +0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff + +# GFX1150: v_cmp_le_f32 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] +0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt index 1be97b2..1d69134 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_features.txt @@ -22,3 +22,7 @@ # This is more strict than the check in vinterp-fake16.txt and is GFX12 specific. 
# GFX12: v_interp_p10_f32 v0, v1, v2, v3 wait_exp:0 ; encoding: [0x00,0x00,0x00,0xcd,0x01,0x05,0x0e,0x04] 0x00,0x00,0xe0,0xcd,0x01,0x05,0x0e,0x1c + +# Regression test for future fixes to VOPC _e64_dpp src1 +# GFX12: v_cmp_le_f32 vcc_lo, v1, v2 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] +0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt index 4303c6d..0771e64 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt @@ -4,6 +4,12 @@ # GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_add3_u32_e64_dpp v5, v1, 15, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x1e,0x0d,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x55,0xd6,0xfa,0x1e,0x0d,0x04,0x01,0x1b,0x00,0xff + +# GFX12: v_add3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x55,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x55,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -101,6 +107,9 @@ # GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_add_lshl_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x47,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x47,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -185,6 +194,9 @@ # GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_alignbit_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x16,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x16,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -227,6 +239,9 @@ # GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x17,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 
quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x17,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -311,6 +326,9 @@ # GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_and_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x57,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x57,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -437,6 +455,9 @@ # GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_bfe_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x11,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x11,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -479,6 +500,9 @@ # GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_bfe_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x10,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x10,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -521,6 +545,9 @@ # GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_bfi_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x12,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x12,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -660,6 +687,9 @@ # GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubeid_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -702,6 +732,9 @@ # GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubema_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -744,6 +777,9 @@ # GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubesc_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -786,6 +822,9 @@ # GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cubetc_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1104,6 +1143,9 @@ # GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x26,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x26,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1230,6 +1272,9 @@ # GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_fma_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x13,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x13,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1314,6 +1359,9 @@ # GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_lerp_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x15,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x15,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1356,6 +1404,9 @@ # GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x46,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x46,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1398,6 +1449,9 @@ # GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x56,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x56,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1524,6 +1578,9 @@ # GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1566,6 +1623,9 @@ # GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1608,6 +1668,9 @@ # GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x2a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1650,6 +1713,9 @@ # GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1668,6 +1734,9 @@ # GFX12: v_max3_i32_e64_dpp v5, v1, v2, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff] 0x05,0x00,0x1d,0xd6,0xfa,0x04,0xaa,0x01,0x01,0x11,0x01,0xff +# GFX12: v_max3_i32_e64_dpp v5, v1, 15, vcc_lo row_shr:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x1e,0xa9,0x01,0x01,0x11,0x01,0xff] +0x05,0x00,0x1d,0xd6,0xfa,0x1e,0xa9,0x01,0x01,0x11,0x01,0xff + # GFX12: v_max3_i32_e64_dpp v5, v1, v2, ttmp15 row_shr:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff] 0x05,0x00,0x1d,0xd6,0xfa,0x04,0xee,0x01,0x01,0x1f,0x01,0xff @@ -1692,6 +1761,9 @@ # GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1818,6 +1890,12 @@ # GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, -2.0, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6b,0xd6,0xfa,0xea,0x0d,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6b,0xd6,0xfa,0xea,0x0d,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x6b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1860,6 +1938,9 @@ # GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x69,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x69,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1902,6 +1983,9 @@ # GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x64,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x64,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -1944,6 +2028,9 @@ # GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maxmin_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x62,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x62,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2070,6 +2157,9 @@ # GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x31,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x31,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2112,6 +2202,9 @@ # GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x20,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x20,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x20,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2154,6 +2247,9 @@ # GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x21,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x21,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2196,6 +2292,9 @@ # GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x29,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x29,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2238,6 +2337,9 @@ # GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2280,6 +2382,9 @@ # GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2406,6 +2511,9 @@ # GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x6a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x6a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2448,6 +2556,9 @@ # GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x68,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x68,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2490,6 +2601,9 @@ # GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_i32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x65,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x65,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2532,6 +2646,9 @@ # GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minmax_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x63,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x63,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2574,6 +2691,9 @@ # GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_msad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x39,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x39,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2658,6 +2778,9 @@ # GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mullit_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x18,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x18,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x18,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2700,6 +2823,9 @@ # GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_or3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x58,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x58,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2784,6 +2910,9 @@ # GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_perm_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x44,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x44,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2826,6 +2955,9 @@ # GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x23,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x23,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2868,6 +3000,9 @@ # GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x24,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x24,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2910,6 +3045,9 @@ # GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x25,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x25,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -2952,6 +3090,9 @@ # GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_sad_u8_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x22,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x22,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3146,6 +3287,9 @@ # GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_xad_u32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x45,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x45,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3188,6 +3332,9 @@ # GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_xor3_b32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x40,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x40,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3440,6 +3587,9 @@ # GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x54,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x54,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3482,6 +3632,9 @@ # GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_fma_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x48,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_fma_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x48,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3524,6 +3677,9 @@ # GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x53,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x53,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3566,6 +3722,9 @@ # GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x5a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x5a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x5a,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3608,6 +3767,9 @@ # GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x41,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x41,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3650,6 +3812,9 @@ # GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x59,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x59,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x59,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3692,6 +3857,9 @@ # GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x2c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x2c,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3734,6 +3902,9 @@ # GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x4d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3776,6 +3947,9 @@ # GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_max3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x4e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3818,6 +3992,9 @@ # GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x32,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x32,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x32,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3860,6 +4037,9 @@ # GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x50,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x50,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3902,6 +4082,9 @@ # GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_med3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x51,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x51,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -3944,6 +4127,9 @@ # GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_num_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v255 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff] 0x05,0x00,0x2b,0xd6,0xfa,0x04,0xfe,0x07,0x01,0xe4,0x00,0xff @@ -3986,6 +4172,9 @@ # GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_i16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x4a,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4a,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -4028,6 +4217,9 @@ # GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_min3_u16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x4b,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff] 0x05,0x00,0x4b,0xd6,0xfa,0x04,0x0e,0x04,0x01,0xe4,0x00,0xff @@ -4417,6 +4609,9 @@ # GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximum3_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x2e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4459,6 +4654,9 @@ # GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimum3_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x2d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x2d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4501,6 +4699,9 @@ # GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximum3_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x30,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x30,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4543,6 +4744,9 @@ # GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimum3_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x2f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4585,6 +4789,9 @@ # GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6d,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x6d,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4627,6 +4834,9 @@ # GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] +0x05,0x00,0x6c,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff + # GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff] 0x05,0x00,0x6c,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff @@ -4669,6 +4879,9 @@ # GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff] 0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff] 
+0x05,0x00,0x6f,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff
+
# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
0x05,0x00,0x6f,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff
@@ -4711,6 +4924,9 @@
# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff]
0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x1b,0x00,0xff
+# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, s3, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff]
+0x05,0x00,0x6e,0xd6,0xfa,0x06,0x0c,0x04,0x01,0x1b,0x00,0xff
+
# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 row_mirror row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff]
0x05,0x00,0x6e,0xd6,0xfa,0x04,0x0e,0x04,0x01,0x40,0x01,0xff
diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
index c73ffe7..a836ada 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt
@@ -4,6 +4,12 @@
# GFX12: v_add3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
0x05,0x00,0x55,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
+# GFX12: v_add3_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05]
+0x05,0x00,0x55,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05
+
+# GFX12: v_add3_u32_e64_dpp v5, v1, 10, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05]
+0x05,0x00,0x55,0xd6,0xe9,0x14,0x0d,0x04,0x01,0x77,0x39,0x05
+
# GFX12: v_add3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
0x05,0x00,0x55,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
@@ -59,6 +65,9 @@
# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
0x05,0x00,0x47,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
+# GFX12: v_add_lshl_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x47,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05]
+0x05,0x00,0x47,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05
+
# GFX12: v_add_lshl_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
0x05,0x00,0x47,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
@@ -101,6 +110,9 @@
# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05]
0x05,0x00,0x16,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05
+# GFX12: v_alignbit_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05]
+0x05,0x00,0x16,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05
+
# GFX12: v_alignbit_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05]
0x05,0x00,0x16,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05
@@ -134,6 +146,9 @@
# GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding:
[0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x17,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_alignbyte_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x17,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_alignbyte_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x17,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -173,6 +188,9 @@ # GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x57,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_and_or_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x57,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x57,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_and_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x57,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -221,6 +239,9 @@ # GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x11,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_bfe_i32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x11,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_bfe_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x11,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -257,6 +278,9 @@ # GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x10,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_bfe_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x10,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x10,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_bfe_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x10,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -293,6 +317,9 @@ # GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x12,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_bfi_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x12,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_bfi_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x12,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -354,6 +381,9 @@ # GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_cubeid_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x0c,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_cubeid_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x0c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x0c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -390,6 +420,9 @@ # GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_cubema_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x0f,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_cubema_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x0f,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -426,6 +459,9 @@ # GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_cubesc_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x0d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_cubesc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x0d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -462,6 +498,9 @@ # GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_cubetc_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x0e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_cubetc_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x0e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -582,6 +621,9 @@ # GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x26,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x26,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_cvt_pk_u8_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x26,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -642,6 +684,9 @@ # GFX12: v_fma_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x13,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_fma_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x13,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_fma_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x13,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -690,6 +735,9 @@ # GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x15,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_lerp_u8_e64_dpp v5, v1, s3, v3 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x15,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_lerp_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x15,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -726,6 +774,9 @@ # GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x46,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_lshl_add_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x46,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_lshl_add_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x46,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -762,6 +813,9 @@ # GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x56,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x56,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_lshl_or_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x56,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x56,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_lshl_or_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x56,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -810,6 +864,9 @@ # GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mad_i32_i24_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x0a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mad_i32_i24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x0a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -846,6 +903,9 @@ # GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x0b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mad_u32_u24_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x0b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mad_u32_u24_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x0b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -882,6 +942,9 @@ # GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x2a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_max3_num_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_max3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x2a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -918,6 
+981,9 @@ # GFX12: v_max3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x1d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_max3_i32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x1d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_max3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x1d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -954,6 +1020,9 @@ # GFX12: v_max3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x1e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_max3_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x1e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_max3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x1e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1002,6 +1071,9 @@ # GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x6b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maxmin_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maxmin_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x6b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1038,6 +1110,9 @@ # GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x69,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x69,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maxmin_num_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x69,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x69,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maxmin_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x69,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x69,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1074,6 +1149,9 @@ # GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x64,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maxmin_i32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x64,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maxmin_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x64,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1110,6 +1188,9 @@ # GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x62,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maxmin_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] 
+0x05,0x00,0x62,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maxmin_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x62,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1158,6 +1239,9 @@ # GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x31,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_med3_num_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x31,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_med3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x31,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1194,6 +1278,9 @@ # GFX12: v_med3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x20,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x20,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_med3_i32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x20,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x20,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_med3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x20,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1230,6 +1317,9 @@ # GFX12: v_med3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x21,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x21,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_med3_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x21,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x21,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_med3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x21,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1266,6 +1356,9 @@ # GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x29,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x29,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_min3_num_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x29,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x29,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_min3_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x29,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x29,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1302,6 +1395,9 @@ # GFX12: v_min3_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x1a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_min3_i32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x1a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_min3_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x1a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1338,6 +1434,9 @@ # GFX12: v_min3_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x1b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_min3_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x1b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_min3_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x1b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1386,6 +1485,9 @@ # GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x6a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minmax_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minmax_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x6a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1422,6 +1524,9 @@ # GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x68,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x68,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minmax_num_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x68,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x68,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minmax_num_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x68,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x68,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1458,6 +1563,9 @@ # GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x65,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x65,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minmax_i32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x65,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x65,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minmax_i32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x65,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1494,6 +1602,9 @@ # GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x63,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minmax_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x63,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minmax_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x63,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1530,6 +1641,9 @@ # GFX12: v_msad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x39,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_msad_u8_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x39,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_msad_u8_e64_dpp v5, v1, v2, 
v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x39,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1572,6 +1686,9 @@ # GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x18,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mullit_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x18,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mullit_f32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x18,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1608,6 +1725,9 @@ # GFX12: v_or3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x58,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_or3_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x58,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x58,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_or3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x58,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1650,6 +1770,9 @@ # GFX12: v_perm_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x44,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_perm_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x44,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_perm_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x44,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1686,6 +1809,9 @@ # GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x23,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_sad_hi_u8_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x23,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x23,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_sad_hi_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x23,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1722,6 +1848,9 @@ # GFX12: v_sad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x24,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x24,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_sad_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x24,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x24,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_sad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x24,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1758,6 +1887,9 @@ # GFX12: v_sad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x25,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_sad_u32_e64_dpp 
v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x25,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_sad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x25,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1794,6 +1926,9 @@ # GFX12: v_sad_u8_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x22,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x22,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_sad_u8_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x22,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x22,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_sad_u8_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x22,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1874,6 +2009,9 @@ # GFX12: v_xad_u32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x45,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_xad_u32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x45,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_xad_u32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x45,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -1910,6 +2048,9 @@ # GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x40,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_xor3_b32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x40,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x40,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_xor3_b32_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x40,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2006,6 +2147,9 @@ # GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_div_fixup_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x54,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_div_fixup_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x54,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2048,6 +2192,12 @@ # GFX12: v_fma_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_fma_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x48,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + +# GFX12: v_fma_f16_e64_dpp v5, v1, 4.0, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0xec,0x0d,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x48,0xd6,0xe9,0xec,0x0d,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_fma_f16_e64_dpp v5, 
v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x48,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2090,6 +2240,9 @@ # GFX12: v_mad_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mad_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x53,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mad_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x53,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2129,6 +2282,9 @@ # GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x5a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mad_i32_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x5a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mad_i32_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x5a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2171,6 +2327,9 @@ # GFX12: v_mad_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mad_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x41,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mad_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x41,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2210,6 +2369,9 @@ # GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x59,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_mad_u32_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x59,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x59,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_mad_u32_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x59,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2252,6 +2414,9 @@ # GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x2c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_max3_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2c,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2c,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_max3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x2c,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2294,6 +2459,9 @@ # GFX12: v_max3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 
0x05,0x00,0x4d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_max3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x4d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_max3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4d,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2333,6 +2501,9 @@ # GFX12: v_max3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_max3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x4e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_max3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4e,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2372,6 +2543,9 @@ # GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_med3_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x32,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_med3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2414,6 +2588,9 @@ # GFX12: v_med3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_med3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x50,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_med3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x50,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2453,6 +2630,9 @@ # GFX12: v_med3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_med3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x51,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_med3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x51,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2492,6 +2672,9 @@ # GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x2b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_min3_num_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_min3_num_f16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x2b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x2b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2534,6 +2717,9 @@ # GFX12: v_min3_i16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_min3_i16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x4a,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_min3_i16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4a,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2573,6 +2759,9 @@ # GFX12: v_min3_u16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_min3_u16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x4b,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_min3_u16_e64_dpp v5, v1, v2, v255 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05] 0x05,0x00,0x4b,0xd6,0xe9,0x04,0xfe,0x07,0x01,0x77,0x39,0x05 @@ -2752,6 +2941,9 @@ # GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x2e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maximum3_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maximum3_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x2e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -2788,6 +2980,9 @@ # GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x2d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minimum3_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minimum3_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x2d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -2824,6 +3019,9 @@ # GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x30,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maximum3_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x30,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maximum3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x30,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -2860,6 +3058,9 @@ # GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x2f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: 
v_minimum3_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x2f,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minimum3_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x2f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -2896,6 +3097,9 @@ # GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x6d,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maximumminimum_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6d,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maximumminimum_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x6d,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -2932,6 +3136,9 @@ # GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x6c,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minimummaximum_f32_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6c,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6c,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minimummaximum_f32_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x6c,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -2968,6 +3175,9 @@ # GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x6f,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_maximumminimum_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6f,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_maximumminimum_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x6f,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 @@ -3004,6 +3214,9 @@ # GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05] 0x05,0x00,0x6e,0xd6,0xe9,0x04,0x0e,0x04,0x01,0x77,0x39,0x05 +# GFX12: v_minimummaximum_f16_e64_dpp v5, v1, s3, v3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05] +0x05,0x00,0x6e,0xd6,0xe9,0x06,0x0c,0x04,0x01,0x77,0x39,0x05 + # GFX12: v_minimummaximum_f16_e64_dpp v5, v1, v2, vcc_hi dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x6e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x6e,0xd6,0xe9,0x04,0xae,0x01,0x01,0x77,0x39,0x05 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp16.txt index 56d7805b..b10b8da 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp16.txt @@ -59,6 +59,9 @@ # GFX12: v_add_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_add_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x32,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_add_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x32,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -101,6 +104,9 @@ # GFX12: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_add_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x03,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_add_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x03,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -143,6 +149,9 @@ # GFX12: v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_add_nc_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x25,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_add_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x25,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -185,6 +194,9 @@ # GFX12: v_and_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_and_b32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1b,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_and_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -227,6 +239,9 @@ # GFX12: v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_ashrrev_i32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1a,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_ashrrev_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -270,6 +285,10 @@ # W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff] 
0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0x1b,0x00,0xff +# W32: v_cndmask_b32_e64_dpp v5, v1, s3, s6 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x06,0x18,0x00,0x01,0x1b,0x00,0xff] +# W64: v_cndmask_b32_e64_dpp v5, v1, s3, s[6:7] quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x06,0x18,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x01,0xd5,0xfa,0x06,0x18,0x00,0x01,0x1b,0x00,0xff + # W32: v_cndmask_b32_e64_dpp v5, v1, v2, s6 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] # W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x01,0xd5,0xfa,0x04,0x1a,0x00,0x01,0xe4,0x00,0xff @@ -324,6 +343,9 @@ # GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x2f,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x2f,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -366,6 +388,9 @@ # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_ldexp_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -390,6 +415,9 @@ # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff] 0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x21,0x01,0xff +# GFX12: v_ldexp_f16_e64_dpp v5, v1, 2.0 row_ror:1 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x21,0x01,0xff] +0x05,0x00,0x3b,0xd5,0xfa,0xe8,0x01,0x00,0x01,0x21,0x01,0xff + # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x05,0x00,0x3b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff @@ -408,6 +436,9 @@ # GFX12: v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_lshlrev_b32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x18,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_lshlrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 
0x05,0x00,0x18,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -450,6 +481,9 @@ # GFX12: v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_lshrrev_b32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x19,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_lshrrev_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x19,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -492,6 +526,9 @@ # GFX12: v_max_num_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_max_num_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x31,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_max_num_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x31,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -534,6 +571,9 @@ # GFX12: v_max_num_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_max_num_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x16,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_max_num_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x16,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -576,6 +616,9 @@ # GFX12: v_max_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_max_i32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x12,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_max_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x12,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -618,6 +661,9 @@ # GFX12: v_max_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_max_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x14,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_max_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x14,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -660,6 +706,9 @@ # GFX12: v_min_num_f16_e64_dpp 
v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_min_num_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x30,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_min_num_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x30,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -702,6 +751,9 @@ # GFX12: v_min_num_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_min_num_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x15,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_min_num_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x15,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -744,6 +796,9 @@ # GFX12: v_min_i32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_min_i32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x11,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_min_i32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x11,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -786,6 +841,9 @@ # GFX12: v_min_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_min_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x13,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_min_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x13,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -828,6 +886,9 @@ # GFX12: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_dx9_zero_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x07,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x07,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -870,6 +931,9 @@ # GFX12: v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: 
[0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x35,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x35,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -912,6 +976,9 @@ # GFX12: v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x08,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x08,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -954,6 +1021,9 @@ # GFX12: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_hi_i32_i24_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0a,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0a,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -996,6 +1066,9 @@ # GFX12: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_hi_u32_u24_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0c,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1038,6 +1111,9 @@ # GFX12: v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_i32_i24_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x09,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_i32_i24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x09,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1080,6 +1156,9 @@ # GFX12: v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 
0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_mul_u32_u24_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x0b,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_mul_u32_u24_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x0b,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1122,6 +1201,9 @@ # GFX12: v_or_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_or_b32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1c,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_or_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1c,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1219,6 +1301,9 @@ # GFX12: v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_sub_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x33,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_sub_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x33,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1261,6 +1346,9 @@ # GFX12: v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_sub_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x04,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_sub_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x04,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1303,6 +1391,9 @@ # GFX12: v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_sub_nc_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x26,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_sub_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x26,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1400,6 +1491,9 @@ # GFX12: v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_subrev_f16_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] 
row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x34,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_subrev_f16_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x34,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1442,6 +1536,9 @@ # GFX12: v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_subrev_f32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x05,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_subrev_f32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x05,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1484,6 +1581,9 @@ # GFX12: v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_subrev_nc_u32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x27,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_subrev_nc_u32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x27,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1526,6 +1626,9 @@ # GFX12: v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_xnor_b32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1e,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_xnor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1e,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff @@ -1568,6 +1671,9 @@ # GFX12: v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff +# GFX12: v_xor_b32_e64_dpp v5, v1, s3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff] +0x05,0x00,0x1d,0xd5,0xfa,0x06,0x00,0x00,0x01,0x1b,0x00,0xff + # GFX12: v_xor_b32_e64_dpp v5, v1, v2 quad_perm:[0,1,2,3] row_mask:0xf bank_mask:0xf ; encoding: [0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] 0x05,0x00,0x1d,0xd5,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp8.txt index da7faa8..f78106e 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_from_vop2_dpp8.txt @@ -23,6 +23,9 @@ # GFX12: v_add_f16_e64_dpp v5, v1, v2 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x32,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_add_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x32,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x32,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_add_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x32,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -35,6 +38,9 @@ # GFX12: v_add_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x03,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_add_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x03,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x03,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_add_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x03,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -47,18 +53,27 @@ # GFX12: v_add_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x25,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_add_nc_u32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x25,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x25,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_add_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x25,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x25,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_and_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_and_b32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1b,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1b,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_and_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x1b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_ashrrev_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_ashrrev_i32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1a,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1a,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_ashrrev_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x1a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 @@ -66,6 +81,10 @@ # W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x01,0xd5,0xe9,0x04,0x1a,0x00,0x01,0x77,0x39,0x05 +# W32: v_cndmask_b32_e64_dpp v5, v1, s3, s6 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x06,0x18,0x00,0x01,0x77,0x39,0x05] +# W64: v_cndmask_b32_e64_dpp v5, v1, s3, s[6:7] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x06,0x18,0x00,0x01,0x77,0x39,0x05] 
+0x05,0x00,0x01,0xd5,0xe9,0x06,0x18,0x00,0x01,0x77,0x39,0x05 + # W32: v_cndmask_b32_e64_dpp v5, v1, v2, s104 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] # W64: v_cndmask_b32_e64_dpp v5, v1, v2, s[104:105] dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05] 0x05,0x00,0x01,0xd5,0xe9,0x04,0xa2,0x01,0x01,0x77,0x39,0x05 @@ -84,6 +103,9 @@ # GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x2f,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x2f,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x2f,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cvt_pk_rtz_f16_f32_e64_dpp v5, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x05,0x01,0x2f,0xd5,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -96,30 +118,48 @@ # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_ldexp_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x3b,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05] 0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x08,0x01,0x77,0x39,0x05 +# GFX12: v_ldexp_f16_e64_dpp v5, v1, s3 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x06,0x00,0x08,0x01,0x77,0x39,0x05] +0x05,0x00,0x3b,0xd5,0xe9,0x06,0x00,0x08,0x01,0x77,0x39,0x05 + # GFX12: v_ldexp_f16_e64_dpp v5, v1, v2 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05] 0x05,0x00,0x3b,0xd5,0xe9,0x04,0x02,0x10,0x01,0x77,0x39,0x05 +# GFX12: v_ldexp_f16_e64_dpp v5, v1, s3 mul:4 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x3b,0xd5,0xe9,0x06,0x00,0x10,0x01,0x77,0x39,0x05] +0x05,0x00,0x3b,0xd5,0xe9,0x06,0x00,0x10,0x01,0x77,0x39,0x05 + # GFX12: v_ldexp_f16_e64_dpp v255, -|v255|, v255 clamp div:2 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x81,0x3b,0xd5,0xea,0xfe,0x03,0x38,0xff,0x00,0x00,0x00] 0xff,0x81,0x3b,0xd5,0xea,0xfe,0x03,0x38,0xff,0x00,0x00,0x00 # GFX12: v_lshlrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x18,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_lshlrev_b32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x18,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x18,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_lshlrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x18,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x18,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_lshrrev_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x19,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_lshrrev_b32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x19,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x19,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: 
v_lshrrev_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x19,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x19,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_max_num_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x31,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_max_num_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x31,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x31,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_max_num_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x31,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x31,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -132,6 +172,9 @@ # GFX12: v_max_num_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x16,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_max_num_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x16,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x16,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_max_num_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x16,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x16,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -144,18 +187,27 @@ # GFX12: v_max_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x12,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_max_i32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x12,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x12,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_max_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x12,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x12,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_max_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x14,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_max_u32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x14,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x14,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_max_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x14,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x14,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_min_num_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x30,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_min_num_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x30,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x30,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_min_num_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x30,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x30,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -168,6 +220,9 @@ # GFX12: v_min_num_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x15,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_min_num_f32_e64_dpp v5, v1, s3 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x15,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x15,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_min_num_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x15,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x15,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -180,18 +235,27 @@ # GFX12: v_min_i32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x11,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_min_i32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x11,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x11,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_min_i32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x11,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x11,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_min_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x13,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_min_u32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x13,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x13,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_min_u32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x13,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x13,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_mul_dx9_zero_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x07,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_dx9_zero_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x07,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x07,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_dx9_zero_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x07,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -204,6 +268,9 @@ # GFX12: v_mul_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x35,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x35,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x35,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x35,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -216,6 +283,9 @@ # GFX12: v_mul_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x08,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x08,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x08,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x08,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -228,30 +298,45 @@ # GFX12: v_mul_hi_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0a,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_hi_i32_i24_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0a,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0a,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_hi_i32_i24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x0a,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_mul_hi_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_hi_u32_u24_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0c,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0c,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_hi_u32_u24_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x0c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x0c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_mul_i32_i24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x09,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_i32_i24_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x09,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x09,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_i32_i24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x09,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x09,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_mul_u32_u24_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x0b,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_mul_u32_u24_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x0b,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x0b,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_mul_u32_u24_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x0b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x0b,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_or_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1c,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_or_b32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1c,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1c,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_or_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x1c,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 @@ -277,6 +362,9 @@ # GFX12: v_sub_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x33,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_sub_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x33,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x33,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_sub_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 
0x05,0x01,0x33,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -289,6 +377,9 @@ # GFX12: v_sub_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x04,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_sub_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x04,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x04,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_sub_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x04,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -301,6 +392,9 @@ # GFX12: v_sub_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x26,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_sub_nc_u32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x26,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x26,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_sub_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x26,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x26,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 @@ -326,6 +420,9 @@ # GFX12: v_subrev_f16_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x34,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_subrev_f16_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x34,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x34,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_subrev_f16_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x34,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -338,6 +435,9 @@ # GFX12: v_subrev_f32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x05,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_subrev_f32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x05,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x05,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_subrev_f32_e64_dpp v5, |v1|, -v2 mul:2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05] 0x05,0x01,0x05,0xd5,0xe9,0x04,0x02,0x48,0x01,0x77,0x39,0x05 @@ -350,17 +450,26 @@ # GFX12: v_subrev_nc_u32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x27,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_subrev_nc_u32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x27,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x27,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_subrev_nc_u32_e64_dpp v255, v255, v255 clamp dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x80,0x27,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x80,0x27,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_xnor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1e,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_xnor_b32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1e,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] 
+0x05,0x00,0x1e,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_xnor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1e,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x1e,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_xor_b32_e64_dpp v5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x05,0x00,0x1d,0xd5,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_xor_b32_e64_dpp v5, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x05,0x00,0x1d,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x05,0x00,0x1d,0xd5,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_xor_b32_e64_dpp v255, v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0xff,0x00,0x1d,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0xff,0x00,0x1d,0xd5,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 -- cgit v1.1 From 4d8a3f5b35b01f8223d2e4c0e63d91cd00e9b1a5 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 3 Apr 2024 16:13:53 +0100 Subject: [VectorCombine][X86] Add some tests showing failure to fold shuffle(cast(x),cast(y)) -> cast(shuffle(x,y)) Part of #67803 --- .../VectorCombine/X86/shuffle-of-casts.ll | 189 +++++++++++++++++++++ 1 file changed, 189 insertions(+) create mode 100644 llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll new file mode 100644 index 0000000..3a7c331 --- /dev/null +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -0,0 +1,189 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,SSE +; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,AVX + +; standard vector concatenations + +define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: @concat_zext_v8i16_v16i32( +; CHECK-NEXT: [[X0:%.*]] = zext <8 x i16> [[A0:%.*]] to <8 x i32> +; CHECK-NEXT: [[X1:%.*]] = zext <8 x i16> [[A1:%.*]] to <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> +; CHECK-NEXT: ret <16 x i32> [[R]] +; + %x0 = zext <8 x i16> %a0 to <8 x i32> + %x1 = zext <8 x i16> %a1 to <8 x i32> + %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> + ret <16 x i32> %r +} + +define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: @concat_sext_v8i16_v16i32( +; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> +; CHECK-NEXT: [[X1:%.*]] = sext <8 x i16> [[A1:%.*]] to <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> +; CHECK-NEXT: ret <16 x i32> [[R]] +; + %x0 = sext <8 x i16> %a0 to <8 x i32> + %x1 = sext <8 x i16> %a1 to <8 x i32> + %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> + ret <16 x i32> %r +} + +define <8 x i32> @concat_sext_v4i1_v8i32(<4 x i1> %a0, <4 x i1> %a1) { +; CHECK-LABEL: @concat_sext_v4i1_v8i32( +; CHECK-NEXT: [[X0:%.*]] = sext <4 x i1> [[A0:%.*]] to <4 x i32> +; CHECK-NEXT: [[X1:%.*]] = sext <4 x i1> [[A1:%.*]] to <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[R]] +; + %x0 = sext <4 x i1> %a0 to <4 x i32> + %x1 = sext <4 x i1> %a1 to <4 x i32> + %r = shufflevector <4 x i32> %x0, <4 x i32> %x1, <8 x i32> + 
ret <8 x i32> %r +} + +define <8 x i16> @concat_trunc_v4i32_v8i16(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @concat_trunc_v4i32_v8i16( +; CHECK-NEXT: [[X0:%.*]] = trunc <4 x i32> [[A0:%.*]] to <4 x i16> +; CHECK-NEXT: [[X1:%.*]] = trunc <4 x i32> [[A1:%.*]] to <4 x i16> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[X0]], <4 x i16> [[X1]], <8 x i32> +; CHECK-NEXT: ret <8 x i16> [[R]] +; + %x0 = trunc <4 x i32> %a0 to <4 x i16> + %x1 = trunc <4 x i32> %a1 to <4 x i16> + %r = shufflevector <4 x i16> %x0, <4 x i16> %x1, <8 x i32> + ret <8 x i16> %r +} + +define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @concat_inttoptr_v4i32_v8iptr( +; CHECK-NEXT: [[X0:%.*]] = inttoptr <4 x i32> [[A0:%.*]] to <4 x ptr> +; CHECK-NEXT: [[X1:%.*]] = inttoptr <4 x i32> [[A1:%.*]] to <4 x ptr> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x ptr> [[X0]], <4 x ptr> [[X1]], <8 x i32> +; CHECK-NEXT: ret <8 x ptr> [[R]] +; + %x0 = inttoptr <4 x i32> %a0 to <4 x ptr> + %x1 = inttoptr <4 x i32> %a1 to <4 x ptr> + %r = shufflevector <4 x ptr> %x0, <4 x ptr> %x1, <8 x i32> + ret <8 x ptr> %r +} + +define <16 x i64> @concat_ptrtoint_v8i16_v16i32(<8 x ptr> %a0, <8 x ptr> %a1) { +; CHECK-LABEL: @concat_ptrtoint_v8i16_v16i32( +; CHECK-NEXT: [[X0:%.*]] = ptrtoint <8 x ptr> [[A0:%.*]] to <8 x i64> +; CHECK-NEXT: [[X1:%.*]] = ptrtoint <8 x ptr> [[A1:%.*]] to <8 x i64> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i64> [[X0]], <8 x i64> [[X1]], <16 x i32> +; CHECK-NEXT: ret <16 x i64> [[R]] +; + %x0 = ptrtoint <8 x ptr> %a0 to <8 x i64> + %x1 = ptrtoint <8 x ptr> %a1 to <8 x i64> + %r = shufflevector <8 x i64> %x0, <8 x i64> %x1, <16 x i32> + ret <16 x i64> %r +} + +define <8 x double> @concat_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) { +; CHECK-LABEL: @concat_fpext_v4f32_v8f64( +; CHECK-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double> +; CHECK-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> +; CHECK-NEXT: ret <8 x double> [[R]] +; + %x0 = fpext <4 x float> %a0 to <4 x double> + %x1 = fpext <4 x float> %a1 to <4 x double> + %r = shufflevector <4 x double> %x0, <4 x double> %x1, <8 x i32> + ret <8 x double> %r +} + +define <16 x float> @concat_fptrunc_v8f64_v16f32(<8 x double> %a0, <8 x double> %a1) { +; CHECK-LABEL: @concat_fptrunc_v8f64_v16f32( +; CHECK-NEXT: [[X0:%.*]] = fptrunc <8 x double> [[A0:%.*]] to <8 x float> +; CHECK-NEXT: [[X1:%.*]] = fptrunc <8 x double> [[A1:%.*]] to <8 x float> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x float> [[X0]], <8 x float> [[X1]], <16 x i32> +; CHECK-NEXT: ret <16 x float> [[R]] +; + %x0 = fptrunc <8 x double> %a0 to <8 x float> + %x1 = fptrunc <8 x double> %a1 to <8 x float> + %r = shufflevector <8 x float> %x0, <8 x float> %x1, <16 x i32> + ret <16 x float> %r +} + +; commuted vector concatenation + +define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: @rconcat_sext_v8i16_v16i32( +; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> +; CHECK-NEXT: [[X1:%.*]] = sext <8 x i16> [[A1:%.*]] to <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> +; CHECK-NEXT: ret <16 x i32> [[R]] +; + %x0 = sext <8 x i16> %a0 to <8 x i32> + %x1 = sext <8 x i16> %a1 to <8 x i32> + %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> + ret <16 x i32> %r +} + +; interleaved shuffle + +define <8 x double> 
@interleave_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) { +; CHECK-LABEL: @interleave_fpext_v4f32_v8f64( +; CHECK-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double> +; CHECK-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> +; CHECK-NEXT: ret <8 x double> [[R]] +; + %x0 = fpext <4 x float> %a0 to <4 x double> + %x1 = fpext <4 x float> %a1 to <4 x double> + %r = shufflevector <4 x double> %x0, <4 x double> %x1, <8 x i32> + ret <8 x double> %r +} + +; negative - multiuse + +define <8 x i16> @concat_trunc_v4i32_v8i16_multiuse(<4 x i32> %a0, <4 x i32> %a1, ptr %a2) { +; CHECK-LABEL: @concat_trunc_v4i32_v8i16_multiuse( +; CHECK-NEXT: [[X0:%.*]] = trunc <4 x i32> [[A0:%.*]] to <4 x i16> +; CHECK-NEXT: [[X1:%.*]] = trunc <4 x i32> [[A1:%.*]] to <4 x i16> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[X0]], <4 x i16> [[X1]], <8 x i32> +; CHECK-NEXT: store <4 x i16> [[X0]], ptr [[A2:%.*]], align 8 +; CHECK-NEXT: ret <8 x i16> [[R]] +; + %x0 = trunc <4 x i32> %a0 to <4 x i16> + %x1 = trunc <4 x i32> %a1 to <4 x i16> + %r = shufflevector <4 x i16> %x0, <4 x i16> %x1, <8 x i32> + store <4 x i16> %x0, ptr %a2 + ret <8 x i16> %r +} + +; negative - bitcasts + +define <8 x float> @concat_bitcast_v4i32_v8f32(<4 x i32> %a0, <4 x i32> %a1) { +; CHECK-LABEL: @concat_bitcast_v4i32_v8f32( +; CHECK-NEXT: [[X0:%.*]] = bitcast <4 x i32> [[A0:%.*]] to <4 x float> +; CHECK-NEXT: [[X1:%.*]] = bitcast <4 x i32> [[A1:%.*]] to <4 x float> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x float> [[X0]], <4 x float> [[X1]], <8 x i32> +; CHECK-NEXT: ret <8 x float> [[R]] +; + %x0 = bitcast <4 x i32> %a0 to <4 x float> + %x1 = bitcast <4 x i32> %a1 to <4 x float> + %r = shufflevector <4 x float> %x0, <4 x float> %x1, <8 x i32> + ret <8 x float> %r +} + +; negative - castop mismatch + +define <16 x i32> @concat_sext_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: @concat_sext_zext_v8i16_v16i32( +; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> +; CHECK-NEXT: [[X1:%.*]] = zext <8 x i16> [[A1:%.*]] to <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> +; CHECK-NEXT: ret <16 x i32> [[R]] +; + %x0 = sext <8 x i16> %a0 to <8 x i32> + %x1 = zext <8 x i16> %a1 to <8 x i32> + %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> + ret <16 x i32> %r +} +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; AVX: {{.*}} +; SSE: {{.*}} -- cgit v1.1 From a77d3d9a2e5decc814119dc4e0a7b4625a6f6490 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Wed, 3 Apr 2024 17:36:00 +0200 Subject: [libc++] Disables -Wweak-vtables diagnostics. (#85577) This is a preparation to use Clang HEAD in the CI. --- libcxx/include/__expected/bad_expected_access.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/libcxx/include/__expected/bad_expected_access.h b/libcxx/include/__expected/bad_expected_access.h index 27f01d9..585b4ec 100644 --- a/libcxx/include/__expected/bad_expected_access.h +++ b/libcxx/include/__expected/bad_expected_access.h @@ -27,6 +27,8 @@ _LIBCPP_BEGIN_NAMESPACE_STD template class bad_expected_access; +_LIBCPP_DIAGNOSTIC_PUSH +_LIBCPP_CLANG_DIAGNOSTIC_IGNORED("-Wweak-vtables") template <> class bad_expected_access : public exception { protected: @@ -44,6 +46,7 @@ public: // it adds deployment target restrictions. 
_LIBCPP_HIDE_FROM_ABI_VIRTUAL const char* what() const noexcept override { return "bad access to std::expected"; } }; +_LIBCPP_DIAGNOSTIC_POP template class bad_expected_access : public bad_expected_access { -- cgit v1.1 From 362aa434cc31ccca96749a6db8cd97f5b7d71206 Mon Sep 17 00:00:00 2001 From: Hsiangkai Wang Date: Wed, 3 Apr 2024 16:58:01 +0100 Subject: [mlir] Enhance TimingManager Printing Flexibility (#85821) Revise the printing functionality of TimingManager to accommodate various output formats. At present, TimingManager is limited to outputting data solely in plain text format. To overcome this limitation, I have introduced an abstract class that serves as the foundation for printing. This approach allows users to implement additional output formats by extending this abstract class. As part of this update, I have integrated support for JSON as a new output format, enhancing the ease of parsing for subsequent processing scripts. --- mlir/docs/PassManagement.md | 102 ++++++++++++++----- mlir/include/mlir/Support/Timing.h | 62 +++++++++++- mlir/lib/Support/Timing.cpp | 195 ++++++++++++++++++++++++------------- mlir/test/Pass/pass-timing.mlir | 32 ++++++ 4 files changed, 297 insertions(+), 94 deletions(-) diff --git a/mlir/docs/PassManagement.md b/mlir/docs/PassManagement.md index c9d705f..e9ecb99 100644 --- a/mlir/docs/PassManagement.md +++ b/mlir/docs/PassManagement.md @@ -1124,17 +1124,44 @@ pipeline. This display mode is available in mlir-opt via $ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.module(func.func(cse,canonicalize),convert-func-to-llvm)' -mlir-timing -mlir-timing-display=list ===-------------------------------------------------------------------------=== - ... Pass execution timing report ... + ... Execution time report ... ===-------------------------------------------------------------------------=== - Total Execution Time: 0.0203 seconds - - ---Wall Time--- --- Name --- - 0.0047 ( 55.9%) Canonicalizer - 0.0019 ( 22.2%) VerifierPass - 0.0016 ( 18.5%) LLVMLoweringPass - 0.0003 ( 3.4%) CSE - 0.0002 ( 1.9%) (A) DominanceInfo - 0.0084 (100.0%) Total + Total Execution Time: 0.0135 seconds + + ----Wall Time---- ----Name---- + 0.0135 (100.0%) root + 0.0041 ( 30.1%) Parser + 0.0018 ( 13.3%) ConvertFuncToLLVMPass + 0.0011 ( 8.2%) Output + 0.0007 ( 5.2%) Pipeline Collection : ['func.func'] + 0.0006 ( 4.6%) 'func.func' Pipeline + 0.0005 ( 3.5%) Canonicalizer + 0.0001 ( 0.9%) CSE + 0.0001 ( 0.5%) (A) DataLayoutAnalysis + 0.0000 ( 0.1%) (A) DominanceInfo + 0.0058 ( 43.2%) Rest + 0.0135 (100.0%) Total +``` + +The results can be displayed in JSON format via `-mlir-output-format=json`. 
+ +```shell +$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.module(func.func(cse,canonicalize),convert-func-to-llvm)' -mlir-timing -mlir-timing-display=list -mlir-output-format=json + +[ +{"wall": {"duration": 0.0135, "percentage": 100.0}, "name": "root"}, +{"wall": {"duration": 0.0041, "percentage": 30.1}, "name": "Parser"}, +{"wall": {"duration": 0.0018, "percentage": 13.3}, "name": "ConvertFuncToLLVMPass"}, +{"wall": {"duration": 0.0011, "percentage": 8.2}, "name": "Output"}, +{"wall": {"duration": 0.0007, "percentage": 5.2}, "name": "Pipeline Collection : ['func.func']"}, +{"wall": {"duration": 0.0006, "percentage": 4.6}, "name": "'func.func' Pipeline"}, +{"wall": {"duration": 0.0005, "percentage": 3.5}, "name": "Canonicalizer"}, +{"wall": {"duration": 0.0001, "percentage": 0.9}, "name": "CSE"}, +{"wall": {"duration": 0.0001, "percentage": 0.5}, "name": "(A) DataLayoutAnalysis"}, +{"wall": {"duration": 0.0000, "percentage": 0.1}, "name": "(A) DominanceInfo"}, +{"wall": {"duration": 0.0058, "percentage": 43.2}, "name": "Rest"}, +{"wall": {"duration": 0.0135, "percentage": 100.0}, "name": "Total"} +] ``` ##### Tree Display Mode @@ -1149,21 +1176,48 @@ invalidated and recomputed. This is the default display mode. $ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.module(func.func(cse,canonicalize),convert-func-to-llvm)' -mlir-timing ===-------------------------------------------------------------------------=== - ... Pass execution timing report ... + ... Execution time report ... ===-------------------------------------------------------------------------=== - Total Execution Time: 0.0249 seconds - - ---Wall Time--- --- Name --- - 0.0058 ( 70.8%) 'func.func' Pipeline - 0.0004 ( 4.3%) CSE - 0.0002 ( 2.6%) (A) DominanceInfo - 0.0004 ( 4.8%) VerifierPass - 0.0046 ( 55.4%) Canonicalizer - 0.0005 ( 6.2%) VerifierPass - 0.0005 ( 5.8%) VerifierPass - 0.0014 ( 17.2%) LLVMLoweringPass - 0.0005 ( 6.2%) VerifierPass - 0.0082 (100.0%) Total + Total Execution Time: 0.0127 seconds + + ----Wall Time---- ----Name---- + 0.0038 ( 30.2%) Parser + 0.0006 ( 4.8%) 'func.func' Pipeline + 0.0001 ( 0.9%) CSE + 0.0000 ( 0.1%) (A) DominanceInfo + 0.0005 ( 3.7%) Canonicalizer + 0.0017 ( 13.7%) ConvertFuncToLLVMPass + 0.0001 ( 0.6%) (A) DataLayoutAnalysis + 0.0010 ( 8.2%) Output + 0.0054 ( 42.5%) Rest + 0.0127 (100.0%) Total +``` + +The results can be displayed in JSON format via `-mlir-output-format=json`. 
+ +```shell +$ mlir-opt foo.mlir -mlir-disable-threading -pass-pipeline='builtin.module(func.func(cse,canonicalize),convert-func-to-llvm)' -mlir-timing -mlir-output-format=json + +[ +{"wall": {"duration": 0.0038, "percentage": 30.2}, "name": "Parser", "passes": [ +{}]}, +{"wall": {"duration": 0.0006, "percentage": 4.8}, "name": "'func.func' Pipeline", "passes": [ + {"wall": {"duration": 0.0001, "percentage": 0.9}, "name": "CSE", "passes": [ + {"wall": {"duration": 0.0000, "percentage": 0.1}, "name": "(A) DominanceInfo", "passes": [ + {}]}, + {}]}, + {"wall": {"duration": 0.0005, "percentage": 3.7}, "name": "Canonicalizer", "passes": [ + {}]}, +{}]}, +{"wall": {"duration": 0.0017, "percentage": 13.7}, "name": "ConvertFuncToLLVMPass", "passes": [ + {"wall": {"duration": 0.0001, "percentage": 0.6}, "name": "(A) DataLayoutAnalysis", "passes": [ + {}]}, +{}]}, +{"wall": {"duration": 0.0010, "percentage": 8.2}, "name": "Output", "passes": [ +{}]}, +{"wall": {"duration": 0.0054, "percentage": 42.5}, "name": "Rest"}, +{"wall": {"duration": 0.0127, "percentage": 100.0}, "name": "Total"} +] ``` ##### Multi-threaded Pass Timing diff --git a/mlir/include/mlir/Support/Timing.h b/mlir/include/mlir/Support/Timing.h index bc3a642..a8a4bfd 100644 --- a/mlir/include/mlir/Support/Timing.h +++ b/mlir/include/mlir/Support/Timing.h @@ -321,6 +321,53 @@ private: }; //===----------------------------------------------------------------------===// +// OutputStrategy +//===----------------------------------------------------------------------===// + +/// Simple record class to record timing information. +struct TimeRecord { + TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {} + + TimeRecord &operator+=(const TimeRecord &other) { + wall += other.wall; + user += other.user; + return *this; + } + + TimeRecord &operator-=(const TimeRecord &other) { + wall -= other.wall; + user -= other.user; + return *this; + } + + double wall, user; +}; + +/// Facilities for printing timing reports to various output formats. +/// +/// This is an abstract class that serves as the foundation for printing. +/// Users can implement additional output formats by extending this abstract +/// class. +class OutputStrategy { +public: + OutputStrategy(raw_ostream &os) : os(os) {} + virtual ~OutputStrategy() = default; + + virtual void printHeader(const TimeRecord &total) = 0; + virtual void printFooter() = 0; + virtual void printTime(const TimeRecord &time, const TimeRecord &total) = 0; + virtual void printListEntry(StringRef name, const TimeRecord &time, + const TimeRecord &total, + bool lastEntry = false) = 0; + virtual void printTreeEntry(unsigned indent, StringRef name, + const TimeRecord &time, + const TimeRecord &total) = 0; + virtual void printTreeEntryEnd(unsigned indent, bool lastEntry = false) = 0; + + raw_ostream &os; +}; + +//===----------------------------------------------------------------------===// // DefaultTimingManager //===----------------------------------------------------------------------===// @@ -351,6 +398,15 @@ public: Tree, }; + /// The different output formats for printing the timers. + enum class OutputFormat { + /// In this format the results are displayed in text format. + Text, + + /// In this format the results are displayed in JSON format. + Json, + }; + DefaultTimingManager(); DefaultTimingManager(DefaultTimingManager &&rhs); ~DefaultTimingManager() override; @@ -372,10 +428,7 @@ public: DisplayMode getDisplayMode() const; /// Change the stream where the output will be printed to. 
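To make the extension point concrete, here is a minimal sketch of a client-defined strategy; the CSV format, the CsvOutput name, and the llvm::format call are illustrative assumptions rather than part of this patch, and the object would be installed through the new setOutput(std::unique_ptr<OutputStrategy>) API below:

#include "mlir/Support/Timing.h"
#include "llvm/Support/Format.h"

// Illustrative only: emits one "name,wall-seconds,percent" row per timer entry.
class CsvOutput : public mlir::OutputStrategy {
public:
  CsvOutput(llvm::raw_ostream &os) : mlir::OutputStrategy(os) {}

  void printHeader(const mlir::TimeRecord &) override { os << "name,wall,percent\n"; }
  void printFooter() override { os.flush(); }
  void printTime(const mlir::TimeRecord &time, const mlir::TimeRecord &total) override {
    os << llvm::format("%.4f,%.1f", time.wall, 100.0 * time.wall / total.wall);
  }
  void printListEntry(llvm::StringRef name, const mlir::TimeRecord &time,
                      const mlir::TimeRecord &total, bool lastEntry) override {
    os << name << ",";
    printTime(time, total);
    os << "\n";
  }
  void printTreeEntry(unsigned indent, llvm::StringRef name, const mlir::TimeRecord &time,
                      const mlir::TimeRecord &total) override {
    // Flatten the tree into plain rows; indentation is dropped in CSV.
    printListEntry(name, time, total, /*lastEntry=*/false);
  }
  void printTreeEntryEnd(unsigned indent, bool lastEntry) override {}
};

// Usage sketch: tm.setOutput(std::make_unique<CsvOutput>(llvm::errs()));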
- void setOutput(raw_ostream &os); - - /// Return the current output stream where the output will be printed to. - raw_ostream &getOutput() const; + void setOutput(std::unique_ptr output); /// Print and clear the timing results. Only call this when there are no more /// references to nested timers around, as printing post-processes and clears @@ -408,6 +461,7 @@ protected: private: const std::unique_ptr impl; + std::unique_ptr out; }; /// Register a set of useful command-line options that can be used to configure diff --git a/mlir/lib/Support/Timing.cpp b/mlir/lib/Support/Timing.cpp index 2249312..1d6796e 100644 --- a/mlir/lib/Support/Timing.cpp +++ b/mlir/lib/Support/Timing.cpp @@ -32,6 +32,7 @@ using namespace mlir; using namespace detail; using DisplayMode = DefaultTimingManager::DisplayMode; +using OutputFormat = DefaultTimingManager::OutputFormat; constexpr llvm::StringLiteral kTimingDescription = "... Execution time report ..."; @@ -109,56 +110,105 @@ TimingIdentifier TimingIdentifier::get(StringRef str, TimingManager &tm) { namespace { -/// Simple record class to record timing information. -struct TimeRecord { - TimeRecord(double wall = 0.0, double user = 0.0) : wall(wall), user(user) {} +class OutputTextStrategy : public OutputStrategy { +public: + OutputTextStrategy(raw_ostream &os) : OutputStrategy(os) {} + + void printHeader(const TimeRecord &total) override { + // Figure out how many spaces to description name. + unsigned padding = (80 - kTimingDescription.size()) / 2; + os << "===" << std::string(73, '-') << "===\n"; + os.indent(padding) << kTimingDescription << '\n'; + os << "===" << std::string(73, '-') << "===\n"; - TimeRecord &operator+=(const TimeRecord &other) { - wall += other.wall; - user += other.user; - return *this; + // Print the total time followed by the section headers. + os << llvm::format(" Total Execution Time: %.4f seconds\n\n", total.wall); + if (total.user != total.wall) + os << " ----User Time----"; + os << " ----Wall Time---- ----Name----\n"; } - TimeRecord &operator-=(const TimeRecord &other) { - wall -= other.wall; - user -= other.user; - return *this; + void printFooter() override { os.flush(); } + + void printTime(const TimeRecord &time, const TimeRecord &total) override { + if (total.user != total.wall) { + os << llvm::format(" %8.4f (%5.1f%%)", time.user, + 100.0 * time.user / total.user); + } + os << llvm::format(" %8.4f (%5.1f%%) ", time.wall, + 100.0 * time.wall / total.wall); } - /// Print the current time record to 'os', with a breakdown showing - /// contributions to the give 'total' time record. - void print(raw_ostream &os, const TimeRecord &total) { - if (total.user != total.wall) - os << llvm::format(" %8.4f (%5.1f%%)", user, 100.0 * user / total.user); - os << llvm::format(" %8.4f (%5.1f%%) ", wall, 100.0 * wall / total.wall); + void printListEntry(StringRef name, const TimeRecord &time, + const TimeRecord &total, bool lastEntry) override { + printTime(time, total); + os << name << "\n"; + } + + void printTreeEntry(unsigned indent, StringRef name, const TimeRecord &time, + const TimeRecord &total) override { + printTime(time, total); + os.indent(indent) << name << "\n"; } - double wall, user; + void printTreeEntryEnd(unsigned indent, bool lastEntry) override {} }; -} // namespace +class OutputJsonStrategy : public OutputStrategy { +public: + OutputJsonStrategy(raw_ostream &os) : OutputStrategy(os) {} -/// Utility to print a single line entry in the timer output. 
-static void printTimeEntry(raw_ostream &os, unsigned indent, StringRef name, - TimeRecord time, TimeRecord total) { - time.print(os, total); - os.indent(indent) << name << "\n"; -} + void printHeader(const TimeRecord &total) override { os << "[" << "\n"; } -/// Utility to print the timer heading information. -static void printTimeHeader(raw_ostream &os, TimeRecord total) { - // Figure out how many spaces to description name. - unsigned padding = (80 - kTimingDescription.size()) / 2; - os << "===" << std::string(73, '-') << "===\n"; - os.indent(padding) << kTimingDescription << '\n'; - os << "===" << std::string(73, '-') << "===\n"; - - // Print the total time followed by the section headers. - os << llvm::format(" Total Execution Time: %.4f seconds\n\n", total.wall); - if (total.user != total.wall) - os << " ----User Time----"; - os << " ----Wall Time---- ----Name----\n"; -} + void printFooter() override { + os << "]" << "\n"; + os.flush(); + } + + void printTime(const TimeRecord &time, const TimeRecord &total) override { + if (total.user != total.wall) { + os << "\"user\": {"; + os << "\"duration\": " << llvm::format("%8.4f", time.user) << ", "; + os << "\"percentage\": " + << llvm::format("%5.1f", 100.0 * time.user / total.user); + os << "}, "; + } + os << "\"wall\": {"; + os << "\"duration\": " << llvm::format("%8.4f", time.wall) << ", "; + os << "\"percentage\": " + << llvm::format("%5.1f", 100.0 * time.wall / total.wall); + os << "}"; + } + + void printListEntry(StringRef name, const TimeRecord &time, + const TimeRecord &total, bool lastEntry) override { + os << "{"; + printTime(time, total); + os << ", \"name\": " << "\"" << name << "\""; + os << "}"; + if (!lastEntry) + os << ","; + os << "\n"; + } + + void printTreeEntry(unsigned indent, StringRef name, const TimeRecord &time, + const TimeRecord &total) override { + os.indent(indent) << "{"; + printTime(time, total); + os << ", \"name\": " << "\"" << name << "\""; + os << ", \"passes\": [" << "\n"; + } + + void printTreeEntryEnd(unsigned indent, bool lastEntry) override { + os.indent(indent) << "{}]"; + os << "}"; + if (!lastEntry) + os << ","; + os << "\n"; + } +}; + +} // namespace //===----------------------------------------------------------------------===// // Timer Implementation for DefaultTimingManager @@ -176,7 +226,8 @@ public: using ChildrenMap = llvm::MapVector>; using AsyncChildrenMap = llvm::DenseMap; - TimerImpl(std::string &&name) : threadId(llvm::get_threadid()), name(name) {} + TimerImpl(std::string &&name, std::unique_ptr &output) + : threadId(llvm::get_threadid()), name(name), output(output) {} /// Start the timer. void start() { startTime = std::chrono::steady_clock::now(); } @@ -206,7 +257,7 @@ public: TimerImpl *nestTail(std::unique_ptr &child, function_ref nameBuilder) { if (!child) - child = std::make_unique(nameBuilder()); + child = std::make_unique(nameBuilder(), output); return child.get(); } @@ -320,7 +371,7 @@ public: } /// Print the timing result in list mode. - void printAsList(raw_ostream &os, TimeRecord total) { + void printAsList(TimeRecord total) { // Flatten the leaf timers in the tree and merge them by name. llvm::StringMap mergedTimers; std::function addTimer = [&](TimerImpl *timer) { @@ -343,34 +394,37 @@ public: // Print the timing information sequentially. for (auto &timeData : timerNameAndTime) - printTimeEntry(os, 0, timeData.first, timeData.second, total); + output->printListEntry(timeData.first, timeData.second, total); } /// Print the timing result in tree mode. 
- void printAsTree(raw_ostream &os, TimeRecord total, unsigned indent = 0) { + void printAsTree(TimeRecord total, unsigned indent = 0) { unsigned childIndent = indent; if (!hidden) { - printTimeEntry(os, indent, name, getTimeRecord(), total); + output->printTreeEntry(indent, name, getTimeRecord(), total); childIndent += 2; } for (auto &child : children) { - child.second->printAsTree(os, total, childIndent); + child.second->printAsTree(total, childIndent); + } + if (!hidden) { + output->printTreeEntryEnd(indent); } } /// Print the current timing information. - void print(raw_ostream &os, DisplayMode displayMode) { + void print(DisplayMode displayMode) { // Print the banner. auto total = getTimeRecord(); - printTimeHeader(os, total); + output->printHeader(total); // Defer to a specialized printer for each display mode. switch (displayMode) { case DisplayMode::List: - printAsList(os, total); + printAsList(total); break; case DisplayMode::Tree: - printAsTree(os, total); + printAsTree(total); break; } @@ -379,9 +433,9 @@ public: auto rest = total; for (auto &child : children) rest -= child.second->getTimeRecord(); - printTimeEntry(os, 0, "Rest", rest, total); - printTimeEntry(os, 0, "Total", total, total); - os.flush(); + output->printListEntry("Rest", rest, total); + output->printListEntry("Total", total, total, /*lastEntry=*/true); + output->printFooter(); } /// The last time instant at which the timer was started. @@ -415,6 +469,8 @@ public: /// Mutex for the async children. std::mutex asyncMutex; + + std::unique_ptr &output; }; } // namespace @@ -435,9 +491,6 @@ public: /// The configured display mode. DisplayMode displayMode = DisplayMode::Tree; - /// The stream where we should print our output. This will always be non-null. - raw_ostream *output = &llvm::errs(); - /// The root timer. std::unique_ptr rootTimer; }; @@ -469,26 +522,22 @@ DefaultTimingManager::DisplayMode DefaultTimingManager::getDisplayMode() const { } /// Change the stream where the output will be printed to. -void DefaultTimingManager::setOutput(raw_ostream &os) { impl->output = &os; } - -/// Return the current output stream where the output will be printed to. -raw_ostream &DefaultTimingManager::getOutput() const { - assert(impl->output); - return *impl->output; +void DefaultTimingManager::setOutput(std::unique_ptr output) { + out = std::move(output); } /// Print and clear the timing results. void DefaultTimingManager::print() { if (impl->enabled) { impl->rootTimer->finalize(); - impl->rootTimer->print(*impl->output, impl->displayMode); + impl->rootTimer->print(impl->displayMode); } clear(); } /// Clear the timing results. void DefaultTimingManager::clear() { - impl->rootTimer = std::make_unique("root"); + impl->rootTimer = std::make_unique("root", out); impl->rootTimer->hidden = true; } @@ -500,13 +549,13 @@ void DefaultTimingManager::dumpTimers(raw_ostream &os) { /// Debug print the timers as a list. void DefaultTimingManager::dumpAsList(raw_ostream &os) { impl->rootTimer->finalize(); - impl->rootTimer->print(os, DisplayMode::List); + impl->rootTimer->print(DisplayMode::List); } /// Debug print the timers as a tree. 
void DefaultTimingManager::dumpAsTree(raw_ostream &os) { impl->rootTimer->finalize(); - impl->rootTimer->print(os, DisplayMode::Tree); + impl->rootTimer->print(DisplayMode::Tree); } std::optional DefaultTimingManager::rootTimer() { @@ -549,6 +598,13 @@ struct DefaultTimingManagerOptions { "display the results in a list sorted by total time"), clEnumValN(DisplayMode::Tree, "tree", "display the results ina with a nested tree view"))}; + llvm::cl::opt outputFormat{ + "mlir-output-format", llvm::cl::desc("Output format for timing data"), + llvm::cl::init(OutputFormat::Text), + llvm::cl::values(clEnumValN(OutputFormat::Text, "text", + "display the results in text format"), + clEnumValN(OutputFormat::Json, "json", + "display the results in JSON format"))}; }; } // namespace @@ -564,4 +620,11 @@ void mlir::applyDefaultTimingManagerCLOptions(DefaultTimingManager &tm) { return; tm.setEnabled(options->timing); tm.setDisplayMode(options->displayMode); + + std::unique_ptr printer; + if (options->outputFormat == OutputFormat::Text) + printer = std::make_unique(llvm::errs()); + else if (options->outputFormat == OutputFormat::Json) + printer = std::make_unique(llvm::errs()); + tm.setOutput(std::move(printer)); } diff --git a/mlir/test/Pass/pass-timing.mlir b/mlir/test/Pass/pass-timing.mlir index bd5d611..cfb4b74 100644 --- a/mlir/test/Pass/pass-timing.mlir +++ b/mlir/test/Pass/pass-timing.mlir @@ -1,5 +1,7 @@ // RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=LIST %s +// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=list -mlir-output-format=json 2>&1 | FileCheck -check-prefix=LIST-JSON %s // RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=PIPELINE %s +// RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=tree -mlir-output-format=json 2>&1 | FileCheck -check-prefix=PIPELINE-JSON %s // RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=list 2>&1 | FileCheck -check-prefix=MT_LIST %s // RUN: mlir-opt %s -mlir-disable-threading=false -verify-each=true -pass-pipeline='builtin.module(func.func(cse,canonicalize,cse))' -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=MT_PIPELINE %s // RUN: mlir-opt %s -mlir-disable-threading=true -verify-each=false -test-pm-nested-pipeline -mlir-timing -mlir-timing-display=tree 2>&1 | FileCheck -check-prefix=NESTED_PIPELINE %s @@ -12,6 +14,14 @@ // LIST-DAG: DominanceInfo // LIST: Total +// LIST-JSON-NOT: Execution time report +// LIST-JSON-NOT: Total Execution Time: +// LIST-JSON-NOT: Name +// LIST-JSON-DAG: "name": "Canonicalizer"} +// LIST-JSON-DAG: "name": "CSE"} +// LIST-JSON-DAG: "name": "(A) DominanceInfo"} +// LIST-JSON: "name": "Total"} + // PIPELINE: Execution time report // PIPELINE: Total Execution Time: // PIPELINE: Name @@ -26,6 +36,28 @@ // PIPELINE-NEXT: Rest // PIPELINE-NEXT: Total +// PIPELINE-JSON-NOT: Execution time report +// PIPELINE-JSON-NOT: Total Execution Time: +// PIPELINE-JSON-NOT: Name +// 
PIPELINE-JSON: "name": "Parser", "passes": [ +// PIPELINE-JSON-NEXT: {}]}, +// PIPELINE-JSON-NEXT: "name": "'func.func' Pipeline", "passes": [ +// PIPELINE-JSON-NEXT: "name": "CSE", "passes": [ +// PIPELINE-JSON-NEXT: "name": "(A) DominanceInfo", "passes": [ +// PIPELINE-JSON-NEXT: {}]}, +// PIPELINE-JSON-NEXT: {}]}, +// PIPELINE-JSON-NEXT: "name": "Canonicalizer", "passes": [ +// PIPELINE-JSON-NEXT: {}]}, +// PIPELINE-JSON-NEXT: "name": "CSE", "passes": [ +// PIPELINE-JSON-NEXT: "name": "(A) DominanceInfo", "passes": [ +// PIPELINE-JSON-NEXT: {}]}, +// PIPELINE-JSON-NEXT: {}]}, +// PIPELINE-JSON-NEXT: {}]}, +// PIPELINE-JSON-NEXT: "name": "Output", "passes": [ +// PIPELINE-JSON-NEXT: {}]}, +// PIPELINE-JSON-NEXT: "name": "Rest" +// PIPELINE-JSON-NEXT: "name": "Total" + // MT_LIST: Execution time report // MT_LIST: Total Execution Time: // MT_LIST: Name -- cgit v1.1 From 72e2e4f7dc682fa3f6eda9f3cfbd20a8ffaac4e4 Mon Sep 17 00:00:00 2001 From: Owen Pan Date: Wed, 3 Apr 2024 09:00:23 -0700 Subject: [clang-format] Lambda parameter should be passed by const reference (#87306) Closes #87254. --- clang/lib/Format/Format.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index e41cf29..89e6c19 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -3581,7 +3581,7 @@ cleanupAroundReplacements(StringRef Code, const tooling::Replacements &Replaces, // We need to use lambda function here since there are two versions of // `cleanup`. auto Cleanup = [](const FormatStyle &Style, StringRef Code, - std::vector Ranges, + ArrayRef Ranges, StringRef FileName) -> tooling::Replacements { return cleanup(Style, Code, Ranges, FileName); }; -- cgit v1.1 From 6f2d8cc0614bee1074e9d11f1ac0df9ce9d185f6 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Wed, 3 Apr 2024 18:15:24 +0200 Subject: [libc++][chrono] Loads leap-seconds.list in tzdb. (#82113) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This implements the loading of the leap-seconds.list file and store its contents in the tzdb struct. This adds the required `leap_seconds` member. The class leap_seconds is fully implemented including its non-member functions. 
Implements parts of: - P0355 Extending to Calendars and Time Zones - P1614 The Mothership has Landed Implements: - P1981 Rename leap to leap_second - LWG3359 leap second support should allow for negative leap seconds - LWG3383 §[time.zone.leap.nonmembers] sys_seconds should be replaced with seconds --- libcxx/docs/Status/Cxx20Issues.csv | 4 +- libcxx/docs/Status/Cxx20Papers.csv | 2 +- libcxx/docs/Status/SpaceshipProjects.csv | 2 +- libcxx/include/CMakeLists.txt | 1 + libcxx/include/__chrono/leap_second.h | 126 +++++++++++++++++++++ libcxx/include/__chrono/tzdb.h | 3 + libcxx/include/chrono | 39 +++++++ libcxx/include/libcxx.imp | 1 + libcxx/include/module.modulemap | 1 + libcxx/modules/std/chrono.inc | 28 +++-- libcxx/src/CMakeLists.txt | 1 + libcxx/src/include/tzdb/leap_second_private.h | 27 +++++ libcxx/src/tzdb.cpp | 42 +++++++ .../chrono.nodiscard_extensions.compile.pass.cpp | 6 + .../chrono.nodiscard_extensions.verify.cpp | 6 + .../time.zone/time.zone.db/leap_seconds.pass.cpp | 119 +++++++++++++++++++ .../time.zone/time.zone.db/leap_seconds.pass.cpp | 75 ++++++++++++ .../time.zone.db.access/get_tzdb.pass.cpp | 3 + .../time.zone.db.tzdb/tzdb.members.pass.cpp | 6 +- .../time.zone/time.zone.leap/assign.copy.pass.cpp | 71 ++++++++++++ .../time.zone/time.zone.leap/cons.copy.pass.cpp | 69 +++++++++++ .../time.zone/time.zone.leap/members/date.pass.cpp | 53 +++++++++ .../time.zone.leap/members/value.pass.cpp | 53 +++++++++ .../time.zone.leap/nonmembers/comparison.pass.cpp | 85 ++++++++++++++ libcxx/test/support/test_chrono_leap_second.h | 52 +++++++++ 25 files changed, 852 insertions(+), 23 deletions(-) create mode 100644 libcxx/include/__chrono/leap_second.h create mode 100644 libcxx/src/include/tzdb/leap_second_private.h create mode 100644 libcxx/test/libcxx/time/time.zone/time.zone.db/leap_seconds.pass.cpp create mode 100644 libcxx/test/std/time/time.zone/time.zone.db/leap_seconds.pass.cpp create mode 100644 libcxx/test/std/time/time.zone/time.zone.leap/assign.copy.pass.cpp create mode 100644 libcxx/test/std/time/time.zone/time.zone.leap/cons.copy.pass.cpp create mode 100644 libcxx/test/std/time/time.zone/time.zone.leap/members/date.pass.cpp create mode 100644 libcxx/test/std/time/time.zone/time.zone.leap/members/value.pass.cpp create mode 100644 libcxx/test/std/time/time.zone/time.zone.leap/nonmembers/comparison.pass.cpp create mode 100644 libcxx/test/support/test_chrono_leap_second.h diff --git a/libcxx/docs/Status/Cxx20Issues.csv b/libcxx/docs/Status/Cxx20Issues.csv index f0e9c40..db57b15 100644 --- a/libcxx/docs/Status/Cxx20Issues.csv +++ b/libcxx/docs/Status/Cxx20Issues.csv @@ -269,7 +269,7 @@ "`3355 `__","The memory algorithms should support move-only input iterators introduced by P1207","Prague","|Complete|","15.0","|ranges|" "`3356 `__","``__cpp_lib_nothrow_convertible``\ should be ``__cpp_lib_is_nothrow_convertible``\ ","Prague","|Complete|","12.0" "`3358 `__","|sect|\ [span.cons] is mistaken that ``to_address``\ can throw","Prague","|Complete|","17.0" -"`3359 `__","````\ leap second support should allow for negative leap seconds","Prague","","","|chrono|" +"`3359 `__","````\ leap second support should allow for negative leap seconds","Prague","|Complete|","19.0","|chrono|" "`3360 `__","``three_way_comparable_with``\ is inconsistent with similar concepts","Prague","|Nothing To Do|","","|spaceship|" "`3362 `__","Strike ``stop_source``\ 's ``operator!=``\ ","Prague","","" "`3363 `__","``drop_while_view``\ should opt-out of ``sized_range``\ ","Prague","|Nothing To Do|","","|ranges|" 
@@ -286,7 +286,7 @@ "`3380 `__","``common_type``\ and comparison categories","Prague","|Complete|","15.0","|spaceship|" "`3381 `__","``begin``\ and ``data``\ must agree for ``contiguous_range``\ ","Prague","|Nothing To Do|","","|ranges|" "`3382 `__","NTTP for ``pair``\ and ``array``\ ","Prague","","" -"`3383 `__","|sect|\ [time.zone.leap.nonmembers] ``sys_seconds``\ should be replaced with ``seconds``\ ","Prague","","","|chrono|" +"`3383 `__","|sect|\ [time.zone.leap.nonmembers] ``sys_seconds``\ should be replaced with ``seconds``\ ","Prague","|Complete|","19.0","|chrono|" "`3384 `__","``transform_view::*sentinel*``\ has an incorrect ``operator-``\ ","Prague","|Complete|","15.0","|ranges|" "`3385 `__","``common_iterator``\ is not sufficiently constrained for non-copyable iterators","Prague","|Complete|","15.0","|ranges|" "`3387 `__","|sect|\ [range.reverse.view] ``reverse_view``\ unintentionally requires ``range``\ ","Prague","|Complete|","15.0","|ranges|" diff --git a/libcxx/docs/Status/Cxx20Papers.csv b/libcxx/docs/Status/Cxx20Papers.csv index db64914..77078b1 100644 --- a/libcxx/docs/Status/Cxx20Papers.csv +++ b/libcxx/docs/Status/Cxx20Papers.csv @@ -179,7 +179,7 @@ "`P1970R2 `__","LWG","Consistency for size() functions: Add ranges::ssize","Prague","|Complete|","15.0","|ranges|" "`P1973R1 `__","LWG","Rename ""_default_init"" Functions, Rev1","Prague","|Complete|","16.0" "`P1976R2 `__","LWG","Fixed-size span construction from dynamic range","Prague","|Complete|","11.0","|ranges|" -"`P1981R0 `__","LWG","Rename leap to leap_second","Prague","* *","" +"`P1981R0 `__","LWG","Rename leap to leap_second","Prague","|Complete|","19.0","|chrono|" "`P1982R0 `__","LWG","Rename link to time_zone_link","Prague","|Complete|","19.0","|chrono|" "`P1983R0 `__","LWG","Wording for GB301, US296, US292, US291, and US283","Prague","|Complete|","15.0","|ranges|" "`P1994R1 `__","LWG","elements_view needs its own sentinel","Prague","|Complete|","16.0","|ranges|" diff --git a/libcxx/docs/Status/SpaceshipProjects.csv b/libcxx/docs/Status/SpaceshipProjects.csv index c822107..3d14f48 100644 --- a/libcxx/docs/Status/SpaceshipProjects.csv +++ b/libcxx/docs/Status/SpaceshipProjects.csv @@ -173,7 +173,7 @@ Section,Description,Dependencies,Assignee,Complete | `year_month_weekday_last `_",None,Hristo Hristov,|Complete| `[time.zone.nonmembers] `_,"`chrono::time_zone`",A ```` implementation,Mark de Wever,|Complete| `[time.zone.zonedtime.nonmembers] `_,"`chrono::zoned_time`",A ```` implementation,Mark de Wever,|In Progress| -`[time.zone.leap.nonmembers] `_,"`chrono::time_leap_seconds`",A ```` implementation,Mark de Wever,|In Progress| +`[time.zone.leap.nonmembers] `_,"`chrono::time_leap_seconds`",A ```` implementation,Mark de Wever,|Complete| `[time.zone.link.nonmembers] `_,"`chrono::time_zone_link`",A ```` implementation,Mark de Wever,|Complete| - `5.13 Clause 28: Localization library `_,,,, "| `[locale] `_ diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index b935e45..db39803 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -282,6 +282,7 @@ set(files __chrono/formatter.h __chrono/hh_mm_ss.h __chrono/high_resolution_clock.h + __chrono/leap_second.h __chrono/literals.h __chrono/month.h __chrono/month_weekday.h diff --git a/libcxx/include/__chrono/leap_second.h b/libcxx/include/__chrono/leap_second.h new file mode 100644 index 0000000..4e67cc2 --- /dev/null +++ b/libcxx/include/__chrono/leap_second.h @@ -0,0 +1,126 @@ +// -*- C++ -*- 
+//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html + +#ifndef _LIBCPP___CHRONO_LEAP_SECOND_H +#define _LIBCPP___CHRONO_LEAP_SECOND_H + +#include +// Enable the contents of the header only when libc++ was built with experimental features enabled. +#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_TZDB) + +# include <__chrono/duration.h> +# include <__chrono/system_clock.h> +# include <__chrono/time_point.h> +# include <__compare/ordering.h> +# include <__compare/three_way_comparable.h> +# include <__config> + +# if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +# endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +# if _LIBCPP_STD_VER >= 20 + +namespace chrono { + +class leap_second { +public: + struct __constructor_tag; + [[nodiscard]] + _LIBCPP_HIDE_FROM_ABI explicit constexpr leap_second(__constructor_tag&&, sys_seconds __date, seconds __value) + : __date_(__date), __value_(__value) {} + + _LIBCPP_HIDE_FROM_ABI leap_second(const leap_second&) = default; + _LIBCPP_HIDE_FROM_ABI leap_second& operator=(const leap_second&) = default; + + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr sys_seconds date() const noexcept { return __date_; } + + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI constexpr seconds value() const noexcept { return __value_; } + +private: + sys_seconds __date_; + seconds __value_; +}; + +_LIBCPP_HIDE_FROM_ABI inline constexpr bool operator==(const leap_second& __x, const leap_second& __y) { + return __x.date() == __y.date(); +} + +_LIBCPP_HIDE_FROM_ABI inline constexpr strong_ordering operator<=>(const leap_second& __x, const leap_second& __y) { + return __x.date() <=> __y.date(); +} + +template +_LIBCPP_HIDE_FROM_ABI constexpr bool operator==(const leap_second& __x, const sys_time<_Duration>& __y) { + return __x.date() == __y; +} + +template +_LIBCPP_HIDE_FROM_ABI constexpr bool operator<(const leap_second& __x, const sys_time<_Duration>& __y) { + return __x.date() < __y; +} + +template +_LIBCPP_HIDE_FROM_ABI constexpr bool operator<(const sys_time<_Duration>& __x, const leap_second& __y) { + return __x < __y.date(); +} + +template +_LIBCPP_HIDE_FROM_ABI constexpr bool operator>(const leap_second& __x, const sys_time<_Duration>& __y) { + return __y < __x; +} + +template +_LIBCPP_HIDE_FROM_ABI constexpr bool operator>(const sys_time<_Duration>& __x, const leap_second& __y) { + return __y < __x; +} + +template +_LIBCPP_HIDE_FROM_ABI constexpr bool operator<=(const leap_second& __x, const sys_time<_Duration>& __y) { + return !(__y < __x); +} + +template +_LIBCPP_HIDE_FROM_ABI constexpr bool operator<=(const sys_time<_Duration>& __x, const leap_second& __y) { + return !(__y < __x); +} + +template +_LIBCPP_HIDE_FROM_ABI constexpr bool operator>=(const leap_second& __x, const sys_time<_Duration>& __y) { + return !(__x < __y); +} + +template +_LIBCPP_HIDE_FROM_ABI constexpr bool operator>=(const sys_time<_Duration>& __x, const leap_second& __y) { + return !(__x < __y); +} + +# ifndef _LIBCPP_COMPILER_GCC +// This requirement cause a compilation loop in GCC-13 and running out of memory. +// TODO TZDB Test whether GCC-14 fixes this. 
+template <class _Duration>
+  requires three_way_comparable_with<sys_seconds, sys_time<_Duration>>
+_LIBCPP_HIDE_FROM_ABI constexpr auto operator<=>(const leap_second& __x, const sys_time<_Duration>& __y) {
+  return __x.date() <=> __y;
+}
+# endif
+
+} // namespace chrono
+
+# endif //_LIBCPP_STD_VER >= 20
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_TZDB)
+
+#endif // _LIBCPP___CHRONO_LEAP_SECOND_H
diff --git a/libcxx/include/__chrono/tzdb.h b/libcxx/include/__chrono/tzdb.h
index 582172e..45c20f2 100644
--- a/libcxx/include/__chrono/tzdb.h
+++ b/libcxx/include/__chrono/tzdb.h
@@ -16,6 +16,7 @@
 // Enable the contents of the header only when libc++ was built with experimental features enabled.
 #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_TZDB)
+# include <__chrono/leap_second.h>
 # include <__chrono/time_zone.h>
 # include <__chrono/time_zone_link.h>
 # include <__config>
@@ -40,6 +41,8 @@ struct tzdb {
 string version;
 vector<time_zone> zones;
 vector<time_zone_link> links;
+
+ vector<leap_second> leap_seconds;
 };
 
 } // namespace chrono
diff --git a/libcxx/include/chrono b/libcxx/include/chrono
index 5bab3f8..4dd4313 100644
--- a/libcxx/include/chrono
+++ b/libcxx/include/chrono
@@ -688,6 +688,7 @@ struct tzdb {
 string version;
 vector<time_zone> zones;
 vector<time_zone_link> links;
+ vector<leap_second> leap_seconds;
 };
 
 class tzdb_list { // C++20
@@ -731,6 +732,43 @@ class time_zone {
 bool operator==(const time_zone& x, const time_zone& y) noexcept; // C++20
 strong_ordering operator<=>(const time_zone& x, const time_zone& y) noexcept; // C++20
 
+// [time.zone.leap], leap second support
+class leap_second { // C++20
+public:
+  leap_second(const leap_second&) = default;
+  leap_second& operator=(const leap_second&) = default;
+
+  // unspecified additional constructors
+
+  constexpr sys_seconds date() const noexcept;
+  constexpr seconds value() const noexcept;
+};
+
+constexpr bool operator==(const leap_second& x, const leap_second& y); // C++20
+constexpr strong_ordering operator<=>(const leap_second& x, const leap_second& y);
+
+template<class Duration> // C++20
+  constexpr bool operator==(const leap_second& x, const sys_time<Duration>& y);
+template<class Duration> // C++20
+  constexpr bool operator< (const leap_second& x, const sys_time<Duration>& y);
+template<class Duration> // C++20
+  constexpr bool operator< (const sys_time<Duration>& x, const leap_second& y);
+template<class Duration> // C++20
+  constexpr bool operator> (const leap_second& x, const sys_time<Duration>& y);
+template<class Duration> // C++20
+  constexpr bool operator> (const sys_time<Duration>& x, const leap_second& y);
+template<class Duration> // C++20
+  constexpr bool operator<=(const leap_second& x, const sys_time<Duration>& y);
+template<class Duration> // C++20
+  constexpr bool operator<=(const sys_time<Duration>& x, const leap_second& y);
+template<class Duration> // C++20
+  constexpr bool operator>=(const leap_second& x, const sys_time<Duration>& y);
+template<class Duration> // C++20
+  constexpr bool operator>=(const sys_time<Duration>& x, const leap_second& y);
+template<class Duration> // C++20
+  requires three_way_comparable_with<sys_seconds, sys_time<Duration>>
+  constexpr auto operator<=>(const leap_second& x, const sys_time<Duration>& y);
+
 // [time.zone.link], class time_zone_link
 class time_zone_link { // C++20
 public:
@@ -862,6 +900,7 @@ constexpr chrono::year operator ""y(unsigned lo
 #if !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && \
 !defined(_LIBCPP_HAS_NO_LOCALIZATION)
+# include <__chrono/leap_second.h>
 # include <__chrono/time_zone.h>
 # include <__chrono/time_zone_link.h>
 # include <__chrono/tzdb.h>
diff --git a/libcxx/include/libcxx.imp b/libcxx/include/libcxx.imp
index ea0ba8c..2cb1fa5 100644
--- a/libcxx/include/libcxx.imp
+++ b/libcxx/include/libcxx.imp
@@ -279,6 +279,7 @@
 { include: [ "<__chrono/formatter.h>", "private", "", "public" ] },
 {
include: [ "<__chrono/hh_mm_ss.h>", "private", "", "public" ] }, { include: [ "<__chrono/high_resolution_clock.h>", "private", "", "public" ] }, + { include: [ "<__chrono/leap_second.h>", "private", "", "public" ] }, { include: [ "<__chrono/literals.h>", "private", "", "public" ] }, { include: [ "<__chrono/month.h>", "private", "", "public" ] }, { include: [ "<__chrono/month_weekday.h>", "private", "", "public" ] }, diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 22c3803..6d4dcc2 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1145,6 +1145,7 @@ module std_private_chrono_high_resolution_clock [system] { export std_private_chrono_steady_clock export std_private_chrono_system_clock } +module std_private_chrono_leap_second [system] { header "__chrono/leap_second.h" } module std_private_chrono_literals [system] { header "__chrono/literals.h" } module std_private_chrono_month [system] { header "__chrono/month.h" } module std_private_chrono_month_weekday [system] { header "__chrono/month_weekday.h" } diff --git a/libcxx/modules/std/chrono.inc b/libcxx/modules/std/chrono.inc index 109023a..2c0bd3f 100644 --- a/libcxx/modules/std/chrono.inc +++ b/libcxx/modules/std/chrono.inc @@ -208,10 +208,7 @@ export namespace std { using std::chrono::reload_tzdb; using std::chrono::remote_version; -# endif // !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && - // !defined(_LIBCPP_HAS_NO_LOCALIZATION) - -# if 0 +# if 0 // [time.zone.exception], exception classes using std::chrono::ambiguous_local_time; using std::chrono::nonexistent_local_time; @@ -221,11 +218,11 @@ export namespace std { // [time.zone.timezone], class time_zone using std::chrono::choose; -# endif -# ifdef _LIBCPP_ENABLE_EXPERIMENTAL +# endif // if 0 + using std::chrono::time_zone; -# endif -# if 0 + +# if 0 // [time.zone.zonedtraits], class template zoned_traits using std::chrono::zoned_traits; @@ -234,22 +231,23 @@ export namespace std { using std::chrono::zoned_time; using std::chrono::zoned_seconds; +# endif // if 0 // [time.zone.leap], leap second support using std::chrono::leap_second; -# endif -# ifdef _LIBCPP_ENABLE_EXPERIMENTAL // [time.zone.link], class time_zone_link using std::chrono::time_zone_link; -# endif -# if 0 +# if 0 // [time.format], formatting using std::chrono::local_time_format; -# endif -#endif // _LIBCPP_ENABLE_EXPERIMENTAL - } // namespace chrono +# endif +# endif // _LIBCPP_ENABLE_EXPERIMENTAL +#endif // !defined(_LIBCPP_HAS_NO_TIME_ZONE_DATABASE) && !defined(_LIBCPP_HAS_NO_FILESYSTEM) && + // !defined(_LIBCPP_HAS_NO_LOCALIZATION) + + } // namespace chrono #ifndef _LIBCPP_HAS_NO_LOCALIZATION using std::formatter; diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt index 1110a79..16ccb80 100644 --- a/libcxx/src/CMakeLists.txt +++ b/libcxx/src/CMakeLists.txt @@ -334,6 +334,7 @@ endif() if (LIBCXX_ENABLE_LOCALIZATION AND LIBCXX_ENABLE_FILESYSTEM AND LIBCXX_ENABLE_TIME_ZONE_DATABASE) list(APPEND LIBCXX_EXPERIMENTAL_SOURCES + include/tzdb/leap_second_private.h include/tzdb/time_zone_link_private.h include/tzdb/time_zone_private.h include/tzdb/types_private.h diff --git a/libcxx/src/include/tzdb/leap_second_private.h b/libcxx/src/include/tzdb/leap_second_private.h new file mode 100644 index 0000000..7a811ab --- /dev/null +++ b/libcxx/src/include/tzdb/leap_second_private.h @@ -0,0 +1,27 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part 
of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// For information see https://libcxx.llvm.org/DesignDocs/TimeZone.html + +#ifndef _LIBCPP_SRC_INCLUDE_TZDB_LEAP_SECOND_PRIVATE_H +#define _LIBCPP_SRC_INCLUDE_TZDB_LEAP_SECOND_PRIVATE_H + +#include + +_LIBCPP_BEGIN_NAMESPACE_STD + +namespace chrono { + +struct leap_second::__constructor_tag {}; + +} // namespace chrono + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP_SRC_INCLUDE_TZDB_LEAP_SECOND_PRIVATE_H diff --git a/libcxx/src/tzdb.cpp b/libcxx/src/tzdb.cpp index 0307f754..7ba5ceb 100644 --- a/libcxx/src/tzdb.cpp +++ b/libcxx/src/tzdb.cpp @@ -15,6 +15,7 @@ #include #include +#include "include/tzdb/leap_second_private.h" #include "include/tzdb/time_zone_link_private.h" #include "include/tzdb/time_zone_private.h" #include "include/tzdb/types_private.h" @@ -622,6 +623,36 @@ static void __parse_tzdata(tzdb& __db, __tz::__rules_storage_type& __rules, istr } } +static void __parse_leap_seconds(vector& __leap_seconds, istream&& __input) { + // The file stores dates since 1 January 1900, 00:00:00, we want + // seconds since 1 January 1970. + constexpr auto __offset = sys_days{1970y / January / 1} - sys_days{1900y / January / 1}; + + while (true) { + switch (__input.peek()) { + case istream::traits_type::eof(): + return; + + case ' ': + case '\t': + case '\n': + __input.get(); + continue; + + case '#': + chrono::__skip_line(__input); + continue; + } + + sys_seconds __date = sys_seconds{seconds{chrono::__parse_integral(__input, false)}} - __offset; + chrono::__skip_mandatory_whitespace(__input); + seconds __value{chrono::__parse_integral(__input, false)}; + chrono::__skip_line(__input); + + __leap_seconds.emplace_back(leap_second::__constructor_tag{}, __date, __value); + } +} + void __init_tzdb(tzdb& __tzdb, __tz::__rules_storage_type& __rules) { filesystem::path __root = chrono::__libcpp_tzdb_directory(); ifstream __tzdata{__root / "tzdata.zi"}; @@ -631,6 +662,17 @@ void __init_tzdb(tzdb& __tzdb, __tz::__rules_storage_type& __rules) { std::ranges::sort(__tzdb.zones); std::ranges::sort(__tzdb.links); std::ranges::sort(__rules, {}, [](const auto& p) { return p.first; }); + + // There are two files with the leap second information + // - leapseconds as specified by zic + // - leap-seconds.list the source data + // The latter is much easier to parse, it seems Howard shares that + // opinion. + chrono::__parse_leap_seconds(__tzdb.leap_seconds, ifstream{__root / "leap-seconds.list"}); + // The Standard requires the leap seconds to be sorted. The file + // leap-seconds.list usually provides them in sorted order, but that is not + // guaranteed so we ensure it here. + std::ranges::sort(__tzdb.leap_seconds); } //===----------------------------------------------------------------------===// diff --git a/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.compile.pass.cpp b/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.compile.pass.cpp index 8019824..c868832 100644 --- a/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.compile.pass.cpp +++ b/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.compile.pass.cpp @@ -26,6 +26,7 @@ // These types have "private" constructors. 
extern std::chrono::time_zone tz; extern std::chrono::time_zone_link link; +extern std::chrono::leap_second leap; void test() { std::chrono::tzdb_list& list = std::chrono::get_tzdb_list(); @@ -51,4 +52,9 @@ void test() { operator==(link, link); operator<=>(link, link); } + + { + leap.date(); + leap.value(); + } } diff --git a/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.verify.cpp b/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.verify.cpp index e9b2755..4d26b46 100644 --- a/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.verify.cpp @@ -23,6 +23,7 @@ // These types have "private" constructors. extern std::chrono::time_zone tz; extern std::chrono::time_zone_link link; +extern std::chrono::leap_second leap; void test() { std::chrono::tzdb_list& list = std::chrono::get_tzdb_list(); @@ -51,4 +52,9 @@ void test() { // expected-warning@+1 {{ignoring return value of function declared with 'nodiscard' attribute}} operator<=>(link, link); } + + { + leap.date(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + leap.value(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + } } diff --git a/libcxx/test/libcxx/time/time.zone/time.zone.db/leap_seconds.pass.cpp b/libcxx/test/libcxx/time/time.zone/time.zone.db/leap_seconds.pass.cpp new file mode 100644 index 0000000..282bddc --- /dev/null +++ b/libcxx/test/libcxx/time/time.zone/time.zone.db/leap_seconds.pass.cpp @@ -0,0 +1,119 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// Tests the IANA database leap seconds parsing and operations. +// This is not part of the public tzdb interface. 
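// (Editorial sketch, not part of the patch; the sample line below is taken
// from the test data further down.) The leap-seconds.list input consumed by
// __parse_leap_seconds() in libcxx/src/tzdb.cpp is line oriented: '#' starts
// a comment and each data line carries two integers, for example
//
//   2272060800 10   # date in seconds since 1900-01-01 00:00:00, then the leap second value
//
// The parser skips whitespace and comment lines, subtracts the 1900-to-1970
// epoch offset, and stores each entry as a chrono::leap_second holding that
// date and value.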
+ +#include +#include +#include +#include +#include + +#include "assert_macros.h" +#include "concat_macros.h" +#include "filesystem_test_helper.h" +#include "test_tzdb.h" + +scoped_test_env env; +[[maybe_unused]] const std::filesystem::path dir = env.create_dir("zoneinfo"); +const std::filesystem::path tzdata = env.create_file("zoneinfo/tzdata.zi"); +const std::filesystem::path leap_seconds = env.create_file("zoneinfo/leap-seconds.list"); + +std::string_view std::chrono::__libcpp_tzdb_directory() { + static std::string result = dir.string(); + return result; +} + +void write(std::string_view input) { + static int version = 0; + + std::ofstream f{tzdata}; + f << "# version " << version++ << '\n'; + std::ofstream{leap_seconds}.write(input.data(), input.size()); +} + +static const std::chrono::tzdb& parse(std::string_view input) { + write(input); + return std::chrono::reload_tzdb(); +} + +static void test_exception(std::string_view input, [[maybe_unused]] std::string_view what) { + write(input); + + TEST_VALIDATE_EXCEPTION( + std::runtime_error, + [&]([[maybe_unused]] const std::runtime_error& e) { + TEST_LIBCPP_REQUIRE( + e.what() == what, + TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); + }, + TEST_IGNORE_NODISCARD std::chrono::reload_tzdb()); +} + +static void test_invalid() { + test_exception("0", "corrupt tzdb: expected a non-zero digit"); + + test_exception("1", "corrupt tzdb: expected whitespace"); + + test_exception("1 ", "corrupt tzdb: expected a non-zero digit"); + + test_exception("5764607523034234880 2", "corrupt tzdb: integral too large"); +} + +static void test_leap_seconds() { + using namespace std::chrono; + + // Test whether loading also sorts the entries in the proper order. + const tzdb& result = parse( + R"( +2303683200 12 # 1 Jan 1973 +2287785600 11 # 1 Jul 1972 +2272060800 10 # 1 Jan 1972 +86400 1 # 2 Jan 1900 Dummy entry to test before 1970 + +# largest accepted value by the parser +5764607523034234879 2 +)"); + + assert(result.leap_seconds.size() == 5); + + assert(result.leap_seconds[0].date() == sys_seconds{sys_days{1900y / January / 2}}); + assert(result.leap_seconds[0].value() == 1s); + + assert(result.leap_seconds[1].date() == sys_seconds{sys_days{1972y / January / 1}}); + assert(result.leap_seconds[1].value() == 10s); + + assert(result.leap_seconds[2].date() == sys_seconds{sys_days{1972y / July / 1}}); + assert(result.leap_seconds[2].value() == 11s); + + assert(result.leap_seconds[3].date() == sys_seconds{sys_days{1973y / January / 1}}); + assert(result.leap_seconds[3].value() == 12s); + + assert(result.leap_seconds[4].date() == + sys_seconds{5764607523034234879s + // The database uses 1900-01-01 as epoch. + - std::chrono::duration_cast( + sys_days{1970y / January / 1} - sys_days{1900y / January / 1})}); + assert(result.leap_seconds[4].value() == 2s); +} + +int main(int, const char**) { + test_invalid(); + test_leap_seconds(); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.db/leap_seconds.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/leap_seconds.pass.cpp new file mode 100644 index 0000000..4fcdf6f --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.db/leap_seconds.pass.cpp @@ -0,0 +1,75 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// Tests the loaded leap seconds match +// https://eel.is/c++draft/time.zone.leap.overview#2 +// +// At the moment of writing that list is the actual list. +// If in the future more leap seconds are added, the returned list may have more + +#include +#include +#include +#include +#include + +using namespace std::literals::chrono_literals; + +// The list of leap seconds matching +// https://eel.is/c++draft/time.zone.leap.overview#2 +// At the moment of writing that list is the actual list in the IANA database. +// If in the future more leap seconds can be added. +static const std::array leap_seconds = { + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1972y / std::chrono::January / 1}}, 10s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1972y / std::chrono::July / 1}}, 11s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1973y / std::chrono::January / 1}}, 12s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1974y / std::chrono::January / 1}}, 13s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1975y / std::chrono::January / 1}}, 14s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1976y / std::chrono::January / 1}}, 15s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1977y / std::chrono::January / 1}}, 16s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1978y / std::chrono::January / 1}}, 17s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1979y / std::chrono::January / 1}}, 18s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1980y / std::chrono::January / 1}}, 19s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1981y / std::chrono::July / 1}}, 20s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1982y / std::chrono::July / 1}}, 21s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1983y / std::chrono::July / 1}}, 22s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1985y / std::chrono::July / 1}}, 23s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1988y / std::chrono::January / 1}}, 24s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1990y / std::chrono::January / 1}}, 25s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1991y / std::chrono::January / 1}}, 26s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1992y / std::chrono::July / 1}}, 27s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1993y / std::chrono::July / 1}}, 28s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1994y / std::chrono::July / 1}}, 29s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1996y / std::chrono::January / 1}}, 30s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1997y / std::chrono::July / 1}}, 31s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{1999y / std::chrono::January / 1}}, 32s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{2006y / std::chrono::January / 1}}, 33s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{2009y / 
std::chrono::January / 1}}, 34s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{2012y / std::chrono::July / 1}}, 35s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{2015y / std::chrono::July / 1}}, 36s), + std::make_pair(std::chrono::sys_seconds{std::chrono::sys_days{2017y / std::chrono::January / 1}}, 37s)}; + +int main(int, const char**) { + const std::chrono::tzdb& tzdb = std::chrono::get_tzdb(); + + assert(tzdb.leap_seconds.size() >= leap_seconds.size()); + assert((std::ranges::equal( + leap_seconds, + tzdb.leap_seconds | std::ranges::views::take(leap_seconds.size()), + [](const auto& lhs, const auto& rhs) { return lhs.first == rhs.date() && lhs.second == rhs.value(); }))); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/get_tzdb.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/get_tzdb.pass.cpp index b6204c6..470a722 100644 --- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/get_tzdb.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/get_tzdb.pass.cpp @@ -35,5 +35,8 @@ int main(int, const char**) { assert(std::ranges::is_sorted(db.links)); assert(std::ranges::adjacent_find(db.links) == db.links.end()); // is unique? + assert(!db.leap_seconds.empty()); + assert(std::ranges::is_sorted(db.leap_seconds)); + return 0; } diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/tzdb.members.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/tzdb.members.pass.cpp index 51c6d36..af95274 100644 --- a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/tzdb.members.pass.cpp +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/tzdb.members.pass.cpp @@ -37,11 +37,9 @@ int main(int, const char**) { tzdb.version = "version"; assert(tzdb.version == "version"); - [[maybe_unused]] std::vector& zones = tzdb.zones; - + [[maybe_unused]] std::vector& zones = tzdb.zones; [[maybe_unused]] std::vector& links = tzdb.links; - - // TODO TZDB add the leap data member + [[maybe_unused]] std::vector& leap_seconds = tzdb.leap_seconds; return 0; } diff --git a/libcxx/test/std/time/time.zone/time.zone.leap/assign.copy.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.leap/assign.copy.pass.cpp new file mode 100644 index 0000000..4d91e73 --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.leap/assign.copy.pass.cpp @@ -0,0 +1,71 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// class leap_second +// { +// leap_second& operator=(const leap_second&) = default; +// +// ... +// }; + +#include +#include +#include +#include +#include + +// Add the include path required by test_chrono_leap_second.h when using libc++. 
+// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include +#include "test_chrono_leap_second.h" + +constexpr bool test() { + std::chrono::leap_second a = + test_leap_second_create(std::chrono::sys_seconds{std::chrono::seconds{0}}, std::chrono::seconds{1}); + std::chrono::leap_second b = + test_leap_second_create(std::chrono::sys_seconds{std::chrono::seconds{10}}, std::chrono::seconds{15}); + + // operator== only compares the date member. + assert(a.date() != b.date()); + assert(a.value() != b.value()); + + { + std::same_as decltype(auto) result(b = a); + assert(std::addressof(result) == std::addressof(b)); + + assert(a.date() == b.date()); + assert(a.value() == b.value()); + } + + { + // Tests an rvalue uses the copy assignment. + std::same_as decltype(auto) result(b = std::move(a)); + assert(std::addressof(result) == std::addressof(b)); + + assert(a.date() == b.date()); + assert(a.value() == b.value()); + } + + return true; +} + +int main(int, const char**) { + static_assert(std::is_copy_assignable_v); + + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.leap/cons.copy.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.leap/cons.copy.pass.cpp new file mode 100644 index 0000000..e2419b7 --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.leap/cons.copy.pass.cpp @@ -0,0 +1,69 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// class leap_second +// { +// leap_second(const leap_second&) = default; +// +// ... +// }; + +#include +#include +#include + +// Add the include path required by test_chrono_leap_second.h when using libc++. +// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include +#include "test_chrono_leap_second.h" + +constexpr bool test() { + std::chrono::leap_second a = + test_leap_second_create(std::chrono::sys_seconds{std::chrono::seconds{0}}, std::chrono::seconds{1}); + + { + std::chrono::leap_second b = a; + + // operator== only compares the date member. + assert(a.date() == b.date()); + assert(a.value() == b.value()); + } + +#ifdef _LIBCPP_VERSION + { + // Tests an rvalue uses the copy constructor. + // Since implementations are allowed to add additional constructors this is + // a libc++ specific test. + std::chrono::leap_second b = std::move(a); + + // operator== only compares the date member. + assert(a.date() == b.date()); + assert(a.value() == b.value()); + } + // libc++ does not provide a default constructor. 
+ static_assert(!std::is_default_constructible_v); +#endif // _LIBCPP_VERSION + + return true; +} + +int main(int, const char**) { + static_assert(std::copy_constructible); + + test(); + static_assert(test()); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.leap/members/date.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.leap/members/date.pass.cpp new file mode 100644 index 0000000..23f95ec --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.leap/members/date.pass.cpp @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// class leap_second; + +// constexpr sys_seconds date() const noexcept; + +#include +#include + +#include "test_macros.h" + +// Add the include path required by test_chrono_leap_second.h when using libc++. +// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include +#include "test_chrono_leap_second.h" + +constexpr void test(const std::chrono::leap_second leap_second, std::chrono::sys_seconds expected) { + std::same_as auto date = leap_second.date(); + assert(date == expected); + static_assert(noexcept(leap_second.date())); +} + +constexpr bool test() { + test(test_leap_second_create(std::chrono::sys_seconds{std::chrono::seconds{0}}, std::chrono::seconds{1}), + std::chrono::sys_seconds{std::chrono::seconds{0}}); + + return true; +} + +int main(int, const char**) { + test(); + static_assert(test()); + + // test with the real tzdb + const std::chrono::tzdb& tzdb = std::chrono::get_tzdb(); + assert(!tzdb.leap_seconds.empty()); + test(tzdb.leap_seconds[0], tzdb.leap_seconds[0].date()); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.leap/members/value.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.leap/members/value.pass.cpp new file mode 100644 index 0000000..844c74d --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.leap/members/value.pass.cpp @@ -0,0 +1,53 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// class leap_second; + +// constexpr seconds value() const noexcept; + +#include +#include + +#include "test_macros.h" + +// Add the include path required by test_chrono_leap_second.h when using libc++. 
+// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include +#include "test_chrono_leap_second.h" + +constexpr void test(const std::chrono::leap_second leap_second, std::chrono::seconds expected) { + std::same_as auto value = leap_second.value(); + assert(value == expected); + static_assert(noexcept(leap_second.value())); +} + +constexpr bool test() { + test(test_leap_second_create(std::chrono::sys_seconds{std::chrono::seconds{0}}, std::chrono::seconds{1}), + std::chrono::seconds{1}); + + return true; +} + +int main(int, const char**) { + test(); + static_assert(test()); + + // test with the real tzdb + const std::chrono::tzdb& tzdb = std::chrono::get_tzdb(); + assert(!tzdb.leap_seconds.empty()); + test(tzdb.leap_seconds[0], tzdb.leap_seconds[0].value()); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.leap/nonmembers/comparison.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.leap/nonmembers/comparison.pass.cpp new file mode 100644 index 0000000..ac8b780 --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.leap/nonmembers/comparison.pass.cpp @@ -0,0 +1,85 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// TODO TZDB test whether this can be enabled with gcc 14. +// UNSUPPORTED: gcc-13 + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// class leap_second; + +//constexpr bool operator==(const leap_second& x, const leap_second& y); // C++20 +//constexpr strong_ordering operator<=>(const leap_second& x, const leap_second& y); +// +//template +// constexpr bool operator==(const leap_second& x, const sys_time& y); +//template +// constexpr bool operator< (const leap_second& x, const sys_time& y); +//template +// constexpr bool operator< (const sys_time& x, const leap_second& y); +//template +// constexpr bool operator> (const leap_second& x, const sys_time& y); +//template +// constexpr bool operator> (const sys_time& x, const leap_second& y); +//template +// constexpr bool operator<=(const leap_second& x, const sys_time& y); +//template +// constexpr bool operator<=(const sys_time& x, const leap_second& y); +//template +// constexpr bool operator>=(const leap_second& x, const sys_time& y); +//template +// constexpr bool operator>=(const sys_time& x, const leap_second& y); +//template +// requires three_way_comparable_with> +// constexpr auto operator<=>(const leap_second& x, const sys_time& y); + +#include +#include + +#include "test_macros.h" +#include "test_comparisons.h" + +// Add the include path required by test_chrono_leap_second.h when using libc++. 
+// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include +#include "test_chrono_leap_second.h" + +constexpr void test_comparison(const std::chrono::leap_second lhs, const std::chrono::leap_second rhs) { + AssertOrderReturn(); + assert(testOrder(lhs, rhs, std::strong_ordering::less)); + + AssertOrderReturn(); + assert(testOrder(lhs, rhs.date(), std::strong_ordering::less)); + + AssertOrderReturn(); + assert(testOrder(lhs.date(), rhs, std::strong_ordering::less)); +} + +constexpr bool test() { + test_comparison(test_leap_second_create(std::chrono::sys_seconds{std::chrono::seconds{0}}, std::chrono::seconds{1}), + test_leap_second_create(std::chrono::sys_seconds{std::chrono::seconds{1}}, std::chrono::seconds{2})); + + return true; +} + +int main(int, const char**) { + test(); + static_assert(test()); + + // test with the real tzdb + const std::chrono::tzdb& tzdb = std::chrono::get_tzdb(); + assert(tzdb.leap_seconds.size() > 2); + test_comparison(tzdb.leap_seconds[0], tzdb.leap_seconds[1]); + + return 0; +} diff --git a/libcxx/test/support/test_chrono_leap_second.h b/libcxx/test/support/test_chrono_leap_second.h new file mode 100644 index 0000000..485f68d91 --- /dev/null +++ b/libcxx/test/support/test_chrono_leap_second.h @@ -0,0 +1,52 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef SUPPORT_TEST_CHRONO_LEAP_SECOND_HPP +#define SUPPORT_TEST_CHRONO_LEAP_SECOND_HPP + +// Contains helper functions to create a std::chrono::leap_second. +// +// Since the standard doesn't specify how a @ref std::chrono::leap_second is +// constructed this is implementation defined. To make the public API tests of +// the class generic this header defines helper functions to create the +// required object. +// +// Note This requires every standard library implementation to write their own +// helper function. Vendors are encouraged to create a pull request at +// https://github.com/llvm/llvm-project so their specific implementation can be +// part of this file. + +#include "test_macros.h" + +#if TEST_STD_VER < 20 +# error "The format header requires at least C++20" +#endif + +#include + +#ifdef _LIBCPP_VERSION + +// In order to find this include the calling test needs to provide this path in +// the search path. Typically this looks like: +// ADDITIONAL_COMPILE_FLAGS(stdlib=libc++): -I %{libcxx-dir}/src/include +// where the number of `../` sequences depends on the subdirectory level of the +// test. 
+# include "tzdb/leap_second_private.h" // Header in the dylib + +inline constexpr std::chrono::leap_second +test_leap_second_create(const std::chrono::sys_seconds& date, const std::chrono::seconds& value) { + return std::chrono::leap_second{std::chrono::leap_second::__constructor_tag{}, date, value}; +} + +#else // _LIBCPP_VERSION +# error \ + "Please create a vendor specific version of the test typedef and file a PR at https://github.com/llvm/llvm-project" +#endif // _LIBCPP_VERSION + +#endif // SUPPORT_TEST_CHRONO_LEAP_SECOND_HPP -- cgit v1.1 From 5b959310b0fae723bd119ed8815bf1cb1a8c67d4 Mon Sep 17 00:00:00 2001 From: Chris Bieneman Date: Wed, 3 Apr 2024 11:24:26 -0500 Subject: [NFC] Delete unintentionally added file --- clang/test/SemaHLSL/ArrayTemporary.ll | 76 ----------------------------------- 1 file changed, 76 deletions(-) delete mode 100644 clang/test/SemaHLSL/ArrayTemporary.ll diff --git a/clang/test/SemaHLSL/ArrayTemporary.ll b/clang/test/SemaHLSL/ArrayTemporary.ll deleted file mode 100644 index 5eec009..0000000 --- a/clang/test/SemaHLSL/ArrayTemporary.ll +++ /dev/null @@ -1,76 +0,0 @@ -; ModuleID = '/Users/cbieneman/dev/llvm-project/clang/test/SemaHLSL/ArrayTemporary.hlsl' -source_filename = "/Users/cbieneman/dev/llvm-project/clang/test/SemaHLSL/ArrayTemporary.hlsl" -target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" -target triple = "dxil-pc-shadermodel6.3-library" - -%struct.Obj = type { float, i32 } - -@"__const.?call3@@YAXXZ.Arr" = private unnamed_addr constant [2 x [2 x float]] [[2 x float] zeroinitializer, [2 x float] [float 1.000000e+00, float 1.000000e+00]], align 4 - -; Function Attrs: noinline nounwind optnone -define void @"?fn@@YAXY01M@Z"(ptr noundef byval([2 x float]) align 4 %x) #0 { -entry: - ret void -} - -; Function Attrs: noinline nounwind optnone -define void @"?call@@YAXXZ"() #0 { -entry: - %Arr = alloca [2 x float], align 4 - %agg.tmp = alloca [2 x float], align 4 - call void @llvm.memset.p0.i32(ptr align 4 %Arr, i8 0, i32 8, i1 false) - call void @llvm.memcpy.p0.p0.i32(ptr align 4 %agg.tmp, ptr align 4 %Arr, i32 8, i1 false) - call void @"?fn@@YAXY01M@Z"(ptr noundef byval([2 x float]) align 4 %agg.tmp) - ret void -} - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: write) -declare void @llvm.memset.p0.i32(ptr nocapture writeonly, i8, i32, i1 immarg) #1 - -; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) -declare void @llvm.memcpy.p0.p0.i32(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i32, i1 immarg) #2 - -; Function Attrs: noinline nounwind optnone -define void @"?fn2@@YAXY03UObj@@@Z"(ptr noundef byval([4 x %struct.Obj]) align 4 %O) #0 { -entry: - ret void -} - -; Function Attrs: noinline nounwind optnone -define void @"?call2@@YAXXZ"() #0 { -entry: - %Arr = alloca [4 x %struct.Obj], align 4 - %agg.tmp = alloca [4 x %struct.Obj], align 4 - call void @llvm.memset.p0.i32(ptr align 4 %Arr, i8 0, i32 32, i1 false) - call void @llvm.memcpy.p0.p0.i32(ptr align 4 %agg.tmp, ptr align 4 %Arr, i32 32, i1 false) - call void @"?fn2@@YAXY03UObj@@@Z"(ptr noundef byval([4 x %struct.Obj]) align 4 %agg.tmp) - ret void -} - -; Function Attrs: noinline nounwind optnone -define void @"?fn3@@YAXY111M@Z"(ptr noundef byval([2 x [2 x float]]) align 4 %x) #0 { -entry: - ret void -} - -; Function Attrs: noinline nounwind optnone -define void @"?call3@@YAXXZ"() #0 { -entry: - %Arr = alloca [2 x [2 x float]], align 4 - %agg.tmp = alloca [2 x [2 x float]], 
align 4 - call void @llvm.memcpy.p0.p0.i32(ptr align 4 %Arr, ptr align 4 @"__const.?call3@@YAXXZ.Arr", i32 16, i1 false) - call void @llvm.memcpy.p0.p0.i32(ptr align 4 %agg.tmp, ptr align 4 %Arr, i32 16, i1 false) - call void @"?fn3@@YAXY111M@Z"(ptr noundef byval([2 x [2 x float]]) align 4 %agg.tmp) - ret void -} - -attributes #0 = { noinline nounwind optnone "no-trapping-math"="true" "stack-protector-buffer-size"="8" } -attributes #1 = { nocallback nofree nounwind willreturn memory(argmem: write) } -attributes #2 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } - -!llvm.module.flags = !{!0, !1} -!llvm.ident = !{!2} - -!0 = !{i32 1, !"wchar_size", i32 4} -!1 = !{i32 4, !"dx.disable_optimizations", i32 1} -!2 = !{!"clang version 19.0.0git (git@github.com:llvm/llvm-project.git 64e1c15c520cf11114ef2ddd887e76560903db2b)"} -- cgit v1.1 From cc308f60d41744b5920ec2e2e5b25e1273c8704b Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 3 Apr 2024 18:39:53 +0200 Subject: [clang] Support __typeof_unqual__ in all C modes (#87392) GCC has added __typeof_unqual__ to allow typeof_unqual to be used in all C modes (not just C23 and newer), similar to __typeof__ and typeof. https://gcc.gnu.org/git/?p=gcc.git;a=commit;h=607d9d50ee44163cee621cd991600acaf78c2fee The Linux kernel would like to start using __typeof_unqual__ to strip type qualifiers such as address spaces from inputs to macros but cannot switch to C23 due to compiler version requirements. Match GCC and allow __typeof_unqual__ in all C modes. Closes: https://github.com/llvm/llvm-project/issues/76423 Link: https://lore.kernel.org/CAFULd4YG21NdF_qNVBGDtXO6xnaYFeRPvKicB=gpgUUqYE=4jw@mail.gmail.com/ --- clang/docs/ReleaseNotes.rst | 3 +++ clang/include/clang/Basic/TokenKinds.def | 46 +++++++++++++++++--------------- clang/test/Parser/c2x-typeof-ext-warns.c | 7 +++-- clang/test/SemaCXX/typeof_unqual.cpp | 5 ++++ 4 files changed, 37 insertions(+), 24 deletions(-) create mode 100644 clang/test/SemaCXX/typeof_unqual.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 096376a..e4c0e49 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -193,6 +193,9 @@ Non-comprehensive list of changes in this release with support for any unsigned integer type. Like the previous builtins, these new builtins are constexpr and may be used in constant expressions. +- ``__typeof_unqual__`` is available in all C modes as an extension, which behaves + like ``typeof_unqual`` from C23, similar to ``__typeof__`` and ``typeof``. + New Compiler Flags ------------------ - ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and diff --git a/clang/include/clang/Basic/TokenKinds.def b/clang/include/clang/Basic/TokenKinds.def index 3a96f8a..800af0e 100644 --- a/clang/include/clang/Basic/TokenKinds.def +++ b/clang/include/clang/Basic/TokenKinds.def @@ -665,28 +665,30 @@ KEYWORD(__kindof , KEYOBJC) // Alternate spelling for various tokens. There are GCC extensions in all // languages, but should not be disabled in strict conformance mode. 
-ALIAS("__alignof__" , __alignof , KEYALL) -ALIAS("__asm" , asm , KEYALL) -ALIAS("__asm__" , asm , KEYALL) -ALIAS("__attribute__", __attribute, KEYALL) -ALIAS("__complex" , _Complex , KEYALL) -ALIAS("__complex__" , _Complex , KEYALL) -ALIAS("__const" , const , KEYALL) -ALIAS("__const__" , const , KEYALL) -ALIAS("__decltype" , decltype , KEYCXX) -ALIAS("__imag__" , __imag , KEYALL) -ALIAS("__inline" , inline , KEYALL) -ALIAS("__inline__" , inline , KEYALL) -ALIAS("__nullptr" , nullptr , KEYCXX) -ALIAS("__real__" , __real , KEYALL) -ALIAS("__restrict" , restrict , KEYALL) -ALIAS("__restrict__" , restrict , KEYALL) -ALIAS("__signed" , signed , KEYALL) -ALIAS("__signed__" , signed , KEYALL) -ALIAS("__typeof" , typeof , KEYALL) -ALIAS("__typeof__" , typeof , KEYALL) -ALIAS("__volatile" , volatile , KEYALL) -ALIAS("__volatile__" , volatile , KEYALL) +ALIAS("__alignof__" , __alignof , KEYALL) +ALIAS("__asm" , asm , KEYALL) +ALIAS("__asm__" , asm , KEYALL) +ALIAS("__attribute__" , __attribute , KEYALL) +ALIAS("__complex" , _Complex , KEYALL) +ALIAS("__complex__" , _Complex , KEYALL) +ALIAS("__const" , const , KEYALL) +ALIAS("__const__" , const , KEYALL) +ALIAS("__decltype" , decltype , KEYCXX) +ALIAS("__imag__" , __imag , KEYALL) +ALIAS("__inline" , inline , KEYALL) +ALIAS("__inline__" , inline , KEYALL) +ALIAS("__nullptr" , nullptr , KEYCXX) +ALIAS("__real__" , __real , KEYALL) +ALIAS("__restrict" , restrict , KEYALL) +ALIAS("__restrict__" , restrict , KEYALL) +ALIAS("__signed" , signed , KEYALL) +ALIAS("__signed__" , signed , KEYALL) +ALIAS("__typeof" , typeof , KEYALL) +ALIAS("__typeof__" , typeof , KEYALL) +ALIAS("__typeof_unqual" , typeof_unqual, KEYALL) +ALIAS("__typeof_unqual__", typeof_unqual, KEYALL) +ALIAS("__volatile" , volatile , KEYALL) +ALIAS("__volatile__" , volatile , KEYALL) // Type nullability. KEYWORD(_Nonnull , KEYALL) diff --git a/clang/test/Parser/c2x-typeof-ext-warns.c b/clang/test/Parser/c2x-typeof-ext-warns.c index 3871844..7a1f673 100644 --- a/clang/test/Parser/c2x-typeof-ext-warns.c +++ b/clang/test/Parser/c2x-typeof-ext-warns.c @@ -12,9 +12,12 @@ // standards before C23, and Clang has followed suit. Neither compiler exposes // 'typeof_unqual' as a non-conforming extension. -// Show what happens with the underscored version of the keyword, which is a -// conforming extension. +// Show what happens with the underscored version of the keywords, which are +// conforming extensions. __typeof__(int) i = 12; +__typeof(int) _i = 12; +__typeof_unqual__(int) u = 12; +__typeof_unqual(int) _u = 12; // Show what happens with a regular 'typeof' use. typeof(i) j = 12; // c11-error {{expected function body after function declarator}} \ diff --git a/clang/test/SemaCXX/typeof_unqual.cpp b/clang/test/SemaCXX/typeof_unqual.cpp new file mode 100644 index 0000000..335e579 --- /dev/null +++ b/clang/test/SemaCXX/typeof_unqual.cpp @@ -0,0 +1,5 @@ +// RUN: %clang_cc1 -fsyntax-only -verify %s + +typeof_unqual(int) u = 12; // expected-error {{expected function body after function declarator}} +__typeof_unqual(int) _u = 12; +__typeof_unqual__(int) __u = 12; -- cgit v1.1 From 2ff3850ea19f72573d8abdf9a78e52d3dfdd90ac Mon Sep 17 00:00:00 2001 From: Jonathan Peyton Date: Wed, 3 Apr 2024 11:43:23 -0500 Subject: [OpenMP] Add absolute KMP_HW_SUBSET functionality (#85326) Users can put a : in front of KMP_HW_SUBSET to indicate that the specified subset is an "absolute" subset. Currently, when a user puts KMP_HW_SUBSET=1t. This gets translated to KMP_HW_SUBSET="*s,*c,1t", where * means "use all of". 
If a user wants only one thread as the entire topology they can now do KMP_HW_SUBSET=:1t. Along with the absolute syntax is a fix for newer machines and making them easier to use with only the 3-level topology syntax. When a user puts KMP_HW_SUBSET=1s,4c,2t on a machine which actually has 4 layers, (say 1s,2m,3c,2t as the entire machine) the user gets an unexpected "too many resources asked" message because KMP_HW_SUBSET currently translates the "4c" value to mean 4 cores per module. To help users out, the runtime can assume that these newer layers, module in this case, should be ignored if they are not specified, but the topology should always take into account the sockets, cores, and threads layers. --- openmp/docs/design/Runtimes.rst | 24 +++- openmp/runtime/src/kmp_affinity.cpp | 170 +++++++++++++---------- openmp/runtime/src/kmp_affinity.h | 44 ++++++ openmp/runtime/test/affinity/kmp-abs-hw-subset.c | 95 +++++++++++++ 4 files changed, 256 insertions(+), 77 deletions(-) create mode 100644 openmp/runtime/test/affinity/kmp-abs-hw-subset.c diff --git a/openmp/docs/design/Runtimes.rst b/openmp/docs/design/Runtimes.rst index 9002fa6..f8a8cb8 100644 --- a/openmp/docs/design/Runtimes.rst +++ b/openmp/docs/design/Runtimes.rst @@ -496,7 +496,9 @@ An extended syntax is available when ``KMP_TOPOLOGY_METHOD=hwloc``. Depending on resources are detected, you may be able to specify additional resources, such as NUMA domains and groups of hardware resources that share certain cache levels. -**Basic syntax:** ``[num_units|*]ID[@offset][:attribute] [,[num_units|*]ID[@offset][:attribute]...]`` +**Basic syntax:** ``[:][num_units|*]ID[@offset][:attribute] [,[num_units|*]ID[@offset][:attribute]...]`` + +An optional colon (:) can be specified at the beginning of the syntax to specify an explicit hardware subset. The default is an implicit hardware subset. Supported unit IDs are not case-insensitive. @@ -547,6 +549,18 @@ When any numa or tile units are specified in ``KMP_HW_SUBSET`` and the hwloc topology method is available, the ``KMP_TOPOLOGY_METHOD`` will be automatically set to hwloc, so there is no need to set it explicitly. +For an **explicit hardware subset**, if one or more topology layers detected by the +runtime are omitted from the subset, then those topology layers are ignored. +Only explicitly specified topology layers are used in the subset. + +For an **implicit hardware subset**, it is implied that the socket, core, and thread +topology types should be included in the subset. Other topology layers are not +implicitly included and are ignored if they are not specified in the subset. +Because the socket, core and thread topology types are always included in +implicit hardware subsets, when they are omitted, it is assumed that all +available resources of that type should be used. Implicit hardware subsets are +the default. + If you don't specify one or more types of resource, such as socket or thread, all available resources of that type are used. @@ -565,7 +579,7 @@ This variable does not work if ``KMP_AFFINITY=disabled``. **Default:** If omitted, the default value is to use all the available hardware resources. -**Examples:** +**Implicit Hardware Subset Examples:** * ``2s,4c,2t``: Use the first 2 sockets (s0 and s1), the first 4 cores on each socket (c0 - c3), and 2 threads per core. @@ -590,6 +604,12 @@ available hardware resources. * ``*c:eff1@3``: Use all available sockets, skip the first three cores of efficiency 1, and then use the rest of the available cores of efficiency 1. 
+Explicit Hardware Subset Examples: + +* ``:2s,6t`` Use exactly the first two sockets and 6 threads per socket. +* ``:1t@7`` Skip the first 7 threads (t0-t6) and use exactly one thread (t7). +* ``:5c,1t`` Use exactly the first 5 cores (c0-c4) and the first thread on each core. + To see the result of the setting, you can specify ``verbose`` modifier in ``KMP_AFFINITY`` environment variable. The OpenMP run-time library will output to ``stderr`` the information about the discovered hardware topology before and diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index b574dbb..378e5aa 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -987,41 +987,6 @@ void kmp_topology_t::canonicalize(int npackages, int ncores_per_pkg, _discover_uniformity(); } -// Represents running sub IDs for a single core attribute where -// attribute values have SIZE possibilities. -template struct kmp_sub_ids_t { - int last_level; // last level in topology to consider for sub_ids - int sub_id[SIZE]; // The sub ID for a given attribute value - int prev_sub_id[KMP_HW_LAST]; - IndexFunc indexer; - -public: - kmp_sub_ids_t(int last_level) : last_level(last_level) { - KMP_ASSERT(last_level < KMP_HW_LAST); - for (size_t i = 0; i < SIZE; ++i) - sub_id[i] = -1; - for (size_t i = 0; i < KMP_HW_LAST; ++i) - prev_sub_id[i] = -1; - } - void update(const kmp_hw_thread_t &hw_thread) { - int idx = indexer(hw_thread); - KMP_ASSERT(idx < (int)SIZE); - for (int level = 0; level <= last_level; ++level) { - if (hw_thread.sub_ids[level] != prev_sub_id[level]) { - if (level < last_level) - sub_id[idx] = -1; - sub_id[idx]++; - break; - } - } - for (int level = 0; level <= last_level; ++level) - prev_sub_id[level] = hw_thread.sub_ids[level]; - } - int get_sub_id(const kmp_hw_thread_t &hw_thread) const { - return sub_id[indexer(hw_thread)]; - } -}; - #if KMP_AFFINITY_SUPPORTED static kmp_str_buf_t * __kmp_hw_get_catalog_core_string(const kmp_hw_attr_t &attr, kmp_str_buf_t *buf, @@ -1084,9 +1049,12 @@ bool kmp_topology_t::filter_hw_subset() { // First, sort the KMP_HW_SUBSET items by the machine topology __kmp_hw_subset->sort(); + __kmp_hw_subset->canonicalize(__kmp_topology); + // Check to see if KMP_HW_SUBSET is a valid subset of the detected topology bool using_core_types = false; bool using_core_effs = false; + bool is_absolute = __kmp_hw_subset->is_absolute(); int hw_subset_depth = __kmp_hw_subset->get_depth(); kmp_hw_t specified[KMP_HW_LAST]; int *topology_levels = (int *)KMP_ALLOCA(sizeof(int) * hw_subset_depth); @@ -1124,12 +1092,14 @@ bool kmp_topology_t::filter_hw_subset() { // Check to see if each layer's num & offset parameters are valid max_count = get_ratio(level); - if (max_count < 0 || - (num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) { - bool plural = (num > 1); - KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetManyGeneric, - __kmp_hw_get_catalog_string(type, plural)); - return false; + if (!is_absolute) { + if (max_count < 0 || + (num != kmp_hw_subset_t::USE_ALL && num + offset > max_count)) { + bool plural = (num > 1); + KMP_AFF_WARNING(__kmp_affinity, AffHWSubsetManyGeneric, + __kmp_hw_get_catalog_string(type, plural)); + return false; + } } // Check to see if core attributes are consistent @@ -1192,7 +1162,7 @@ bool kmp_topology_t::filter_hw_subset() { } // Check that the number of requested cores with attributes is valid - if (using_core_types || using_core_effs) { + if ((using_core_types || using_core_effs) && !is_absolute) { for (int j 
= 0; j < item.num_attrs; ++j) { int num = item.num[j]; int offset = item.offset[j]; @@ -1248,46 +1218,92 @@ bool kmp_topology_t::filter_hw_subset() { } } - struct core_type_indexer { - int operator()(const kmp_hw_thread_t &t) const { - switch (t.attrs.get_core_type()) { - case KMP_HW_CORE_TYPE_UNKNOWN: - case KMP_HW_MAX_NUM_CORE_TYPES: - return 0; -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - case KMP_HW_CORE_TYPE_ATOM: - return 1; - case KMP_HW_CORE_TYPE_CORE: - return 2; -#endif - } - KMP_ASSERT2(false, "Unhandled kmp_hw_thread_t enumeration"); - KMP_BUILTIN_UNREACHABLE; + // For keeping track of sub_ids for an absolute KMP_HW_SUBSET + // or core attributes (core type or efficiency) + int prev_sub_ids[KMP_HW_LAST]; + int abs_sub_ids[KMP_HW_LAST]; + int core_eff_sub_ids[KMP_HW_MAX_NUM_CORE_EFFS]; + int core_type_sub_ids[KMP_HW_MAX_NUM_CORE_TYPES]; + for (size_t i = 0; i < KMP_HW_LAST; ++i) { + abs_sub_ids[i] = -1; + prev_sub_ids[i] = -1; + } + for (size_t i = 0; i < KMP_HW_MAX_NUM_CORE_EFFS; ++i) + core_eff_sub_ids[i] = -1; + for (size_t i = 0; i < KMP_HW_MAX_NUM_CORE_TYPES; ++i) + core_type_sub_ids[i] = -1; + + // Determine which hardware threads should be filtered. + + // Helpful to determine if a topology layer is targeted by an absolute subset + auto is_targeted = [&](int level) { + if (is_absolute) { + for (int i = 0; i < hw_subset_depth; ++i) + if (topology_levels[i] == level) + return true; + return false; } + // If not absolute KMP_HW_SUBSET, then every layer is seen as targeted + return true; }; - struct core_eff_indexer { - int operator()(const kmp_hw_thread_t &t) const { - return t.attrs.get_core_eff(); + + // Helpful to index into core type sub Ids array + auto get_core_type_index = [](const kmp_hw_thread_t &t) { + switch (t.attrs.get_core_type()) { + case KMP_HW_CORE_TYPE_UNKNOWN: + case KMP_HW_MAX_NUM_CORE_TYPES: + return 0; +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + case KMP_HW_CORE_TYPE_ATOM: + return 1; + case KMP_HW_CORE_TYPE_CORE: + return 2; +#endif } + KMP_ASSERT2(false, "Unhandled kmp_hw_thread_t enumeration"); + KMP_BUILTIN_UNREACHABLE; }; - kmp_sub_ids_t core_type_sub_ids( - core_level); - kmp_sub_ids_t core_eff_sub_ids( - core_level); + // Helpful to index into core efficiencies sub Ids array + auto get_core_eff_index = [](const kmp_hw_thread_t &t) { + return t.attrs.get_core_eff(); + }; - // Determine which hardware threads should be filtered. 
int num_filtered = 0; kmp_affin_mask_t *filtered_mask; KMP_CPU_ALLOC(filtered_mask); KMP_CPU_COPY(filtered_mask, __kmp_affin_fullMask); for (int i = 0; i < num_hw_threads; ++i) { kmp_hw_thread_t &hw_thread = hw_threads[i]; - // Update type_sub_id - if (using_core_types) - core_type_sub_ids.update(hw_thread); - if (using_core_effs) - core_eff_sub_ids.update(hw_thread); + + // Figure out the absolute sub ids and core eff/type sub ids + if (is_absolute || using_core_effs || using_core_types) { + for (int level = 0; level < get_depth(); ++level) { + if (hw_thread.sub_ids[level] != prev_sub_ids[level]) { + bool found_targeted = false; + for (int j = level; j < get_depth(); ++j) { + bool targeted = is_targeted(j); + if (!found_targeted && targeted) { + found_targeted = true; + abs_sub_ids[j]++; + if (j == core_level && using_core_effs) + core_eff_sub_ids[get_core_eff_index(hw_thread)]++; + if (j == core_level && using_core_types) + core_type_sub_ids[get_core_type_index(hw_thread)]++; + } else if (targeted) { + abs_sub_ids[j] = 0; + if (j == core_level && using_core_effs) + core_eff_sub_ids[get_core_eff_index(hw_thread)] = 0; + if (j == core_level && using_core_types) + core_type_sub_ids[get_core_type_index(hw_thread)] = 0; + } + } + break; + } + } + for (int level = 0; level < get_depth(); ++level) + prev_sub_ids[level] = hw_thread.sub_ids[level]; + } // Check to see if this hardware thread should be filtered bool should_be_filtered = false; @@ -1322,20 +1338,24 @@ bool kmp_topology_t::filter_hw_subset() { int num = hw_subset_item.num[attr_idx]; int offset = hw_subset_item.offset[attr_idx]; if (using_core_types) - sub_id = core_type_sub_ids.get_sub_id(hw_thread); + sub_id = core_type_sub_ids[get_core_type_index(hw_thread)]; else - sub_id = core_eff_sub_ids.get_sub_id(hw_thread); + sub_id = core_eff_sub_ids[get_core_eff_index(hw_thread)]; if (sub_id < offset || (num != kmp_hw_subset_t::USE_ALL && sub_id >= offset + num)) { should_be_filtered = true; break; } } else { + int sub_id; int num = hw_subset_item.num[0]; int offset = hw_subset_item.offset[0]; - if (hw_thread.sub_ids[level] < offset || - (num != kmp_hw_subset_t::USE_ALL && - hw_thread.sub_ids[level] >= offset + num)) { + if (is_absolute) + sub_id = abs_sub_ids[level]; + else + sub_id = hw_thread.sub_ids[level]; + if (sub_id < offset || + (num != kmp_hw_subset_t::USE_ALL && sub_id >= offset + num)) { should_be_filtered = true; break; } diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h index 7efc090..8e9e766 100644 --- a/openmp/runtime/src/kmp_affinity.h +++ b/openmp/runtime/src/kmp_affinity.h @@ -1172,6 +1172,50 @@ public: qsort(items, depth, sizeof(item_t), hw_subset_compare); } bool specified(kmp_hw_t type) const { return ((set & (1ull << type)) > 0); } + + // Canonicalize the KMP_HW_SUBSET value if it is not an absolute subset. + // This means putting each of {sockets, cores, threads} in the topology if + // they are not specified: + // e.g., 1s,2c => 1s,2c,*t | 2c,1t => *s,2c,1t | 1t => *s,*c,1t | etc. + // e.g., 3module => *s,3module,*c,*t + // By doing this, the runtime assumes users who fiddle with KMP_HW_SUBSET + // are expecting the traditional sockets/cores/threads topology. For newer + // hardware, there can be intervening layers like dies/tiles/modules + // (usually corresponding to a cache level). So when a user asks for + // 1s,6c,2t and the topology is really 1s,2modules,4cores,2threads, the user + // should get 12 hardware threads across 6 cores and effectively ignore the + // module layer. 
+ void canonicalize(const kmp_topology_t *top) { + // Layers to target for KMP_HW_SUBSET canonicalization + kmp_hw_t targeted[] = {KMP_HW_SOCKET, KMP_HW_CORE, KMP_HW_THREAD}; + + // Do not target-layer-canonicalize absolute KMP_HW_SUBSETS + if (is_absolute()) + return; + + // Do not target-layer-canonicalize KMP_HW_SUBSETS when the + // topology doesn't have these layers + for (kmp_hw_t type : targeted) + if (top->get_level(type) == KMP_HW_UNKNOWN) + return; + + // Put targeted layers in topology if they do not exist + for (kmp_hw_t type : targeted) { + bool found = false; + for (int i = 0; i < get_depth(); ++i) { + if (top->get_equivalent_type(items[i].type) == type) { + found = true; + break; + } + } + if (!found) { + push_back(USE_ALL, type, 0, kmp_hw_attr_t{}); + } + } + sort(); + // Set as an absolute topology that only targets the targeted layers + set_absolute(); + } void dump() const { printf("**********************\n"); printf("*** kmp_hw_subset: ***\n"); diff --git a/openmp/runtime/test/affinity/kmp-abs-hw-subset.c b/openmp/runtime/test/affinity/kmp-abs-hw-subset.c new file mode 100644 index 0000000..7b3493f --- /dev/null +++ b/openmp/runtime/test/affinity/kmp-abs-hw-subset.c @@ -0,0 +1,95 @@ +// RUN: %libomp-compile -D_GNU_SOURCE +// RUN: env OMP_PLACES=threads %libomp-run 1 0 +// RUN: env OMP_PLACES=threads %libomp-run 1 1 +// RUN: env OMP_PLACES=threads %libomp-run 2 1 +// RUN: env OMP_PLACES=threads %libomp-run 2 2 +// RUN: env OMP_PLACES=threads %libomp-run 3 1 +// RUN: env OMP_PLACES=threads %libomp-run 3 2 +// REQUIRES: linux + +#include +#include +#include +#include "libomp_test_affinity.h" +#include "libomp_test_topology.h" + +// Check openmp place list to make sure it follow KMP_HW_SUBSET restriction +static int compare_abs_hw_subset_places(const place_list_t *openmp_places, + int nthreads, int offset) { + int i, j, expected_per_place; + if (openmp_places->num_places != nthreads) { + fprintf( + stderr, + "error: KMP_HW_SUBSET did not restrict the thread resource layer!\n"); + printf("openmp_places places:\n"); + topology_print_places(openmp_places); + printf("\n"); + return EXIT_FAILURE; + } + for (i = 0; i < openmp_places->num_places; ++i) { + int count = affinity_mask_count(openmp_places->masks[i]); + if (count != 1) { + fprintf(stderr, "error: place %d has %d OS procs instead of %d\n", i, + count, expected_per_place); + return EXIT_FAILURE; + } + } + return EXIT_SUCCESS; +} + +static int check_places(int nthreads, int offset) { + char buf[100]; + topology_obj_type_t type; + const char *value; + int status = EXIT_SUCCESS; + place_list_t *threads, *openmp_places; + threads = topology_alloc_type_places(TOPOLOGY_OBJ_THREAD); + + if (threads->num_places <= 1) { + printf("Only one hardware thread to execute on. Skipping test.\n"); + return status; + } + + if (nthreads + offset > threads->num_places) { + printf("Only %d total hardware threads to execute on. 
Skipping test with " + "nthreads=%d and offset=%d (too big).\n", + threads->num_places, nthreads, offset); + return status; + } + + value = getenv("OMP_PLACES"); + if (!value) { + fprintf(stderr, "error: OMP_PLACES must be set to threads!\n"); + return EXIT_FAILURE; + } + + snprintf(buf, sizeof(buf), ":1s,%dt@%d", nthreads, offset); + setenv("KMP_HW_SUBSET", buf, 1); + + openmp_places = topology_alloc_openmp_places(); + status = compare_abs_hw_subset_places(openmp_places, nthreads, offset); + topology_free_places(threads); + topology_free_places(openmp_places); + return status; +} + +int main(int argc, char **argv) { + int offset = 0; + int nthreads = 1; + + if (!topology_using_full_mask()) { + printf("Thread does not have access to all logical processors. Skipping " + "test.\n"); + return EXIT_SUCCESS; + } + + if (argc != 3) { + fprintf(stderr, "usage: %s \n", argv[0]); + return EXIT_FAILURE; + } + + nthreads = atoi(argv[1]); + offset = atoi(argv[2]); + + return check_places(nthreads, offset); +} -- cgit v1.1 From 17642c76023b7f421dac8e9fb176b0221e309a8a Mon Sep 17 00:00:00 2001 From: Krzysztof Pszeniczny Date: Wed, 3 Apr 2024 18:46:00 +0200 Subject: [SamplePGO] Support -salvage-stale-profile without probes too (#86116) Currently -salvage-stale-profile is a no-op if the profile is not probe-based. We observed that it can help for regular, non-probe- based profiles too: some of our internal benchmarks show 0.2-0.3% QPS improvement. There seems to be no good reason to limit this flag to only work for probe-based profiles. --- llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp | 8 +- .../Inputs/non-probe-stale-profile-matching.prof | 23 +++ .../non-probe-stale-profile-matching.ll | 229 +++++++++++++++++++++ 3 files changed, 256 insertions(+), 4 deletions(-) create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/non-probe-stale-profile-matching.prof create mode 100644 llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp index bb46539..1ca89e0 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp @@ -247,10 +247,10 @@ void SampleProfileMatcher::runOnFunction(Function &F) { if (ReportProfileStaleness || PersistProfileStaleness) recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr); - // Run profile matching for checksum mismatched profile, currently only - // support for pseudo-probe. - if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased && - !ProbeManager->profileIsValid(F, *FSFlattened)) { + // For probe-based profiles, run matching only when the current profile is not + // valid. 
+ if (SalvageStaleProfile && (!FunctionSamples::ProfileIsProbeBased || + !ProbeManager->profileIsValid(F, *FSFlattened))) { // For imported functions, the checksum metadata(pseudo_probe_desc) are // dropped, so we leverage function attribute(profile-checksum-mismatch) to // transfer the info: add the attribute during pre-link phase and check it diff --git a/llvm/test/Transforms/SampleProfile/Inputs/non-probe-stale-profile-matching.prof b/llvm/test/Transforms/SampleProfile/Inputs/non-probe-stale-profile-matching.prof new file mode 100644 index 0000000..8e98851 --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/Inputs/non-probe-stale-profile-matching.prof @@ -0,0 +1,23 @@ +main:9229397:0 + 0: 0 + 1: 0 + 1.1: 47663 + 1.2: 51871 + 2: 48723 + 3: 48723 bar:49018 + 4: 49087 + 5: 51871 bar:49588 + 7: 0 + 2: foo:1479916 + 1: 47663 + 1.1: 46683 bar:43238 + 2: 4519 bar:4932 + 3: 48723 + 4: foo:1505537 + 1: 48604 + 1.1: 46965 bar:44479 + 2: 4613 bar:4967 + 3: 49087 +bar:2333388:196222 + 0: 194449 + 1: 194449 diff --git a/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll new file mode 100644 index 0000000..eb69c18a --- /dev/null +++ b/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll @@ -0,0 +1,229 @@ +; REQUIRES: x86_64-linux +; REQUIRES: asserts +; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/non-probe-stale-profile-matching.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s + +; The profiled source code: + +; volatile int x = 1; +; __attribute__((noinline)) int bar(int p) { +; return p; +; } + +; __attribute__((always_inline)) int foo(int i, int p) { +; if (i % 10) return bar(p); +; else return bar(p + 1); +; } + +; int main() { +; for (int i = 0; i < 1000 * 1000; i++) { +; x += foo(i, x); +; x += bar(x); +; x += foo(i, x); +; x += bar(x); +; } +; } + +; The source code for the current build: + +; volatile int x = 1; +; __attribute__((noinline)) int bar(int p) { +; return p; +; } + +; __attribute__((always_inline)) int foo(int i, int p) { +; if (i % 10) return bar(p); +; else return bar(p + 1); +; } + +; int main() { +; if (x == 0) // code change +; return 0; // code change +; for (int i = 0; i < 1000 * 1000; i++) { +; x += foo(i, x); +; x += bar(x); +; if (i < 0) // code change +; return 0; // code change +; x += foo(i, x); +; x += bar(x); +; } +; } + +; CHECK: Run stale profile matching for bar + +; CHECK: Run stale profile matching for foo +; CHECK: Callsite with callee:bar is matched from 1.1 to 1.1 +; CHECK: Callsite with callee:bar is matched from 2 to 2 + +; CHECK: Run stale profile matching for main +; CHECK: Callsite with callee:foo is matched from 4 to 2 +; CHECK: Callsite with callee:bar is matched from 5 to 3 +; CHECK: Callsite with callee:foo is matched from 8 to 4 +; CHECK: Callsite with callee:bar is matched from 9 to 5 + +target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +@x = dso_local global i32 1, align 4 + +; Function Attrs: noinline nounwind uwtable +define dso_local i32 @bar(i32 noundef %p) #0 !dbg !9 { +entry: + ret i32 %p, !dbg !13 +} + +; Function Attrs: alwaysinline nounwind uwtable +define dso_local i32 @foo(i32 noundef %i, i32 noundef %p) #1 !dbg !14 { +entry: + %rem = srem i32 %i, 10, !dbg !15 + %tobool = icmp ne i32 %rem, 0, !dbg !15 + br i1 %tobool, label 
%if.then, label %if.else, !dbg !16 + +if.then: ; preds = %entry + %call = call i32 @bar(i32 noundef %p), !dbg !17 + br label %return, !dbg !19 + +if.else: ; preds = %entry + %add = add nsw i32 %p, 1, !dbg !20 + %call1 = call i32 @bar(i32 noundef %add), !dbg !21 + br label %return, !dbg !22 + +return: ; preds = %if.else, %if.then + %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ], !dbg !23 + ret i32 %retval.0, !dbg !24 +} + +; Function Attrs: nounwind uwtable +define dso_local i32 @main() #2 !dbg !25 { +entry: + %0 = load volatile i32, ptr @x, align 4, !dbg !26, !tbaa !27 + %cmp = icmp eq i32 %0, 0, !dbg !31 + br i1 %cmp, label %if.then, label %if.end, !dbg !26 + +if.then: ; preds = %entry + br label %for.end, !dbg !32 + +if.end: ; preds = %entry + br label %for.cond, !dbg !33 + +for.cond: ; preds = %if.end6, %if.end + %i.0 = phi i32 [ 0, %if.end ], [ %inc, %if.end6 ], !dbg !34 + %cmp1 = icmp slt i32 %i.0, 1000000, !dbg !35 + br i1 %cmp1, label %for.body, label %for.cond.cleanup, !dbg !37 + +for.cond.cleanup: ; preds = %for.cond + br label %cleanup, !dbg !38 + +for.body: ; preds = %for.cond + %1 = load volatile i32, ptr @x, align 4, !dbg !40, !tbaa !27 + %call = call i32 @foo(i32 noundef %i.0, i32 noundef %1), !dbg !41 + %2 = load volatile i32, ptr @x, align 4, !dbg !42, !tbaa !27 + %add = add nsw i32 %2, %call, !dbg !42 + store volatile i32 %add, ptr @x, align 4, !dbg !42, !tbaa !27 + %3 = load volatile i32, ptr @x, align 4, !dbg !43, !tbaa !27 + %call2 = call i32 @bar(i32 noundef %3), !dbg !44 + %4 = load volatile i32, ptr @x, align 4, !dbg !45, !tbaa !27 + %add3 = add nsw i32 %4, %call2, !dbg !45 + store volatile i32 %add3, ptr @x, align 4, !dbg !45, !tbaa !27 + br i1 false, label %if.then5, label %if.end6, !dbg !46 + +if.then5: ; preds = %for.body + br label %cleanup, !dbg !47 + +if.end6: ; preds = %for.body + %5 = load volatile i32, ptr @x, align 4, !dbg !48, !tbaa !27 + %call7 = call i32 @foo(i32 noundef %i.0, i32 noundef %5), !dbg !49 + %6 = load volatile i32, ptr @x, align 4, !dbg !50, !tbaa !27 + %add8 = add nsw i32 %6, %call7, !dbg !50 + store volatile i32 %add8, ptr @x, align 4, !dbg !50, !tbaa !27 + %7 = load volatile i32, ptr @x, align 4, !dbg !51, !tbaa !27 + %call9 = call i32 @bar(i32 noundef %7), !dbg !52 + %8 = load volatile i32, ptr @x, align 4, !dbg !53, !tbaa !27 + %add10 = add nsw i32 %8, %call9, !dbg !53 + store volatile i32 %add10, ptr @x, align 4, !dbg !53, !tbaa !27 + %inc = add nsw i32 %i.0, 1, !dbg !54 + br label %for.cond, !dbg !56, !llvm.loop !57 + +cleanup: ; preds = %if.then5, %for.cond.cleanup + br label %for.end + +for.end: ; preds = %cleanup, %if.then + ret i32 0, !dbg !61 +} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #3 + +attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #1 = { alwaysinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #2 = { 
nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" } +attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } + +!llvm.dbg.cu = !{!0} +!llvm.module.flags = !{!2, !3, !4, !5, !6, !7} +!llvm.ident = !{!8} + +!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0git", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None) +!1 = !DIFile(filename: "test.c", directory: "path") +!2 = !{i32 7, !"Dwarf Version", i32 5} +!3 = !{i32 2, !"Debug Info Version", i32 3} +!4 = !{i32 1, !"wchar_size", i32 4} +!5 = !{i32 8, !"PIC Level", i32 2} +!6 = !{i32 7, !"PIE Level", i32 2} +!7 = !{i32 7, !"uwtable", i32 2} +!8 = !{!"clang version 19.0.0git"} +!9 = distinct !DISubprogram(name: "bar", scope: !10, file: !10, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!10 = !DIFile(filename: "test.c", directory: "path") +!11 = !DISubroutineType(types: !12) +!12 = !{} +!13 = !DILocation(line: 3, column: 3, scope: !9) +!14 = distinct !DISubprogram(name: "foo", scope: !10, file: !10, line: 6, type: !11, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!15 = !DILocation(line: 7, column: 9, scope: !14) +!16 = !DILocation(line: 7, column: 7, scope: !14) +!17 = !DILocation(line: 7, column: 23, scope: !18) +!18 = !DILexicalBlockFile(scope: !14, file: !10, discriminator: 2) +!19 = !DILocation(line: 7, column: 15, scope: !18) +!20 = !DILocation(line: 8, column: 21, scope: !14) +!21 = !DILocation(line: 8, column: 15, scope: !14) +!22 = !DILocation(line: 8, column: 8, scope: !14) +!23 = !DILocation(line: 0, scope: !14) +!24 = !DILocation(line: 9, column: 1, scope: !14) +!25 = distinct !DISubprogram(name: "main", scope: !10, file: !10, line: 11, type: !11, scopeLine: 11, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0) +!26 = !DILocation(line: 12, column: 7, scope: !25) +!27 = !{!28, !28, i64 0} +!28 = !{!"int", !29, i64 0} +!29 = !{!"omnipotent char", !30, i64 0} +!30 = !{!"Simple C/C++ TBAA"} +!31 = !DILocation(line: 12, column: 9, scope: !25) +!32 = !DILocation(line: 13, column: 5, scope: !25) +!33 = !DILocation(line: 14, column: 8, scope: !25) +!34 = !DILocation(line: 14, scope: !25) +!35 = !DILocation(line: 14, column: 21, scope: !36) +!36 = !DILexicalBlockFile(scope: !25, file: !10, discriminator: 2) +!37 = !DILocation(line: 14, column: 3, scope: !36) +!38 = !DILocation(line: 14, column: 3, scope: !39) +!39 = !DILexicalBlockFile(scope: !25, file: !10, discriminator: 4) +!40 = !DILocation(line: 15, column: 18, scope: !25) +!41 = !DILocation(line: 15, column: 11, scope: !25) +!42 = !DILocation(line: 15, column: 8, scope: !25) +!43 = !DILocation(line: 16, column: 15, scope: !25) +!44 = !DILocation(line: 16, column: 11, scope: !25) +!45 = !DILocation(line: 16, column: 8, scope: !25) +!46 = !DILocation(line: 17, column: 10, scope: !25) +!47 = !DILocation(line: 18, column: 8, scope: !25) +!48 = !DILocation(line: 19, column: 18, scope: !25) +!49 = !DILocation(line: 19, column: 11, scope: !25) +!50 = !DILocation(line: 19, column: 8, scope: !25) +!51 = !DILocation(line: 20, 
column: 15, scope: !25) +!52 = !DILocation(line: 20, column: 11, scope: !25) +!53 = !DILocation(line: 20, column: 8, scope: !25) +!54 = !DILocation(line: 14, column: 37, scope: !55) +!55 = !DILexicalBlockFile(scope: !25, file: !10, discriminator: 6) +!56 = !DILocation(line: 14, column: 3, scope: !55) +!57 = distinct !{!57, !58, !59, !60} +!58 = !DILocation(line: 14, column: 3, scope: !25) +!59 = !DILocation(line: 21, column: 3, scope: !25) +!60 = !{!"llvm.loop.mustprogress"} +!61 = !DILocation(line: 22, column: 1, scope: !25) -- cgit v1.1 From 5b702be1e80b8733786ac48ceaf04f2936616d1b Mon Sep 17 00:00:00 2001 From: Prashant Kumar Date: Wed, 3 Apr 2024 22:19:26 +0530 Subject: [mlir][math] Convert math.fpowi to math.powf in case of non constant (#87472) Convert math.fpowi to math.powf by converting dtype of power operand to floating point. --- .../lib/Dialect/Math/Transforms/ExpandPatterns.cpp | 20 ++++++--- mlir/test/Dialect/Math/expand-math.mlir | 48 ++++++++++++++++++++++ 2 files changed, 63 insertions(+), 5 deletions(-) diff --git a/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp b/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp index 0b85462..42629e1 100644 --- a/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp +++ b/mlir/lib/Dialect/Math/Transforms/ExpandPatterns.cpp @@ -216,20 +216,30 @@ static LogicalResult convertCeilOp(math::CeilOp op, PatternRewriter &rewriter) { // Convert `math.fpowi` to a series of `arith.mulf` operations. // If the power is negative, we divide one by the result. // If both the base and power are zero, the result is 1. -static LogicalResult convertFPowICstOp(math::FPowIOp op, - PatternRewriter &rewriter) { +// In the case of non constant power, we convert the operation to `math.powf`. +static LogicalResult convertFPowIOp(math::FPowIOp op, + PatternRewriter &rewriter) { ImplicitLocOpBuilder b(op->getLoc(), rewriter); Value base = op.getOperand(0); Value power = op.getOperand(1); Type baseType = base.getType(); + auto convertFPowItoPowf = [&]() -> LogicalResult { + Value castPowerToFp = + rewriter.create(op.getLoc(), baseType, power); + Value res = rewriter.create(op.getLoc(), baseType, base, + castPowerToFp); + rewriter.replaceOp(op, res); + return success(); + }; + Attribute cstAttr; if (!matchPattern(power, m_Constant(&cstAttr))) - return failure(); + return convertFPowItoPowf(); APInt value; if (!matchPattern(cstAttr, m_ConstantInt(&value))) - return failure(); + return convertFPowItoPowf(); int64_t powerInt = value.getSExtValue(); bool isNegative = powerInt < 0; @@ -591,7 +601,7 @@ void mlir::populateExpandPowFPattern(RewritePatternSet &patterns) { } void mlir::populateExpandFPowIPattern(RewritePatternSet &patterns) { - patterns.add(convertFPowICstOp); + patterns.add(convertFPowIOp); } void mlir::populateExpandRoundFPattern(RewritePatternSet &patterns) { diff --git a/mlir/test/Dialect/Math/expand-math.mlir b/mlir/test/Dialect/Math/expand-math.mlir index bfcff27..3d94b55 100644 --- a/mlir/test/Dialect/Math/expand-math.mlir +++ b/mlir/test/Dialect/Math/expand-math.mlir @@ -610,3 +610,51 @@ func.func @math_fpowi_scalar_zero(%0 : f32) -> f32 { // CHECK: return %[[RET]] : f32 // ----- + +// CHECK-LABEL: func.func @math_fpowi_to_powf_tensor +func.func @math_fpowi_to_powf_tensor(%0 : tensor<8xf32>, %1: tensor<8xi32>) -> tensor<8xf32> { + %2 = math.fpowi %0, %1 : tensor<8xf32>, tensor<8xi32> + return %2 : tensor<8xf32> +} +// CHECK-SAME: (%[[ARG0:.*]]: tensor<8xf32>, %[[ARG1:.*]]: tensor<8xi32>) -> tensor<8xf32> { +// CHECK: %[[CSTNEG1:.*]] = 
arith.constant dense<-1.000000e+00> : tensor<8xf32> +// CHECK: %[[CST2:.*]] = arith.constant dense<2.000000e+00> : tensor<8xf32> +// CHECK: %[[CST0:.*]] = arith.constant dense<0.000000e+00> : tensor<8xf32> +// CHECK: %[[TOFP:.*]] = arith.sitofp %[[ARG1]] : tensor<8xi32> to tensor<8xf32> +// CHECK: %[[SQ:.*]] = arith.mulf %[[ARG0]], %[[ARG0]] : tensor<8xf32> +// CHECK: %[[DIV:.*]] = arith.divf %[[TOFP]], %[[CST2]] : tensor<8xf32> +// CHECK: %[[LG:.*]] = math.log %[[SQ]] : tensor<8xf32> +// CHECK: %[[MUL:.*]] = arith.mulf %[[DIV]], %[[LG]] : tensor<8xf32> +// CHECK: %[[EXP:.*]] = math.exp %[[MUL]] : tensor<8xf32> +// CHECK: %[[MUL1:.*]] = arith.mulf %[[EXP]], %[[CSTNEG1]] : tensor<8xf32> +// CHECK: %[[REM:.*]] = arith.remf %[[TOFP]], %[[CST2]] : tensor<8xf32> +// CHECK: %[[CMPF:.*]] = arith.cmpf olt, %[[ARG0]], %[[CST0]] : tensor<8xf32> +// CHECK: %[[CMPF1:.*]] = arith.cmpf one, %[[REM]], %[[CST0]] : tensor<8xf32> +// CHECK: %[[AND:.*]] = arith.andi %[[CMPF1]], %[[CMPF]] : tensor<8xi1> +// CHECK: %[[SEL:.*]] = arith.select %[[AND]], %[[MUL1]], %[[EXP]] : tensor<8xi1>, tensor<8xf32> +// CHECK: return %[[SEL]] : tensor<8xf32> + +// ----- + +// CHECK-LABEL: func.func @math_fpowi_to_powf_scalar +func.func @math_fpowi_to_powf_scalar(%0 : f32, %1: i64) -> f32 { + %2 = math.fpowi %0, %1 : f32, i64 + return %2 : f32 +} +// CHECK-SAME: (%[[ARG0:.*]]: f32, %[[ARG1:.*]]: i64) -> f32 { +// CHECK: %[[CSTNEG1:.*]] = arith.constant -1.000000e+00 : f32 +// CHECK: %[[CST2:.*]] = arith.constant 2.000000e+00 : f32 +// CHECK: %[[CST0:.*]] = arith.constant 0.000000e+00 : f32 +// CHECK: %[[TOFP:.*]] = arith.sitofp %[[ARG1]] : i64 to f32 +// CHECK: %[[SQ:.*]] = arith.mulf %[[ARG0]], %[[ARG0]] : f32 +// CHECK: %[[DIV:.*]] = arith.divf %[[TOFP]], %[[CST2]] : f32 +// CHECK: %[[LG:.*]] = math.log %[[SQ]] : f32 +// CHECK: %[[MUL:.*]] = arith.mulf %[[DIV]], %[[LG]] : f32 +// CHECK: %[[EXP:.*]] = math.exp %[[MUL]] : f32 +// CHECK: %[[MUL1:.*]] = arith.mulf %[[EXP]], %[[CSTNEG1]] : f32 +// CHECK: %[[REM:.*]] = arith.remf %[[TOFP]], %[[CST2]] : f32 +// CHECK: %[[CMPF:.*]] = arith.cmpf olt, %[[ARG0]], %[[CST0]] : f32 +// CHECK: %[[CMPF1:.*]] = arith.cmpf one, %[[REM]], %[[CST0]] : f32 +// CHECK: %[[AND:.*]] = arith.andi %[[CMPF1]], %[[CMPF]] : i1 +// CHECK: %[[SEL:.*]] = arith.select %[[AND]], %[[MUL1]], %[[EXP]] : f32 +// CHECK: return %[[SEL]] : f32 -- cgit v1.1 From 1189e87951e59a81ee097eae847c06008276fef1 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 3 Apr 2024 09:55:45 -0700 Subject: [CodeGen] Fix a warning This patch fixes: clang/lib/CodeGen/CGExpr.cpp:5607:11: error: variable 'Result' is used uninitialized whenever 'if' condition is false [-Werror,-Wsometimes-uninitialized] --- clang/lib/CodeGen/CGExpr.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index f70324d..0c7f48f 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5601,7 +5601,7 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc()); if (LV.isBitField()) { - llvm::Value *Result; + llvm::Value *Result = nullptr; // If bitfield sanitizers are enabled we want to use the result // to check whether a truncation or sign change has occurred. 
if (SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) -- cgit v1.1 From 33992eabc7834e32094e7187dc10225f1a3773a5 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Wed, 3 Apr 2024 09:58:41 -0700 Subject: [Offload][NFC] Add offload subfolder and README (#77154) The readme only states the goal and has links to further information, e.g., our meetings. --------- Co-authored-by: Shilei Tian --- offload/README.md | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 offload/README.md diff --git a/offload/README.md b/offload/README.md new file mode 100644 index 0000000..6956c1a --- /dev/null +++ b/offload/README.md @@ -0,0 +1,20 @@ +# The LLVM/Offload Subproject + +The Offload subproject aims at providing tooling, runtimes, and APIs that allow +users to execute code on accelerators or other "co-processors" that may or may +not match the architecture of their "host". In the long run, all kinds of +targets are in scope of this effort, including but not limited to: CPUs, GPUs, +FPGAs, AI/ML accelerators, distributed resources, etc. + +The project is just starting and the design is still not ironed out. More +content will show up here and on our webpage soon. In the meantime people are +encouraged to participate in our meetings (see below) and check our +[development board](https://github.com/orgs/llvm/projects/24/) as well as the +discussions on [Discourse](https://discourse.llvm.org/tag/offload). + +# Meetings + +Every second Wednesday, 7:00 - 8:00am PT, starting Jan 24, 2024. +Alternates with the OpenMP in LLVM meeting. +[invite.ics](https://drive.google.com/file/d/1AYwKdnM01aV9Gv9k435ArEAhn7PAer7z/view?usp=sharing) +[Meeting Minutes and Agenda](https://docs.google.com/document/d/1PAeEshxHCv22JDBCPA9GXGggLp0t7rsnD_jL04MBbzw/edit?usp=sharing) -- cgit v1.1 From 07a566793b2f94d0de6b95b7e6d1146b0d7ffe49 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 3 Apr 2024 09:37:23 -0700 Subject: [SLP]Fix PR87477: fix alternate node cast cost/codegen. Have to compare actual type size to pick up proper cast operation opcode. 
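The decision this fix encodes can be summarized with a small stand-alone sketch. This is illustrative only: CastKind, pickCast, and the widths used below are hypothetical stand-ins and do not mirror the actual SLPVectorizer data structures; in the real patch the widths come from DL->getTypeSizeInBits() and the MinBWs map, and the opcodes are Instruction::Trunc/SExt/ZExt.

#include <cstdio>

// Hypothetical stand-ins for the cast opcodes involved.
enum CastKind { NoCast, Trunc, SExt, ZExt };

// Pick the cast needed to go from a source integer of SrcBits (possibly
// already shrunk by minimum-bit-width analysis) to a destination of DstBits.
// The point of the fix: compare the actual sizes rather than only checking
// whether a minimum-bit-width entry exists for the operand.
static CastKind pickCast(CastKind OrigExt, unsigned SrcBits, unsigned DstBits) {
  if (DstBits < SrcBits)
    return Trunc;   // value became narrower: truncate
  if (DstBits == SrcBits)
    return NoCast;  // same width: no cast is needed at all
  return OrigExt;   // genuinely wider: keep the node's SExt/ZExt
}

int main() {
  std::printf("%d %d %d\n",
              pickCast(SExt, 64, 8),  // Trunc
              pickCast(SExt, 8, 8),   // NoCast
              pickCast(ZExt, 1, 8));  // ZExt
}

Keeping the width comparison explicit is what lets both the cost model and the codegen path agree on whether a truncation, a no-op, or the alternate extension is emitted.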
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 65 +++++++++++++--------- .../SLPVectorizer/SystemZ/ext-alt-node-must-ext.ll | 34 +++++++++++ 2 files changed, 74 insertions(+), 25 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/SystemZ/ext-alt-node-must-ext.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index cb55992..7928d29 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -9063,25 +9063,35 @@ BoUpSLP::getEntryCost(const TreeEntry *E, ArrayRef VectorizedVals, cast(E->getAltOp())->getPredicate(), CostKind, E->getAltOp()); } else { - Type *Src0SclTy = E->getMainOp()->getOperand(0)->getType(); - Type *Src1SclTy = E->getAltOp()->getOperand(0)->getType(); - auto *Src0Ty = FixedVectorType::get(Src0SclTy, VL.size()); - auto *Src1Ty = FixedVectorType::get(Src1SclTy, VL.size()); - if (It != MinBWs.end()) { - if (!MinBWs.contains(getOperandEntry(E, 0))) - VecCost = - TTIRef.getCastInstrCost(Instruction::Trunc, VecTy, Src0Ty, - TTI::CastContextHint::None, CostKind); - LLVM_DEBUG({ - dbgs() << "SLP: alternate extension, which should be truncated.\n"; - E->dump(); - }); - return VecCost; + Type *SrcSclTy = E->getMainOp()->getOperand(0)->getType(); + auto *SrcTy = FixedVectorType::get(SrcSclTy, VL.size()); + if (SrcSclTy->isIntegerTy() && ScalarTy->isIntegerTy()) { + auto SrcIt = MinBWs.find(getOperandEntry(E, 0)); + unsigned BWSz = DL->getTypeSizeInBits(ScalarTy); + unsigned SrcBWSz = + DL->getTypeSizeInBits(E->getMainOp()->getOperand(0)->getType()); + if (SrcIt != MinBWs.end()) { + SrcBWSz = SrcIt->second.first; + SrcSclTy = IntegerType::get(SrcSclTy->getContext(), SrcBWSz); + SrcTy = FixedVectorType::get(SrcSclTy, VL.size()); + } + if (BWSz <= SrcBWSz) { + if (BWSz < SrcBWSz) + VecCost = + TTIRef.getCastInstrCost(Instruction::Trunc, VecTy, SrcTy, + TTI::CastContextHint::None, CostKind); + LLVM_DEBUG({ + dbgs() + << "SLP: alternate extension, which should be truncated.\n"; + E->dump(); + }); + return VecCost; + } } - VecCost = TTIRef.getCastInstrCost(E->getOpcode(), VecTy, Src0Ty, + VecCost = TTIRef.getCastInstrCost(E->getOpcode(), VecTy, SrcTy, TTI::CastContextHint::None, CostKind); VecCost += - TTIRef.getCastInstrCost(E->getAltOpcode(), VecTy, Src1Ty, + TTIRef.getCastInstrCost(E->getAltOpcode(), VecTy, SrcTy, TTI::CastContextHint::None, CostKind); } SmallVector Mask; @@ -12591,15 +12601,20 @@ Value *BoUpSLP::vectorizeTree(TreeEntry *E, bool PostponedPHIs) { CmpInst::Predicate AltPred = AltCI->getPredicate(); V1 = Builder.CreateCmp(AltPred, LHS, RHS); } else { - if (It != MinBWs.end()) { - if (!MinBWs.contains(getOperandEntry(E, 0))) - LHS = Builder.CreateIntCast(LHS, VecTy, It->second.first); - assert(LHS->getType() == VecTy && "Expected same type as operand."); - if (auto *I = dyn_cast(LHS)) - LHS = propagateMetadata(I, E->Scalars); - E->VectorizedValue = LHS; - ++NumVectorInstructions; - return LHS; + if (LHS->getType()->isIntOrIntVectorTy() && ScalarTy->isIntegerTy()) { + unsigned SrcBWSz = DL->getTypeSizeInBits( + cast(LHS->getType())->getElementType()); + unsigned BWSz = DL->getTypeSizeInBits(ScalarTy); + if (BWSz <= SrcBWSz) { + if (BWSz < SrcBWSz) + LHS = Builder.CreateIntCast(LHS, VecTy, It->second.first); + assert(LHS->getType() == VecTy && "Expected same type as operand."); + if (auto *I = dyn_cast(LHS)) + LHS = propagateMetadata(I, E->Scalars); + E->VectorizedValue = LHS; + ++NumVectorInstructions; + return LHS; + } } V0 = 
Builder.CreateCast( static_cast(E->getOpcode()), LHS, VecTy); diff --git a/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-alt-node-must-ext.ll b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-alt-node-must-ext.ll new file mode 100644 index 0000000..979d0ea --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/SystemZ/ext-alt-node-must-ext.ll @@ -0,0 +1,34 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=systemz-unknown -mcpu=z15 < %s -slp-threshold=-10 | FileCheck %s + +define i32 @test(ptr %0, ptr %1) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[TMP3:%.*]] = load i64, ptr inttoptr (i64 32 to ptr), align 32 +; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[TMP1]], align 8 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i8, ptr [[TMP4]], i64 32 +; CHECK-NEXT: [[TMP6:%.*]] = load i64, ptr [[TMP5]], align 8 +; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x i64> poison, i64 [[TMP6]], i32 0 +; CHECK-NEXT: [[TMP14:%.*]] = insertelement <2 x i64> [[TMP7]], i64 [[TMP3]], i32 1 +; CHECK-NEXT: [[TMP9:%.*]] = icmp ne <2 x i64> [[TMP14]], zeroinitializer +; CHECK-NEXT: [[TMP16:%.*]] = sext <2 x i1> [[TMP9]] to <2 x i8> +; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i1> [[TMP9]] to <2 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <2 x i8> [[TMP16]], <2 x i8> [[TMP11]], <2 x i32> +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <2 x i8> [[TMP12]], i32 0 +; CHECK-NEXT: [[DOTNEG:%.*]] = sext i8 [[TMP13]] to i32 +; CHECK-NEXT: [[TMP15:%.*]] = extractelement <2 x i8> [[TMP12]], i32 1 +; CHECK-NEXT: [[TMP8:%.*]] = sext i8 [[TMP15]] to i32 +; CHECK-NEXT: [[TMP10:%.*]] = add nsw i32 [[DOTNEG]], [[TMP8]] +; CHECK-NEXT: ret i32 [[TMP10]] +; + %3 = load i64, ptr inttoptr (i64 32 to ptr), align 32 + %4 = load ptr, ptr %1, align 8 + %5 = getelementptr inbounds i8, ptr %4, i64 32 + %6 = load i64, ptr %5, align 8 + %7 = icmp ne i64 %3, 0 + %8 = zext i1 %7 to i32 + %9 = icmp ne i64 %6, 0 + %.neg = sext i1 %9 to i32 + %10 = add nsw i32 %.neg, %8 + ret i32 %10 +} -- cgit v1.1 From 315c88c5fbdb2b27cebf23c87fb502f7a567d84b Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Wed, 3 Apr 2024 10:19:06 -0700 Subject: [flang] Fixed MODULO(x, inf) to produce NaN. (#86145) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Straightforward computation of `A − FLOOR (A / P) * P` should produce NaN, when P is infinity. The -menable-no-infs lowering can still use the relaxed operations sequence. --- flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 5 ++- flang/lib/Optimizer/Builder/Runtime/Numeric.cpp | 22 +++++++++++- flang/runtime/numeric-templates.h | 29 +++++++++++++--- flang/test/Lower/Intrinsics/modulo.f90 | 18 +++++----- flang/unittests/Runtime/Numeric.cpp | 46 +++++++++++++++++++++++++ 5 files changed, 105 insertions(+), 15 deletions(-) diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 069ba81..5f6de94 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -5259,9 +5259,12 @@ mlir::Value IntrinsicLibrary::genModulo(mlir::Type resultType, remainder); } + auto fastMathFlags = builder.getFastMathFlags(); // F128 arith::RemFOp may be lowered to a runtime call that may be unsupported // on the target, so generate a call to Fortran Runtime's ModuloReal16. 
- if (resultType == mlir::FloatType::getF128(builder.getContext())) + if (resultType == mlir::FloatType::getF128(builder.getContext()) || + (fastMathFlags & mlir::arith::FastMathFlags::ninf) == + mlir::arith::FastMathFlags::none) return builder.createConvert( loc, resultType, fir::runtime::genModulo(builder, loc, args[0], args[1])); diff --git a/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp b/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp index 4dcbd13..81d5d21 100644 --- a/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp +++ b/flang/lib/Optimizer/Builder/Runtime/Numeric.cpp @@ -118,6 +118,20 @@ struct ForcedMod16 { } }; +/// Placeholder for real*10 version of Modulo Intrinsic +struct ForcedModulo10 { + static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ModuloReal10)); + static constexpr fir::runtime::FuncTypeBuilderFunc getTypeModel() { + return [](mlir::MLIRContext *ctx) { + auto fltTy = mlir::FloatType::getF80(ctx); + auto strTy = fir::ReferenceType::get(mlir::IntegerType::get(ctx, 8)); + auto intTy = mlir::IntegerType::get(ctx, 8 * sizeof(int)); + return mlir::FunctionType::get(ctx, {fltTy, fltTy, strTy, intTy}, + {fltTy}); + }; + } +}; + /// Placeholder for real*16 version of Modulo Intrinsic struct ForcedModulo16 { static constexpr const char *name = ExpandAndQuoteKey(RTNAME(ModuloReal16)); @@ -349,7 +363,13 @@ mlir::Value fir::runtime::genModulo(fir::FirOpBuilder &builder, // MODULO is lowered into math operations in intrinsics lowering, // so genModulo() should only be used for F128 data type now. - if (fltTy.isF128()) + if (fltTy.isF32()) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (fltTy.isF64()) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (fltTy.isF80()) + func = fir::runtime::getRuntimeFunc(loc, builder); + else if (fltTy.isF128()) func = fir::runtime::getRuntimeFunc(loc, builder); else fir::intrinsicTypeTODO(builder, fltTy, loc, "MODULO"); diff --git a/flang/runtime/numeric-templates.h b/flang/runtime/numeric-templates.h index af552f9..4936e77 100644 --- a/flang/runtime/numeric-templates.h +++ b/flang/runtime/numeric-templates.h @@ -237,8 +237,12 @@ inline RT_API_ATTRS T RealMod( if (ISNANTy::compute(a) || ISNANTy::compute(p) || ISINFTy::compute(a)) { return QNANTy::compute(); - } else if (ISINFTy::compute(p)) { - return a; + } else if (IS_MODULO && ISINFTy::compute(p)) { + // Other compilers behave consistently for MOD(x, +/-INF) + // and always return x. This is probably related to + // implementation of std::fmod(). Stick to this behavior + // for MOD, but return NaN for MODULO(x, +/-INF). + return QNANTy::compute(); } T aAbs{ABSTy::compute(a)}; T pAbs{ABSTy::compute(p)}; @@ -248,8 +252,19 @@ inline RT_API_ATTRS T RealMod( if (auto pInt{static_cast(p)}; p == pInt) { // Fast exact case for integer operands auto mod{aInt - (aInt / pInt) * pInt}; - if (IS_MODULO && (aInt > 0) != (pInt > 0)) { - mod += pInt; + if constexpr (IS_MODULO) { + if (mod == 0) { + // Return properly signed zero. + return pInt > 0 ? T{0} : -T{0}; + } + if ((aInt > 0) != (pInt > 0)) { + mod += pInt; + } + } else { + if (mod == 0) { + // Return properly signed zero. + return aInt > 0 ? T{0} : -T{0}; + } } return static_cast(mod); } @@ -297,7 +312,11 @@ inline RT_API_ATTRS T RealMod( } if constexpr (IS_MODULO) { if ((a < 0) != (p < 0)) { - tmp += p; + if (tmp == 0.) 
{ + tmp = -tmp; + } else { + tmp += p; + } } } return tmp; diff --git a/flang/test/Lower/Intrinsics/modulo.f90 b/flang/test/Lower/Intrinsics/modulo.f90 index 383cb34..ac18e59 100644 --- a/flang/test/Lower/Intrinsics/modulo.f90 +++ b/flang/test/Lower/Intrinsics/modulo.f90 @@ -1,11 +1,13 @@ -! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s +! RUN: bbc -emit-fir -hlfir=false %s -o - | FileCheck %s -check-prefixes=HONORINF,ALL +! RUN: flang-new -fc1 -menable-no-infs -emit-fir -flang-deprecated-no-hlfir %s -o - | FileCheck %s -check-prefixes=CHECK,ALL -! CHECK-LABEL: func @_QPmodulo_testr( -! CHECK-SAME: %[[arg0:.*]]: !fir.ref{{.*}}, %[[arg1:.*]]: !fir.ref{{.*}}, %[[arg2:.*]]: !fir.ref{{.*}}) { +! ALL-LABEL: func @_QPmodulo_testr( +! ALL-SAME: %[[arg0:.*]]: !fir.ref{{.*}}, %[[arg1:.*]]: !fir.ref{{.*}}, %[[arg2:.*]]: !fir.ref{{.*}}) { subroutine modulo_testr(r, a, p) real(8) :: r, a, p - ! CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref - ! CHECK-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref + ! ALL-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref + ! ALL-DAG: %[[p:.*]] = fir.load %[[arg2]] : !fir.ref + ! HONORINF: %[[res:.*]] = fir.call @_FortranAModuloReal8(%[[a]], %[[p]] ! CHECK-DAG: %[[rem:.*]] = arith.remf %[[a]], %[[p]] {{.*}}: f64 ! CHECK-DAG: %[[zero:.*]] = arith.constant 0.000000e+00 : f64 ! CHECK-DAG: %[[remNotZero:.*]] = arith.cmpf une, %[[rem]], %[[zero]] {{.*}} : f64 @@ -15,12 +17,12 @@ subroutine modulo_testr(r, a, p) ! CHECK-DAG: %[[mustAddP:.*]] = arith.andi %[[remNotZero]], %[[signDifferent]] : i1 ! CHECK-DAG: %[[remPlusP:.*]] = arith.addf %[[rem]], %[[p]] {{.*}}: f64 ! CHECK: %[[res:.*]] = arith.select %[[mustAddP]], %[[remPlusP]], %[[rem]] : f64 - ! CHECK: fir.store %[[res]] to %[[arg0]] : !fir.ref + ! ALL: fir.store %[[res]] to %[[arg0]] : !fir.ref r = modulo(a, p) end subroutine -! CHECK-LABEL: func @_QPmodulo_testi( -! CHECK-SAME: %[[arg0:.*]]: !fir.ref{{.*}}, %[[arg1:.*]]: !fir.ref{{.*}}, %[[arg2:.*]]: !fir.ref{{.*}}) { +! ALL-LABEL: func @_QPmodulo_testi( +! ALL-SAME: %[[arg0:.*]]: !fir.ref{{.*}}, %[[arg1:.*]]: !fir.ref{{.*}}, %[[arg2:.*]]: !fir.ref{{.*}}) { subroutine modulo_testi(r, a, p) integer(8) :: r, a, p ! 
CHECK-DAG: %[[a:.*]] = fir.load %[[arg1]] : !fir.ref diff --git a/flang/unittests/Runtime/Numeric.cpp b/flang/unittests/Runtime/Numeric.cpp index 43263d1..b69ff21 100644 --- a/flang/unittests/Runtime/Numeric.cpp +++ b/flang/unittests/Runtime/Numeric.cpp @@ -65,6 +65,30 @@ TEST(Numeric, Mod) { EXPECT_EQ(RTNAME(ModReal4)(Real<4>{-8.0}, Real<4>(5.0)), -3.0); EXPECT_EQ(RTNAME(ModReal8)(Real<8>{8.0}, Real<8>(-5.0)), 3.0); EXPECT_EQ(RTNAME(ModReal8)(Real<8>{-8.0}, Real<8>(-5.0)), -3.0); + EXPECT_EQ( + RTNAME(ModReal4)(Real<4>{0.5}, std::numeric_limits>::infinity()), + 0.5); + EXPECT_EQ( + RTNAME(ModReal4)(Real<4>{-0.5}, std::numeric_limits>::infinity()), + -0.5); + EXPECT_EQ( + RTNAME(ModReal4)(Real<4>{0.5}, -std::numeric_limits>::infinity()), + 0.5); + EXPECT_EQ(RTNAME(ModReal4)( + Real<4>{-0.5}, -std::numeric_limits>::infinity()), + -0.5); + EXPECT_EQ( + RTNAME(ModReal8)(Real<8>{0.5}, std::numeric_limits>::infinity()), + 0.5); + EXPECT_EQ( + RTNAME(ModReal8)(Real<8>{-0.5}, std::numeric_limits>::infinity()), + -0.5); + EXPECT_EQ( + RTNAME(ModReal8)(Real<8>{0.5}, -std::numeric_limits>::infinity()), + 0.5); + EXPECT_EQ(RTNAME(ModReal8)( + Real<8>{-0.5}, -std::numeric_limits>::infinity()), + -0.5); } TEST(Numeric, Modulo) { @@ -76,6 +100,28 @@ TEST(Numeric, Modulo) { EXPECT_EQ(RTNAME(ModuloReal4)(Real<4>{-8.0}, Real<4>(5.0)), 2.0); EXPECT_EQ(RTNAME(ModuloReal8)(Real<8>{8.0}, Real<8>(-5.0)), -2.0); EXPECT_EQ(RTNAME(ModuloReal8)(Real<8>{-8.0}, Real<8>(-5.0)), -3.0); + // MODULO(x, INF) == NaN + EXPECT_TRUE(std::isnan(RTNAME(ModuloReal4)( + Real<4>{0.5}, std::numeric_limits>::infinity()))); + EXPECT_TRUE(std::isnan(RTNAME(ModuloReal4)( + Real<4>{-0.5}, std::numeric_limits>::infinity()))); + EXPECT_TRUE(std::isnan(RTNAME(ModuloReal4)( + Real<4>{0.5}, -std::numeric_limits>::infinity()))); + EXPECT_TRUE(std::isnan(RTNAME(ModuloReal4)( + Real<4>{-0.5}, -std::numeric_limits>::infinity()))); + EXPECT_TRUE(std::isnan(RTNAME(ModuloReal8)( + Real<8>{-0.5}, std::numeric_limits>::infinity()))); + EXPECT_TRUE(std::isnan(RTNAME(ModuloReal8)( + Real<8>{0.5}, std::numeric_limits>::infinity()))); + EXPECT_TRUE(std::isnan(RTNAME(ModuloReal8)( + Real<8>{-0.5}, -std::numeric_limits>::infinity()))); + EXPECT_TRUE(std::isnan(RTNAME(ModuloReal8)( + Real<8>{0.5}, -std::numeric_limits>::infinity()))); + // MODULO(x, y) for integer values of x and y with 0 remainder. 
+ EXPECT_EQ(RTNAME(ModuloReal4)(Real<4>{5.0}, Real<4>(1.0)), 0.0); + EXPECT_EQ(RTNAME(ModuloReal4)(Real<4>{5.0}, Real<4>(-1.0)), -0.0); + EXPECT_EQ(RTNAME(ModuloReal4)(Real<4>{-5.0}, Real<4>(1.0)), 0.0); + EXPECT_EQ(RTNAME(ModuloReal4)(Real<4>{-5.0}, Real<4>(-1.0)), -0.0); } TEST(Numeric, Nearest) { -- cgit v1.1 From 5822ca5a013256bbca33fbbae56f49caa2e37fe3 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 3 Apr 2024 10:27:09 -0700 Subject: Revert "[clang][UBSan] Add implicit conversion check for bitfields" (#87518) Reverts llvm/llvm-project#75481 Breaks multiple bots, see #75481 --- clang/docs/ReleaseNotes.rst | 7 - clang/docs/UndefinedBehaviorSanitizer.rst | 19 +- clang/include/clang/Basic/Sanitizers.def | 20 +- clang/lib/CodeGen/CGExpr.cpp | 37 +-- clang/lib/CodeGen/CGExprScalar.cpp | 257 +++------------------ clang/lib/CodeGen/CodeGenFunction.h | 15 -- clang/test/CodeGen/ubsan-bitfield-conversion.c | 61 ----- .../test/CodeGenCXX/ubsan-bitfield-conversion.cpp | 94 -------- clang/test/Driver/fsanitize.c | 28 +-- compiler-rt/lib/ubsan/ubsan_handlers.cpp | 27 +-- compiler-rt/lib/ubsan/ubsan_handlers.h | 1 - 11 files changed, 73 insertions(+), 493 deletions(-) delete mode 100644 clang/test/CodeGen/ubsan-bitfield-conversion.c delete mode 100644 clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e4c0e49..8fc9253 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -198,10 +198,6 @@ Non-comprehensive list of changes in this release New Compiler Flags ------------------ -- ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and - sign change. -- ``-fsanitize=implicit-integer-conversion`` a group that replaces the previous - group ``-fsanitize=implicit-conversion``. - ``-Wmissing-designated-field-initializers``, grouped under ``-Wmissing-field-initializers``. This diagnostic can be disabled to make ``-Wmissing-field-initializers`` behave @@ -215,9 +211,6 @@ Modified Compiler Flags - Added a new diagnostic flag ``-Wreturn-mismatch`` which is grouped under ``-Wreturn-type``, and moved some of the diagnostics previously controlled by ``-Wreturn-type`` under this new flag. Fixes #GH72116. -- ``-fsanitize=implicit-conversion`` is now a group for both - ``-fsanitize=implicit-integer-conversion`` and - ``-fsanitize=implicit-bitfield-conversion``. - Added ``-Wcast-function-type-mismatch`` under the ``-Wcast-function-type`` warning group. Moved the diagnostic previously controlled by diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst index 531d56e..8f58c92 100644 --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -148,11 +148,6 @@ Available checks are: Issues caught by this sanitizer are not undefined behavior, but are often unintentional. - ``-fsanitize=integer-divide-by-zero``: Integer division by zero. - - ``-fsanitize=implicit-bitfield-conversion``: Implicit conversion from - integer of larger bit width to smaller bitfield, if that results in data - loss. This includes unsigned/signed truncations and sign changes, similarly - to how the ``-fsanitize=implicit-integer-conversion`` group works, but - explicitly for bitfields. - ``-fsanitize=nonnull-attribute``: Passing null pointer as a function parameter which is declared to never be null. 
- ``-fsanitize=null``: Use of a null pointer or creation of a null @@ -198,8 +193,8 @@ Available checks are: signed division overflow (``INT_MIN/-1``). Note that checks are still added even when ``-fwrapv`` is enabled. This sanitizer does not check for lossy implicit conversions performed before the computation (see - ``-fsanitize=implicit-integer-conversion``). Both of these two issues are handled - by ``-fsanitize=implicit-integer-conversion`` group of checks. + ``-fsanitize=implicit-conversion``). Both of these two issues are handled + by ``-fsanitize=implicit-conversion`` group of checks. - ``-fsanitize=unreachable``: If control flow reaches an unreachable program point. - ``-fsanitize=unsigned-integer-overflow``: Unsigned integer overflow, where @@ -207,7 +202,7 @@ Available checks are: type. Unlike signed integer overflow, this is not undefined behavior, but it is often unintentional. This sanitizer does not check for lossy implicit conversions performed before such a computation - (see ``-fsanitize=implicit-integer-conversion``). + (see ``-fsanitize=implicit-conversion``). - ``-fsanitize=vla-bound``: A variable-length array whose bound does not evaluate to a positive value. - ``-fsanitize=vptr``: Use of an object whose vptr indicates that it is of @@ -229,15 +224,11 @@ You can also use the following check groups: - ``-fsanitize=implicit-integer-arithmetic-value-change``: Catches implicit conversions that change the arithmetic value of the integer. Enables ``implicit-signed-integer-truncation`` and ``implicit-integer-sign-change``. - - ``-fsanitize=implicit-integer-conversion``: Checks for suspicious - behavior of implicit integer conversions. Enables + - ``-fsanitize=implicit-conversion``: Checks for suspicious + behavior of implicit conversions. Enables ``implicit-unsigned-integer-truncation``, ``implicit-signed-integer-truncation``, and ``implicit-integer-sign-change``. - - ``-fsanitize=implicit-conversion``: Checks for suspicious - behavior of implicit conversions. Enables - ``implicit-integer-conversion``, and - ``implicit-bitfield-conversion``. - ``-fsanitize=integer``: Checks for undefined or suspicious integer behavior (e.g. unsigned integer overflow). 
Enables ``signed-integer-overflow``, ``unsigned-integer-overflow``, diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def index b228ffd07..c2137e3 100644 --- a/clang/include/clang/Basic/Sanitizers.def +++ b/clang/include/clang/Basic/Sanitizers.def @@ -163,24 +163,24 @@ SANITIZER_GROUP("implicit-integer-arithmetic-value-change", ImplicitIntegerArithmeticValueChange, ImplicitIntegerSignChange | ImplicitSignedIntegerTruncation) -SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion, - ImplicitIntegerArithmeticValueChange | - ImplicitUnsignedIntegerTruncation) +SANITIZER("objc-cast", ObjCCast) -// Implicit bitfield sanitizers -SANITIZER("implicit-bitfield-conversion", ImplicitBitfieldConversion) +// FIXME: +//SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion, +// ImplicitIntegerArithmeticValueChange | +// ImplicitUnsignedIntegerTruncation) +//SANITIZER_GROUP("implicit-conversion", ImplicitConversion, +// ImplicitIntegerConversion) SANITIZER_GROUP("implicit-conversion", ImplicitConversion, - ImplicitIntegerConversion | - ImplicitBitfieldConversion) + ImplicitIntegerArithmeticValueChange | + ImplicitUnsignedIntegerTruncation) SANITIZER_GROUP("integer", Integer, - ImplicitIntegerConversion | IntegerDivideByZero | Shift | + ImplicitConversion | IntegerDivideByZero | Shift | SignedIntegerOverflow | UnsignedIntegerOverflow | UnsignedShiftBase) -SANITIZER("objc-cast", ObjCCast) - SANITIZER("local-bounds", LocalBounds) SANITIZER_GROUP("bounds", Bounds, ArrayBounds | LocalBounds) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 0c7f48f..5443235 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5580,44 +5580,11 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { break; } - // TODO: Can we de-duplicate this code with the corresponding code in - // CGExprScalar, similar to the way EmitCompoundAssignmentLValue works? - RValue RV; - llvm::Value *Previous = nullptr; - QualType SrcType = E->getRHS()->getType(); - // Check if LHS is a bitfield, if RHS contains an implicit cast expression - // we want to extract that value and potentially (if the bitfield sanitizer - // is enabled) use it to check for an implicit conversion. - if (E->getLHS()->refersToBitField()) { - llvm::Value *RHS = - EmitWithOriginalRHSBitfieldAssignment(E, Previous, &SrcType); - RV = RValue::get(RHS); - } else - RV = EmitAnyExpr(E->getRHS()); - + RValue RV = EmitAnyExpr(E->getRHS()); LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store); - if (RV.isScalar()) EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc()); - - if (LV.isBitField()) { - llvm::Value *Result = nullptr; - // If bitfield sanitizers are enabled we want to use the result - // to check whether a truncation or sign change has occurred. - if (SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) - EmitStoreThroughBitfieldLValue(RV, LV, &Result); - else - EmitStoreThroughBitfieldLValue(RV, LV); - - // If the expression contained an implicit conversion, make sure - // to use the value before the scalar conversion. - llvm::Value *Src = Previous ? 
Previous : RV.getScalarVal(); - QualType DstType = E->getLHS()->getType(); - EmitBitfieldConversionCheck(Src, SrcType, Result, DstType, - LV.getBitFieldInfo(), E->getExprLoc()); - } else - EmitStoreThroughLValue(RV, LV); - + EmitStoreThroughLValue(RV, LV); if (getLangOpts().OpenMP) CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this, E->getLHS()); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index a4ab8a11..397b497 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -15,7 +15,6 @@ #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" -#include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" @@ -309,7 +308,6 @@ public: llvm::Type *DstTy, SourceLocation Loc); /// Known implicit conversion check kinds. - /// This is used for bitfield conversion checks as well. /// Keep in sync with the enum of the same name in ubsan_handlers.h enum ImplicitConversionCheckKind : unsigned char { ICCK_IntegerTruncation = 0, // Legacy, was only used by clang 7. @@ -1105,21 +1103,6 @@ void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, {Src, Dst}); } -static llvm::Value *EmitIsNegativeTestHelper(Value *V, QualType VType, - const char *Name, - CGBuilderTy &Builder) { - bool VSigned = VType->isSignedIntegerOrEnumerationType(); - llvm::Type *VTy = V->getType(); - if (!VSigned) { - // If the value is unsigned, then it is never negative. - return llvm::ConstantInt::getFalse(VTy->getContext()); - } - llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0); - return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero, - llvm::Twine(Name) + "." + V->getName() + - ".negativitycheck"); -} - // Should be called within CodeGenFunction::SanitizerScope RAII scope. // Returns 'i1 false' when the conversion Src -> Dst changed the sign. static std::pair Value * { + // Is this value a signed type? + bool VSigned = VType->isSignedIntegerOrEnumerationType(); + llvm::Type *VTy = V->getType(); + if (!VSigned) { + // If the value is unsigned, then it is never negative. + // FIXME: can we encounter non-scalar VTy here? + return llvm::ConstantInt::getFalse(VTy->getContext()); + } + // Get the zero of the same type with which we will be comparing. + llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0); + // %V.isnegative = icmp slt %V, 0 + // I.e is %V *strictly* less than zero, does it have negative value? + return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero, + llvm::Twine(Name) + "." + V->getName() + + ".negativitycheck"); + }; + // 1. Was the old Value negative? - llvm::Value *SrcIsNegative = - EmitIsNegativeTestHelper(Src, SrcType, "src", Builder); + llvm::Value *SrcIsNegative = EmitIsNegativeTest(Src, SrcType, "src"); // 2. Is the new Value negative? - llvm::Value *DstIsNegative = - EmitIsNegativeTestHelper(Dst, DstType, "dst", Builder); + llvm::Value *DstIsNegative = EmitIsNegativeTest(Dst, DstType, "dst"); // 3. Now, was the 'negativity status' preserved during the conversion? // NOTE: conversion from negative to zero is considered to change the sign. // (We want to get 'false' when the conversion changed the sign) @@ -1244,136 +1245,6 @@ void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, {Src, Dst}); } -// Should be called within CodeGenFunction::SanitizerScope RAII scope. -// Returns 'i1 false' when the truncation Src -> Dst was lossy. 
-static std::pair> -EmitBitfieldTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst, - QualType DstType, CGBuilderTy &Builder) { - bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); - bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); - - ScalarExprEmitter::ImplicitConversionCheckKind Kind; - if (!SrcSigned && !DstSigned) - Kind = ScalarExprEmitter::ICCK_UnsignedIntegerTruncation; - else - Kind = ScalarExprEmitter::ICCK_SignedIntegerTruncation; - - llvm::Value *Check = nullptr; - // 1. Extend the truncated value back to the same width as the Src. - Check = Builder.CreateIntCast(Dst, Src->getType(), DstSigned, "bf.anyext"); - // 2. Equality-compare with the original source value - Check = Builder.CreateICmpEQ(Check, Src, "bf.truncheck"); - // If the comparison result is 'i1 false', then the truncation was lossy. - - return std::make_pair( - Kind, std::make_pair(Check, SanitizerKind::ImplicitBitfieldConversion)); -} - -// Should be called within CodeGenFunction::SanitizerScope RAII scope. -// Returns 'i1 false' when the conversion Src -> Dst changed the sign. -static std::pair> -EmitBitfieldSignChangeCheckHelper(Value *Src, QualType SrcType, Value *Dst, - QualType DstType, CGBuilderTy &Builder) { - // 1. Was the old Value negative? - llvm::Value *SrcIsNegative = - EmitIsNegativeTestHelper(Src, SrcType, "bf.src", Builder); - // 2. Is the new Value negative? - llvm::Value *DstIsNegative = - EmitIsNegativeTestHelper(Dst, DstType, "bf.dst", Builder); - // 3. Now, was the 'negativity status' preserved during the conversion? - // NOTE: conversion from negative to zero is considered to change the sign. - // (We want to get 'false' when the conversion changed the sign) - // So we should just equality-compare the negativity statuses. - llvm::Value *Check = nullptr; - Check = - Builder.CreateICmpEQ(SrcIsNegative, DstIsNegative, "bf.signchangecheck"); - // If the comparison result is 'false', then the conversion changed the sign. - return std::make_pair( - ScalarExprEmitter::ICCK_IntegerSignChange, - std::make_pair(Check, SanitizerKind::ImplicitBitfieldConversion)); -} - -void CodeGenFunction::EmitBitfieldConversionCheck(Value *Src, QualType SrcType, - Value *Dst, QualType DstType, - const CGBitFieldInfo &Info, - SourceLocation Loc) { - - if (!SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) - return; - - // We only care about int->int conversions here. - // We ignore conversions to/from pointer and/or bool. - if (!PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(SrcType, - DstType)) - return; - - if (DstType->isBooleanType() || SrcType->isBooleanType()) - return; - - // This should be truncation of integral types. - assert(isa(Src->getType()) && - isa(Dst->getType()) && "non-integer llvm type"); - - // TODO: Calculate src width to avoid emitting code - // for unecessary cases. - unsigned SrcBits = ConvertType(SrcType)->getScalarSizeInBits(); - unsigned DstBits = Info.Size; - - bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); - bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); - - CodeGenFunction::SanitizerScope SanScope(this); - - std::pair> - Check; - - // Truncation - bool EmitTruncation = DstBits < SrcBits; - // If Dst is signed and Src unsigned, we want to be more specific - // about the CheckKind we emit, in this case we want to emit - // ICCK_SignedIntegerTruncationOrSignChange. 
- bool EmitTruncationFromUnsignedToSigned = - EmitTruncation && DstSigned && !SrcSigned; - // Sign change - bool SameTypeSameSize = SrcSigned == DstSigned && SrcBits == DstBits; - bool BothUnsigned = !SrcSigned && !DstSigned; - bool LargerSigned = (DstBits > SrcBits) && DstSigned; - // We can avoid emitting sign change checks in some obvious cases - // 1. If Src and Dst have the same signedness and size - // 2. If both are unsigned sign check is unecessary! - // 3. If Dst is signed and bigger than Src, either - // sign-extension or zero-extension will make sure - // the sign remains. - bool EmitSignChange = !SameTypeSameSize && !BothUnsigned && !LargerSigned; - - if (EmitTruncation) - Check = - EmitBitfieldTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder); - else if (EmitSignChange) { - assert(((SrcBits != DstBits) || (SrcSigned != DstSigned)) && - "either the widths should be different, or the signednesses."); - Check = - EmitBitfieldSignChangeCheckHelper(Src, SrcType, Dst, DstType, Builder); - } else - return; - - ScalarExprEmitter::ImplicitConversionCheckKind CheckKind = Check.first; - if (EmitTruncationFromUnsignedToSigned) - CheckKind = ScalarExprEmitter::ICCK_SignedIntegerTruncationOrSignChange; - - llvm::Constant *StaticArgs[] = { - EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(SrcType), - EmitCheckTypeDescriptor(DstType), - llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind), - llvm::ConstantInt::get(Builder.getInt32Ty(), Info.Size)}; - - EmitCheck(Check.second, SanitizerHandler::ImplicitConversion, StaticArgs, - {Src, Dst}); -} - Value *ScalarExprEmitter::EmitScalarCast(Value *Src, QualType SrcType, QualType DstType, llvm::Type *SrcTy, llvm::Type *DstTy, @@ -2749,8 +2620,6 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::PHINode *atomicPHI = nullptr; llvm::Value *value; llvm::Value *input; - llvm::Value *Previous = nullptr; - QualType SrcType = E->getType(); int amount = (isInc ? 1 : -1); bool isSubtraction = !isInc; @@ -2839,8 +2708,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, "base or promoted) will be signed, or the bitwidths will match."); } if (CGF.SanOpts.hasOneOf( - SanitizerKind::ImplicitIntegerArithmeticValueChange | - SanitizerKind::ImplicitBitfieldConversion) && + SanitizerKind::ImplicitIntegerArithmeticValueChange) && canPerformLossyDemotionCheck) { // While `x += 1` (for `x` with width less than int) is modeled as // promotion+arithmetics+demotion, and we can catch lossy demotion with @@ -2851,26 +2719,13 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // the increment/decrement in the wider type, and finally // perform the demotion. This will catch lossy demotions. - // We have a special case for bitfields defined using all the bits of the - // type. In this case we need to do the same trick as for the integer - // sanitizer checks, i.e., promotion -> increment/decrement -> demotion. - value = EmitScalarConversion(value, type, promotedType, E->getExprLoc()); Value *amt = llvm::ConstantInt::get(value->getType(), amount, true); value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec"); // Do pass non-default ScalarConversionOpts so that sanitizer check is - // emitted if LV is not a bitfield, otherwise the bitfield sanitizer - // checks will take care of the conversion. 
- ScalarConversionOpts Opts; - if (!LV.isBitField()) - Opts = ScalarConversionOpts(CGF.SanOpts); - else if (CGF.SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) { - Previous = value; - SrcType = promotedType; - } - + // emitted. value = EmitScalarConversion(value, promotedType, type, E->getExprLoc(), - Opts); + ScalarConversionOpts(CGF.SanOpts)); // Note that signed integer inc/dec with width less than int can't // overflow because of promotion rules; we're just eliding a few steps @@ -3055,12 +2910,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } // Store the updated result through the lvalue. - if (LV.isBitField()) { - Value *Src = Previous ? Previous : value; + if (LV.isBitField()) CGF.EmitStoreThroughBitfieldLValue(RValue::get(value), LV, &value); - CGF.EmitBitfieldConversionCheck(Src, SrcType, value, E->getType(), - LV.getBitFieldInfo(), E->getExprLoc()); - } else + else CGF.EmitStoreThroughLValue(RValue::get(value), LV); // If this is a postinc, return the value read from memory, otherwise use the @@ -3565,15 +3417,8 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // Convert the result back to the LHS type, // potentially with Implicit Conversion sanitizer check. - // If LHSLV is a bitfield, use default ScalarConversionOpts - // to avoid emit any implicit integer checks. - Value *Previous = nullptr; - if (LHSLV.isBitField()) { - Previous = Result; - Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc); - } else - Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, - ScalarConversionOpts(CGF.SanOpts)); + Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, + ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); @@ -3592,14 +3437,9 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // specially because the result is altered by the store, i.e., [C99 6.5.16p1] // 'An assignment expression has the value of the left operand after the // assignment...'. - if (LHSLV.isBitField()) { - Value *Src = Previous ? Previous : Result; - QualType SrcType = E->getRHS()->getType(); - QualType DstType = E->getLHS()->getType(); + if (LHSLV.isBitField()) CGF.EmitStoreThroughBitfieldLValue(RValue::get(Result), LHSLV, &Result); - CGF.EmitBitfieldConversionCheck(Src, SrcType, Result, DstType, - LHSLV.getBitFieldInfo(), E->getExprLoc()); - } else + else CGF.EmitStoreThroughLValue(RValue::get(Result), LHSLV); if (CGF.getLangOpts().OpenMP) @@ -4711,24 +4551,6 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, E->getExprLoc()); } -llvm::Value *CodeGenFunction::EmitWithOriginalRHSBitfieldAssignment( - const BinaryOperator *E, Value *Previous, QualType *SrcType) { - // In case we have the integer or bitfield sanitizer checks enabled - // we want to get the expression before scalar conversion. - if (auto *ICE = dyn_cast(E->getRHS())) { - CastKind Kind = ICE->getCastKind(); - if (Kind == CK_IntegralCast) { - *SrcType = ICE->getSubExpr()->getType(); - Previous = EmitScalarExpr(ICE->getSubExpr()); - // Pass default ScalarConversionOpts to avoid emitting - // integer sanitizer checks as E refers to bitfield. 
- return EmitScalarConversion(Previous, *SrcType, ICE->getType(), - ICE->getExprLoc()); - } - } - return EmitScalarExpr(E->getRHS()); -} - Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { bool Ignore = TestAndClearIgnoreResultAssign(); @@ -4757,16 +4579,7 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { case Qualifiers::OCL_None: // __block variables need to have the rhs evaluated first, plus // this should improve codegen just a little. - Value *Previous = nullptr; - QualType SrcType = E->getRHS()->getType(); - // Check if LHS is a bitfield, if RHS contains an implicit cast expression - // we want to extract that value and potentially (if the bitfield sanitizer - // is enabled) use it to check for an implicit conversion. - if (E->getLHS()->refersToBitField()) - RHS = CGF.EmitWithOriginalRHSBitfieldAssignment(E, Previous, &SrcType); - else - RHS = Visit(E->getRHS()); - + RHS = Visit(E->getRHS()); LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); // Store the value into the LHS. Bit-fields are handled specially @@ -4775,12 +4588,6 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { // the assignment...'. if (LHS.isBitField()) { CGF.EmitStoreThroughBitfieldLValue(RValue::get(RHS), LHS, &RHS); - // If the expression contained an implicit conversion, make sure - // to use the value before the scalar conversion. - Value *Src = Previous ? Previous : RHS; - QualType DstType = E->getLHS()->getType(); - CGF.EmitBitfieldConversionCheck(Src, SrcType, RHS, DstType, - LHS.getBitFieldInfo(), E->getExprLoc()); } else { CGF.EmitNullabilityCheck(LHS, RHS, E->getExprLoc()); CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 99a7f51..e2a7e28 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2786,21 +2786,6 @@ public: /// expression and compare the result against zero, returning an Int1Ty value. llvm::Value *EvaluateExprAsBool(const Expr *E); - /// Retrieve the implicit cast expression of the rhs in a binary operator - /// expression by passing pointers to Value and QualType - /// This is used for implicit bitfield conversion checks, which - /// must compare with the value before potential truncation. - llvm::Value *EmitWithOriginalRHSBitfieldAssignment(const BinaryOperator *E, - llvm::Value *Previous, - QualType *SrcType); - - /// Emit a check that an [implicit] conversion of a bitfield. It is not UB, - /// so we use the value after conversion. - void EmitBitfieldConversionCheck(llvm::Value *Src, QualType SrcType, - llvm::Value *Dst, QualType DstType, - const CGBitFieldInfo &Info, - SourceLocation Loc); - /// EmitIgnoredExpr - Emit an expression in a context which ignores the result. 
void EmitIgnoredExpr(const Expr *E); diff --git a/clang/test/CodeGen/ubsan-bitfield-conversion.c b/clang/test/CodeGen/ubsan-bitfield-conversion.c deleted file mode 100644 index ea9bdd7..0000000 --- a/clang/test/CodeGen/ubsan-bitfield-conversion.c +++ /dev/null @@ -1,61 +0,0 @@ -// RUN: %clang -fsanitize=implicit-bitfield-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION -// RUN: %clang -fsanitize=implicit-integer-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK -// RUN: %clang -fsanitize=implicit-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION - -typedef struct _xx { - int x1:3; - char x2:2; -} xx, *pxx; - -xx vxx; - -// CHECK-LABEL: define{{.*}} void @foo1 -void foo1(int x) { - vxx.x1 = x; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} - -// CHECK-LABEL: define{{.*}} void @foo2 -void foo2(int x) { - vxx.x2 = x; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} - -// CHECK-LABEL: define{{.*}} void @foo3 -void foo3() { - vxx.x1++; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} - -// CHECK-LABEL: define{{.*}} void @foo4 -void foo4(int x) { - vxx.x1 += x; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} \ No newline at end of file diff --git a/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp b/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp deleted file mode 100644 index 92f6e24..0000000 --- a/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp +++ /dev/null @@ -1,94 +0,0 @@ -// RUN: %clang -x c++ -fsanitize=implicit-bitfield-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION -// RUN: %clang -x c++ -fsanitize=implicit-integer-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK -// RUN: %clang -x c++ -fsanitize=implicit-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION - -struct S { - int a:3; - char b:2; -}; - -class C : public S { - public: - short c:3; -}; - -S s; -C c; - -// CHECK-LABEL: define{{.*}} void @{{.*foo1.*}} -void foo1(int x) { - s.a = x; - // CHECK: store i8 %{{.*}} - // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - c.a = x; - // CHECK: store i8 %{{.*}} - // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} - -// CHECK-LABEL: define{{.*}} void @{{.*foo2.*}} -void foo2(int x) { - s.b = x; - // CHECK: store i8 %{{.*}} - // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 - // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - c.b = x; - // CHECK: store i8 %{{.*}} - // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 - // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} - -// CHECK-LABEL: define{{.*}} void @{{.*foo3.*}} -void foo3() { - s.a++; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - c.a++; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} - -// CHECK-LABEL: define{{.*}} void @{{.*foo4.*}} -void foo4(int x) { - s.a += x; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - c.a += x; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 
[[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} \ No newline at end of file diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 571f79a..1671825 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -35,20 +35,20 @@ // RUN: %clang --target=%itanium_abi_triple -fsanitize=integer %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-INTEGER -implicit-check-not="-fsanitize-address-use-after-scope" // CHECK-INTEGER: "-fsanitize={{((signed-integer-overflow|unsigned-integer-overflow|integer-divide-by-zero|shift-base|shift-exponent|implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change|unsigned-shift-base),?){9}"}} -// RUN: %clang -fsanitize=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-RECOVER -// RUN: %clang -fsanitize=implicit-integer-conversion -fsanitize-recover=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-RECOVER -// RUN: %clang -fsanitize=implicit-integer-conversion -fno-sanitize-recover=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-NORECOVER -// RUN: %clang -fsanitize=implicit-integer-conversion -fsanitize-trap=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-TRAP -// CHECK-implicit-integer-conversion: "-fsanitize={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-RECOVER: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-RECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-RECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // ??? 
-// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-TRAP: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-TRAP-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-TRAP-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// RUN: %clang -fsanitize=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-RECOVER +// RUN: %clang -fsanitize=implicit-conversion -fsanitize-recover=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-RECOVER +// RUN: %clang -fsanitize=implicit-conversion -fno-sanitize-recover=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-NORECOVER +// RUN: %clang -fsanitize=implicit-conversion -fsanitize-trap=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-TRAP +// CHECK-implicit-conversion: "-fsanitize={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-RECOVER: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-RECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-RECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-NORECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // ??? 
+// CHECK-implicit-conversion-NORECOVER-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-NORECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-TRAP: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-TRAP-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-TRAP-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // RUN: %clang -fsanitize=implicit-integer-arithmetic-value-change %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-arithmetic-value-change,CHECK-implicit-integer-arithmetic-value-change-RECOVER // RUN: %clang -fsanitize=implicit-integer-arithmetic-value-change -fsanitize-recover=implicit-integer-arithmetic-value-change %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-arithmetic-value-change,CHECK-implicit-integer-arithmetic-value-change-RECOVER diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp index 27d0165..0f16507 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp +++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp @@ -555,11 +555,13 @@ static void handleImplicitConversion(ImplicitConversionData *Data, ReportOptions Opts, ValueHandle Src, ValueHandle Dst) { SourceLocation Loc = Data->Loc.acquire(); + ErrorType ET = ErrorType::GenericUB; + const TypeDescriptor &SrcTy = Data->FromType; const TypeDescriptor &DstTy = Data->ToType; + bool SrcSigned = SrcTy.isSignedIntegerTy(); bool DstSigned = DstTy.isSignedIntegerTy(); - ErrorType ET = ErrorType::GenericUB; switch (Data->Kind) { case ICCK_IntegerTruncation: { // Legacy, no longer used. @@ -592,23 +594,14 @@ static void handleImplicitConversion(ImplicitConversionData *Data, ScopedReport R(Opts, Loc, ET); - // In the case we have a bitfield, we want to explicitly say so in the - // error message. // FIXME: is it possible to dump the values as hex with fixed width? - if (Data->BitfieldBits) - Diag(Loc, DL_Error, ET, - "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " - "type %4 changed the value to %5 (%6-bit bitfield, %7signed)") - << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() - << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) - << Data->BitfieldBits << (DstSigned ? "" : "un"); - else - Diag(Loc, DL_Error, ET, - "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " - "type %4 changed the value to %5 (%6-bit, %7signed)") - << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() - << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) - << DstTy.getIntegerBitWidth() << (DstSigned ? "" : "un"); + + Diag(Loc, DL_Error, ET, + "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " + "type %4 changed the value to %5 (%6-bit, %7signed)") + << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() + << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) + << DstTy.getIntegerBitWidth() << (DstSigned ? 
"" : "un"); } void __ubsan::__ubsan_handle_implicit_conversion(ImplicitConversionData *Data, diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.h b/compiler-rt/lib/ubsan/ubsan_handlers.h index bae661a..3bd5046 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.h +++ b/compiler-rt/lib/ubsan/ubsan_handlers.h @@ -147,7 +147,6 @@ struct ImplicitConversionData { const TypeDescriptor &FromType; const TypeDescriptor &ToType; /* ImplicitConversionCheckKind */ unsigned char Kind; - unsigned int BitfieldBits; }; /// \brief Implict conversion that changed the value. -- cgit v1.1 From 6099639846c14991806290524b77cc25f6eb39bc Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 3 Apr 2024 10:28:07 -0700 Subject: [clang] Precommit test for `llvm.allow.ubsan.check()` (#87435) --- clang/test/CodeGen/allow-ubsan-check.c | 207 +++++++++++++++++++++++++++++++++ 1 file changed, 207 insertions(+) create mode 100644 clang/test/CodeGen/allow-ubsan-check.c diff --git a/clang/test/CodeGen/allow-ubsan-check.c b/clang/test/CodeGen/allow-ubsan-check.c new file mode 100644 index 0000000..bc42523 --- /dev/null +++ b/clang/test/CodeGen/allow-ubsan-check.c @@ -0,0 +1,207 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s -fsanitize=signed-integer-overflow,integer-divide-by-zero,null | FileCheck %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s -fsanitize=signed-integer-overflow,integer-divide-by-zero,null -fsanitize-trap=signed-integer-overflow,integer-divide-by-zero,null | FileCheck %s --check-prefixes=TRAP +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s -fsanitize=signed-integer-overflow,integer-divide-by-zero,null -fsanitize-recover=signed-integer-overflow,integer-divide-by-zero,null | FileCheck %s --check-prefixes=RECOVER + + +// CHECK-LABEL: define dso_local i32 @div( +// CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0, !nosanitize [[META2:![0-9]+]] +// CHECK-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP0]], -2147483648, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], -1, !nosanitize [[META2]] +// CHECK-NEXT: [[OR:%.*]] = or i1 [[TMP3]], [[TMP4]], !nosanitize [[META2]] +// CHECK-NEXT: [[TMP5:%.*]] = and i1 [[TMP2]], [[OR]], !nosanitize [[META2]] +// CHECK-NEXT: br i1 [[TMP5]], label [[CONT:%.*]], label [[HANDLER_DIVREM_OVERFLOW:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] +// CHECK: handler.divrem_overflow: +// CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP1]] to i64, !nosanitize [[META2]] +// CHECK-NEXT: call void @__ubsan_handle_divrem_overflow_abort(ptr @[[GLOB1:[0-9]+]], i64 [[TMP6]], i64 [[TMP7]]) #[[ATTR3:[0-9]+]], !nosanitize [[META2]] +// CHECK-NEXT: unreachable, !nosanitize [[META2]] +// CHECK: cont: +// CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], [[TMP1]] +// CHECK-NEXT: ret i32 [[DIV]] +// +// TRAP-LABEL: define dso_local i32 @div( +// TRAP-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) 
#[[ATTR0:[0-9]+]] { +// TRAP-NEXT: entry: +// TRAP-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// TRAP-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4 +// TRAP-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// TRAP-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4 +// TRAP-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// TRAP-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4 +// TRAP-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0, !nosanitize [[META2:![0-9]+]] +// TRAP-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP0]], -2147483648, !nosanitize [[META2]] +// TRAP-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], -1, !nosanitize [[META2]] +// TRAP-NEXT: [[OR:%.*]] = or i1 [[TMP3]], [[TMP4]], !nosanitize [[META2]] +// TRAP-NEXT: [[TMP5:%.*]] = and i1 [[TMP2]], [[OR]], !nosanitize [[META2]] +// TRAP-NEXT: br i1 [[TMP5]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META2]] +// TRAP: trap: +// TRAP-NEXT: call void @llvm.ubsantrap(i8 3) #[[ATTR3:[0-9]+]], !nosanitize [[META2]] +// TRAP-NEXT: unreachable, !nosanitize [[META2]] +// TRAP: cont: +// TRAP-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], [[TMP1]] +// TRAP-NEXT: ret i32 [[DIV]] +// +// RECOVER-LABEL: define dso_local i32 @div( +// RECOVER-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR0:[0-9]+]] { +// RECOVER-NEXT: entry: +// RECOVER-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// RECOVER-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4 +// RECOVER-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// RECOVER-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4 +// RECOVER-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// RECOVER-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4 +// RECOVER-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0, !nosanitize [[META2:![0-9]+]] +// RECOVER-NEXT: [[TMP3:%.*]] = icmp ne i32 [[TMP0]], -2147483648, !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP1]], -1, !nosanitize [[META2]] +// RECOVER-NEXT: [[OR:%.*]] = or i1 [[TMP3]], [[TMP4]], !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP5:%.*]] = and i1 [[TMP2]], [[OR]], !nosanitize [[META2]] +// RECOVER-NEXT: br i1 [[TMP5]], label [[CONT:%.*]], label [[HANDLER_DIVREM_OVERFLOW:%.*]], !prof [[PROF3:![0-9]+]], !nosanitize [[META2]] +// RECOVER: handler.divrem_overflow: +// RECOVER-NEXT: [[TMP6:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP7:%.*]] = zext i32 [[TMP1]] to i64, !nosanitize [[META2]] +// RECOVER-NEXT: call void @__ubsan_handle_divrem_overflow(ptr @[[GLOB1:[0-9]+]], i64 [[TMP6]], i64 [[TMP7]]) #[[ATTR3:[0-9]+]], !nosanitize [[META2]] +// RECOVER-NEXT: br label [[CONT]], !nosanitize [[META2]] +// RECOVER: cont: +// RECOVER-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP0]], [[TMP1]] +// RECOVER-NEXT: ret i32 [[DIV]] +// +int div(int x, int y) { + return x / y; +} + +// CHECK-LABEL: define dso_local i32 @null( +// CHECK-SAME: ptr noundef [[X:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// CHECK-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = icmp ne ptr [[TMP0]], null, !nosanitize [[META2]] +// CHECK-NEXT: br i1 [[TMP1]], label [[CONT:%.*]], label [[HANDLER_TYPE_MISMATCH:%.*]], !prof [[PROF3]], !nosanitize [[META2]] +// CHECK: handler.type_mismatch: +// CHECK-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64, !nosanitize [[META2]] +// CHECK-NEXT: call void @__ubsan_handle_type_mismatch_v1_abort(ptr @[[GLOB2:[0-9]+]], i64 [[TMP2]]) #[[ATTR3]], !nosanitize [[META2]] +// 
CHECK-NEXT: unreachable, !nosanitize [[META2]] +// CHECK: cont: +// CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// CHECK-NEXT: ret i32 [[TMP3]] +// +// TRAP-LABEL: define dso_local i32 @null( +// TRAP-SAME: ptr noundef [[X:%.*]]) #[[ATTR0]] { +// TRAP-NEXT: entry: +// TRAP-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// TRAP-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 +// TRAP-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// TRAP-NEXT: [[TMP1:%.*]] = icmp ne ptr [[TMP0]], null, !nosanitize [[META2]] +// TRAP-NEXT: br i1 [[TMP1]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META2]] +// TRAP: trap: +// TRAP-NEXT: call void @llvm.ubsantrap(i8 22) #[[ATTR3]], !nosanitize [[META2]] +// TRAP-NEXT: unreachable, !nosanitize [[META2]] +// TRAP: cont: +// TRAP-NEXT: [[TMP2:%.*]] = load i32, ptr [[TMP0]], align 4 +// TRAP-NEXT: ret i32 [[TMP2]] +// +// RECOVER-LABEL: define dso_local i32 @null( +// RECOVER-SAME: ptr noundef [[X:%.*]]) #[[ATTR0]] { +// RECOVER-NEXT: entry: +// RECOVER-NEXT: [[X_ADDR:%.*]] = alloca ptr, align 8 +// RECOVER-NEXT: store ptr [[X]], ptr [[X_ADDR]], align 8 +// RECOVER-NEXT: [[TMP0:%.*]] = load ptr, ptr [[X_ADDR]], align 8 +// RECOVER-NEXT: [[TMP1:%.*]] = icmp ne ptr [[TMP0]], null, !nosanitize [[META2]] +// RECOVER-NEXT: br i1 [[TMP1]], label [[CONT:%.*]], label [[HANDLER_TYPE_MISMATCH:%.*]], !prof [[PROF3]], !nosanitize [[META2]] +// RECOVER: handler.type_mismatch: +// RECOVER-NEXT: [[TMP2:%.*]] = ptrtoint ptr [[TMP0]] to i64, !nosanitize [[META2]] +// RECOVER-NEXT: call void @__ubsan_handle_type_mismatch_v1(ptr @[[GLOB2:[0-9]+]], i64 [[TMP2]]) #[[ATTR3]], !nosanitize [[META2]] +// RECOVER-NEXT: br label [[CONT]], !nosanitize [[META2]] +// RECOVER: cont: +// RECOVER-NEXT: [[TMP3:%.*]] = load i32, ptr [[TMP0]], align 4 +// RECOVER-NEXT: ret i32 [[TMP3]] +// +int null(int* x) { + return *x; +} + +// CHECK-LABEL: define dso_local i32 @overflow( +// CHECK-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// CHECK-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[TMP0]], i32 [[TMP1]]), !nosanitize [[META2]] +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP5:%.*]] = xor i1 [[TMP4]], true, !nosanitize [[META2]] +// CHECK-NEXT: br i1 [[TMP5]], label [[CONT:%.*]], label [[HANDLER_ADD_OVERFLOW:%.*]], !prof [[PROF3]], !nosanitize [[META2]] +// CHECK: handler.add_overflow: +// CHECK-NEXT: [[TMP6:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize [[META2]] +// CHECK-NEXT: [[TMP7:%.*]] = zext i32 [[TMP1]] to i64, !nosanitize [[META2]] +// CHECK-NEXT: call void @__ubsan_handle_add_overflow_abort(ptr @[[GLOB3:[0-9]+]], i64 [[TMP6]], i64 [[TMP7]]) #[[ATTR3]], !nosanitize [[META2]] +// CHECK-NEXT: unreachable, !nosanitize [[META2]] +// CHECK: cont: +// CHECK-NEXT: ret i32 [[TMP3]] +// +// TRAP-LABEL: define dso_local i32 @overflow( +// TRAP-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR0]] { +// TRAP-NEXT: entry: +// TRAP-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// TRAP-NEXT: 
[[Y_ADDR:%.*]] = alloca i32, align 4 +// TRAP-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// TRAP-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4 +// TRAP-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// TRAP-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4 +// TRAP-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[TMP0]], i32 [[TMP1]]), !nosanitize [[META2]] +// TRAP-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0, !nosanitize [[META2]] +// TRAP-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1, !nosanitize [[META2]] +// TRAP-NEXT: [[TMP5:%.*]] = xor i1 [[TMP4]], true, !nosanitize [[META2]] +// TRAP-NEXT: br i1 [[TMP5]], label [[CONT:%.*]], label [[TRAP:%.*]], !nosanitize [[META2]] +// TRAP: trap: +// TRAP-NEXT: call void @llvm.ubsantrap(i8 0) #[[ATTR3]], !nosanitize [[META2]] +// TRAP-NEXT: unreachable, !nosanitize [[META2]] +// TRAP: cont: +// TRAP-NEXT: ret i32 [[TMP3]] +// +// RECOVER-LABEL: define dso_local i32 @overflow( +// RECOVER-SAME: i32 noundef [[X:%.*]], i32 noundef [[Y:%.*]]) #[[ATTR0]] { +// RECOVER-NEXT: entry: +// RECOVER-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4 +// RECOVER-NEXT: [[Y_ADDR:%.*]] = alloca i32, align 4 +// RECOVER-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4 +// RECOVER-NEXT: store i32 [[Y]], ptr [[Y_ADDR]], align 4 +// RECOVER-NEXT: [[TMP0:%.*]] = load i32, ptr [[X_ADDR]], align 4 +// RECOVER-NEXT: [[TMP1:%.*]] = load i32, ptr [[Y_ADDR]], align 4 +// RECOVER-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[TMP0]], i32 [[TMP1]]), !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0, !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1, !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP5:%.*]] = xor i1 [[TMP4]], true, !nosanitize [[META2]] +// RECOVER-NEXT: br i1 [[TMP5]], label [[CONT:%.*]], label [[HANDLER_ADD_OVERFLOW:%.*]], !prof [[PROF3]], !nosanitize [[META2]] +// RECOVER: handler.add_overflow: +// RECOVER-NEXT: [[TMP6:%.*]] = zext i32 [[TMP0]] to i64, !nosanitize [[META2]] +// RECOVER-NEXT: [[TMP7:%.*]] = zext i32 [[TMP1]] to i64, !nosanitize [[META2]] +// RECOVER-NEXT: call void @__ubsan_handle_add_overflow(ptr @[[GLOB3:[0-9]+]], i64 [[TMP6]], i64 [[TMP7]]) #[[ATTR3]], !nosanitize [[META2]] +// RECOVER-NEXT: br label [[CONT]], !nosanitize [[META2]] +// RECOVER: cont: +// RECOVER-NEXT: ret i32 [[TMP3]] +// +int overflow(int x, int y) { + return x + y; +} +//. +// CHECK: [[META2]] = !{} +// CHECK: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1} +//. +// TRAP: [[META2]] = !{} +//. +// RECOVER: [[META2]] = !{} +// RECOVER: [[PROF3]] = !{!"branch_weights", i32 1048575, i32 1} +//. -- cgit v1.1 From fff2690eba58f3a548865b5246b09233663c680d Mon Sep 17 00:00:00 2001 From: Aaron Ballman Date: Wed, 3 Apr 2024 13:26:28 -0400 Subject: [C23] Remove WG14 N2416 from the C status page This paper did not add any normative changes for us to check conformance against. It added a note describing a potential behavioral difference between compile-time and runtime evaluation of negative floating-point values in the presence of rounding modes. --- clang/www/c_status.html | 5 ----- 1 file changed, 5 deletions(-) diff --git a/clang/www/c_status.html b/clang/www/c_status.html index 370d99b..bc27b20 100644 --- a/clang/www/c_status.html +++ b/clang/www/c_status.html @@ -751,11 +751,6 @@ conformance.

Unknown - Floating-point negation and conversion - N2416 - Unknown - - Annex F.8 update for implementation extensions and rounding N2384 Unknown -- cgit v1.1 From 07d3f2a8de6956717db2355d6d3421d35f3a5796 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Wed, 3 Apr 2024 10:37:09 -0700 Subject: [RISCV][GISEL] Run update_mir_test_checks on llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir --- .../GlobalISel/legalizer/rvv/legalize-xor.mir | 88 +++++++++++----------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir index 4de02b1..8a34521 100644 --- a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-xor.mir @@ -9,8 +9,8 @@ body: | ; CHECK-LABEL: name: test_nxv1i8 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8 %0:_() = COPY $v8 %1:_() = COPY $v9 @@ -27,8 +27,8 @@ body: | ; CHECK-LABEL: name: test_nxv2i8 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8 %0:_() = COPY $v8 %1:_() = COPY $v9 @@ -45,8 +45,8 @@ body: | ; CHECK-LABEL: name: test_nxv4i8 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8 %0:_() = COPY $v8 %1:_() = COPY $v9 @@ -63,8 +63,8 @@ body: | ; CHECK-LABEL: name: test_nxv8i8 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8 %0:_() = COPY $v8 %1:_() = COPY $v9 @@ -81,8 +81,8 @@ body: | ; CHECK-LABEL: name: test_nxv16i8 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8m2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v10m2 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m2 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m2 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8m2 %0:_() = COPY $v8m2 %1:_() = COPY $v10m2 @@ -99,8 +99,8 @@ body: | ; CHECK-LABEL: name: test_nxv32i8 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8m4 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v12m4 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m4 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m4 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8m4 %0:_() = COPY $v8m4 %1:_() = COPY $v12m4 @@ -117,8 +117,8 @@ body: | ; CHECK-LABEL: name: test_nxv64i8 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8m8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = 
COPY $v16m8 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8m8 %0:_() = COPY $v8m8 %1:_() = COPY $v16m8 @@ -135,8 +135,8 @@ body: | ; CHECK-LABEL: name: test_nxv1i16 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8 %0:_() = COPY $v8 %1:_() = COPY $v9 @@ -153,8 +153,8 @@ body: | ; CHECK-LABEL: name: test_nxv2i16 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8 %0:_() = COPY $v8 %1:_() = COPY $v9 @@ -171,8 +171,8 @@ body: | ; CHECK-LABEL: name: test_nxv4i16 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8 %0:_() = COPY $v8 %1:_() = COPY $v9 @@ -189,8 +189,8 @@ body: | ; CHECK-LABEL: name: test_nxv8i16 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8m2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v10m2 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m2 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m2 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8m2 %0:_() = COPY $v8m2 %1:_() = COPY $v10m2 @@ -207,8 +207,8 @@ body: | ; CHECK-LABEL: name: test_nxv16i16 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8m4 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v12m4 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m4 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m4 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8m4 %0:_() = COPY $v8m4 %1:_() = COPY $v12m4 @@ -225,8 +225,8 @@ body: | ; CHECK-LABEL: name: test_nxv32i16 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8m8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v16m8 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8m8 %0:_() = COPY $v8m8 %1:_() = COPY $v16m8 @@ -243,8 +243,8 @@ body: | ; CHECK-LABEL: name: test_nxv1i32 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8 %0:_() = COPY $v8 %1:_() = COPY $v9 @@ -261,8 +261,8 @@ body: | ; CHECK-LABEL: name: test_nxv2i32 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY 
[[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8 %0:_() = COPY $v8 %1:_() = COPY $v9 @@ -279,8 +279,8 @@ body: | ; CHECK-LABEL: name: test_nxv4i32 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8m2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v10m2 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m2 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m2 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8m2 %0:_() = COPY $v8m2 %1:_() = COPY $v10m2 @@ -297,8 +297,8 @@ body: | ; CHECK-LABEL: name: test_nxv8i32 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8m4 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v12m4 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m4 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m4 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8m4 %0:_() = COPY $v8m4 %1:_() = COPY $v12m4 @@ -315,8 +315,8 @@ body: | ; CHECK-LABEL: name: test_nxv16i32 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8m8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v16m8 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8m8 %0:_() = COPY $v8m8 %1:_() = COPY $v16m8 @@ -333,8 +333,8 @@ body: | ; CHECK-LABEL: name: test_nxv1i64 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v9 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8 %0:_() = COPY $v8 %1:_() = COPY $v9 @@ -351,8 +351,8 @@ body: | ; CHECK-LABEL: name: test_nxv2i64 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8m2 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v10m2 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m2 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m2 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8m2 %0:_() = COPY $v8m2 %1:_() = COPY $v10m2 @@ -369,8 +369,8 @@ body: | ; CHECK-LABEL: name: test_nxv4i64 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8m4 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v12m4 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m4 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m4 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8m4 %0:_() = COPY $v8m4 %1:_() = COPY $v12m4 @@ -387,8 +387,8 @@ body: | ; CHECK-LABEL: name: test_nxv8i64 ; CHECK: [[COPY:%[0-9]+]]:_() = COPY $v8m8 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:_() = COPY $v16m8 - ; CHECK-NEXT: [[OR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] - ; CHECK-NEXT: $v8m8 = COPY [[OR]]() + ; CHECK-NEXT: [[XOR:%[0-9]+]]:_() = G_XOR [[COPY]], [[COPY1]] + ; CHECK-NEXT: $v8m8 = COPY [[XOR]]() ; CHECK-NEXT: PseudoRET implicit $v8m8 %0:_() = COPY $v8m8 %1:_() = COPY $v16m8 -- cgit v1.1 From 23616c65e7d632e750ddb67d55cc39098a69a8a6 Mon Sep 17 00:00:00 2001 From: maflcko <6399679+maflcko@users.noreply.github.com> Date: Wed, 3 Apr 2024 19:39:25 +0200 Subject: dsymutil: Re-add missing -latomic (#85380) This was accidentally removed in https://reviews.llvm.org/D137799#4657404 / 
https://reviews.llvm.org/D137799#C3933303OL44, and downstream projects are forced to add it back. For example, https://git.savannah.gnu.org/cgit/guix.git/commit/?id=4e26331a5ee87928a16888c36d51e270f0f10f90 Fix this, by re-adding it. Co-authored-by: MarcoFalke <*~=`'#}+{/-|&$^_@721217.xyz> --- llvm/tools/dsymutil/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/dsymutil/CMakeLists.txt b/llvm/tools/dsymutil/CMakeLists.txt index efe28bd..89225d4 100644 --- a/llvm/tools/dsymutil/CMakeLists.txt +++ b/llvm/tools/dsymutil/CMakeLists.txt @@ -44,4 +44,4 @@ if(APPLE AND NOT LLVM_TOOL_LLVM_DRIVER_BUILD) target_link_libraries(dsymutil PRIVATE "-framework CoreFoundation") endif(APPLE AND NOT LLVM_TOOL_LLVM_DRIVER_BUILD) -# target_link_libraries(dsymutil PRIVATE ${LLVM_ATOMIC_LIB}) +target_link_libraries(dsymutil PRIVATE ${LLVM_ATOMIC_LIB}) -- cgit v1.1 From cd29126b6333c28cc4df7b932ed0d6d6c13983d1 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 3 Apr 2024 13:47:50 -0400 Subject: [SLP]Fix PR87133: crash because of different altopcodes for cmps after reordering. If the node has cmp instruction with 3 or more different but swappable predicates, need to keep same kind of main/alternate opcodes to avoid incorrect detection of opcodes after reordering. Reordering changes the order and we may erroneously consider swappable opcodes as non-compatible/alternate, which may lead to a later compiler crash. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: https://github.com/llvm/llvm-project/pull/87267 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 25 ++++++++++- .../X86/icmp-altopcode-after-reordering.ll | 51 ++++++++++++++++++++++ .../SLPVectorizer/X86/reduction-logical.ll | 51 ++++++++++++---------- 3 files changed, 104 insertions(+), 23 deletions(-) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/icmp-altopcode-after-reordering.ll diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 7928d29..9f8bc552 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -658,6 +658,29 @@ static InstructionsState getSameOpcode(ArrayRef VL, unsigned AltOpcode = Opcode; unsigned AltIndex = BaseIndex; + bool SwappedPredsCompatible = [&]() { + if (!IsCmpOp) + return false; + SetVector UniquePreds, UniqueNonSwappedPreds; + UniquePreds.insert(BasePred); + UniqueNonSwappedPreds.insert(BasePred); + for (Value *V : VL) { + auto *I = dyn_cast(V); + if (!I) + return false; + CmpInst::Predicate CurrentPred = I->getPredicate(); + CmpInst::Predicate SwappedCurrentPred = + CmpInst::getSwappedPredicate(CurrentPred); + UniqueNonSwappedPreds.insert(CurrentPred); + if (!UniquePreds.contains(CurrentPred) && + !UniquePreds.contains(SwappedCurrentPred)) + UniquePreds.insert(CurrentPred); + } + // Total number of predicates > 2, but if consider swapped predicates + // compatible only 2, consider swappable predicates as compatible opcodes, + // not alternate. + return UniqueNonSwappedPreds.size() > 2 && UniquePreds.size() == 2; + }(); // Check for one alternate opcode from another BinaryOperator. // TODO - generalize to support all operators (types, calls etc.). 
auto *IBase = cast(VL[BaseIndex]); @@ -710,7 +733,7 @@ static InstructionsState getSameOpcode(ArrayRef VL, CmpInst::Predicate SwappedCurrentPred = CmpInst::getSwappedPredicate(CurrentPred); - if (E == 2 && + if ((E == 2 || SwappedPredsCompatible) && (BasePred == CurrentPred || BasePred == SwappedCurrentPred)) continue; diff --git a/llvm/test/Transforms/SLPVectorizer/X86/icmp-altopcode-after-reordering.ll b/llvm/test/Transforms/SLPVectorizer/X86/icmp-altopcode-after-reordering.ll new file mode 100644 index 0000000..6b27015 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/icmp-altopcode-after-reordering.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +define i32 @test(ptr %sptr, i64 %0) { +; CHECK-LABEL: define i32 @test( +; CHECK-SAME: ptr [[SPTR:%.*]], i64 [[TMP0:%.*]]) { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP0]] to i32 +; CHECK-NEXT: [[IV2:%.*]] = getelementptr i8, ptr [[SPTR]], i64 4 +; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, ptr [[IV2]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x i32> [[TMP1]], i32 [[CONV_I]], i32 1 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP4]], <4 x i32> , <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = icmp sle <4 x i32> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP7:%.*]] = icmp slt <4 x i32> [[TMP3]], [[TMP5]] +; CHECK-NEXT: [[TMP8:%.*]] = shufflevector <4 x i1> [[TMP6]], <4 x i1> [[TMP7]], <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <4 x i32> [[TMP1]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> [[TMP9]], [[TMP8]] +; CHECK-NEXT: [[TMP11:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP10]]) +; CHECK-NEXT: [[AND33:%.*]] = zext i1 [[TMP11]] to i32 +; CHECK-NEXT: ret i32 [[AND33]] +; +entry: + %conv.i = trunc i64 %0 to i32 + %iv2 = getelementptr i8, ptr %sptr, i64 4 + %1 = load i32, ptr %iv2, align 4 + %cmp11 = icmp slt i32 %1, %conv.i + %cmp.i57 = icmp eq i32 %1, 0 + %or.i5977 = or i1 %cmp.i57, %cmp11 + %iv4 = getelementptr i8, ptr %sptr, i64 12 + %2 = load i32, ptr %iv4, align 4 + %cmp16 = icmp sle i32 %2, %conv.i + %cmp.i62 = icmp eq i32 %2, 0 + %or.i6478 = or i1 %cmp.i62, %cmp16 + %iv3 = getelementptr i8, ptr %sptr, i64 8 + %3 = load i32, ptr %iv3, align 8 + %cmp21 = icmp sgt i32 %3, %conv.i + %cmp.i67 = icmp eq i32 %3, 0 + %or.i6979 = or i1 %cmp.i67, %cmp21 + %iv5 = getelementptr i8, ptr %sptr, i64 16 + %4 = load i32, ptr %iv5, align 8 + %cmp26 = icmp slt i32 %conv.i, 0 + %cmp.i72 = icmp eq i32 %4, 0 + %or.i7480 = or i1 %cmp.i72, %cmp26 + %and3183 = and i1 %or.i5977, %or.i6478 + %and3284 = and i1 %and3183, %or.i6979 + %and3385 = and i1 %and3284, %or.i7480 + %and33 = zext i1 %and3385 to i32 + ret i32 %and33 +} diff --git a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll index b5a3c57..acc04be 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/reduction-logical.ll @@ -94,17 +94,13 @@ define i1 @logical_or_fcmp(<4 x float> %x) { define i1 @logical_and_icmp_diff_preds(<4 x i32> %x) { ; SSE-LABEL: @logical_and_icmp_diff_preds( -; SSE-NEXT: [[X0:%.*]] = extractelement <4 x i32> [[X:%.*]], i32 0 -; 
SSE-NEXT: [[X2:%.*]] = extractelement <4 x i32> [[X]], i32 2 -; SSE-NEXT: [[C0:%.*]] = icmp ult i32 [[X0]], 0 -; SSE-NEXT: [[C2:%.*]] = icmp sgt i32 [[X2]], 0 -; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> poison, <2 x i32> -; SSE-NEXT: [[TMP2:%.*]] = icmp slt <2 x i32> [[TMP1]], zeroinitializer -; SSE-NEXT: [[TMP3:%.*]] = extractelement <2 x i1> [[TMP2]], i32 1 -; SSE-NEXT: [[S1:%.*]] = select i1 [[C0]], i1 [[TMP3]], i1 false -; SSE-NEXT: [[S2:%.*]] = select i1 [[S1]], i1 [[C2]], i1 false -; SSE-NEXT: [[TMP4:%.*]] = extractelement <2 x i1> [[TMP2]], i32 0 -; SSE-NEXT: [[S3:%.*]] = select i1 [[S2]], i1 [[TMP4]], i1 false +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> , <4 x i32> +; SSE-NEXT: [[TMP2:%.*]] = shufflevector <4 x i32> [[X]], <4 x i32> , <4 x i32> +; SSE-NEXT: [[TMP3:%.*]] = icmp slt <4 x i32> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP4:%.*]] = icmp ult <4 x i32> [[TMP1]], [[TMP2]] +; SSE-NEXT: [[TMP5:%.*]] = shufflevector <4 x i1> [[TMP3]], <4 x i1> [[TMP4]], <4 x i32> +; SSE-NEXT: [[TMP6:%.*]] = freeze <4 x i1> [[TMP5]] +; SSE-NEXT: [[S3:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP6]]) ; SSE-NEXT: ret i1 [[S3]] ; ; AVX-LABEL: @logical_and_icmp_diff_preds( @@ -391,17 +387,28 @@ define i1 @logical_and_icmp_clamp_partial(<4 x i32> %x) { } define i1 @logical_and_icmp_clamp_pred_diff(<4 x i32> %x) { -; CHECK-LABEL: @logical_and_icmp_clamp_pred_diff( -; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[X]], -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> -; CHECK-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], -; CHECK-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] -; CHECK-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]]) -; CHECK-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP3]] -; CHECK-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP7]]) -; CHECK-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP8]], i1 false -; CHECK-NEXT: ret i1 [[OP_RDX]] +; SSE-LABEL: @logical_and_icmp_clamp_pred_diff( +; SSE-NEXT: [[TMP1:%.*]] = icmp slt <4 x i32> [[X:%.*]], +; SSE-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[X]], +; SSE-NEXT: [[TMP3:%.*]] = shufflevector <4 x i1> [[TMP1]], <4 x i1> [[TMP2]], <4 x i32> +; SSE-NEXT: [[TMP4:%.*]] = icmp sgt <4 x i32> [[X]], +; SSE-NEXT: [[TMP5:%.*]] = freeze <4 x i1> [[TMP4]] +; SSE-NEXT: [[TMP6:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP5]]) +; SSE-NEXT: [[TMP7:%.*]] = freeze <4 x i1> [[TMP3]] +; SSE-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v4i1(<4 x i1> [[TMP7]]) +; SSE-NEXT: [[OP_RDX:%.*]] = select i1 [[TMP6]], i1 [[TMP8]], i1 false +; SSE-NEXT: ret i1 [[OP_RDX]] +; +; AVX-LABEL: @logical_and_icmp_clamp_pred_diff( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[X:%.*]], <4 x i32> poison, <8 x i32> +; AVX-NEXT: [[TMP2:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> +; AVX-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> , <8 x i32> +; AVX-NEXT: [[TMP4:%.*]] = icmp sgt <8 x i32> [[TMP2]], [[TMP3]] +; AVX-NEXT: [[TMP5:%.*]] = icmp ult <8 x i32> [[TMP2]], [[TMP3]] +; AVX-NEXT: [[TMP6:%.*]] = shufflevector <8 x i1> [[TMP4]], <8 x i1> [[TMP5]], <8 x i32> +; AVX-NEXT: [[TMP7:%.*]] = freeze <8 x i1> [[TMP6]] +; AVX-NEXT: [[TMP8:%.*]] = call i1 @llvm.vector.reduce.and.v8i1(<8 x i1> [[TMP7]]) +; AVX-NEXT: ret i1 [[TMP8]] ; %x0 = extractelement <4 x i32> %x, i32 0 %x1 = extractelement <4 x i32> %x, i32 
1 -- cgit v1.1 From fbcd0c65f7b2f65e0ee58e5448b88af39faf10f1 Mon Sep 17 00:00:00 2001 From: Rafael Ubal Date: Wed, 3 Apr 2024 13:49:55 -0400 Subject: Updates to 'tosa.reshape' verifier (#87416) This addition catches common cases of malformed `tosa.reshape` ops. This prevents the `--tosa-to-tensor` pass from asserting when fed invalid operations, as these will be caught ahead of time by the verifier. Closes #87396 --- mlir/lib/Dialect/Tosa/IR/TosaOps.cpp | 17 ++++++++--- mlir/test/Dialect/Tosa/invalid.mlir | 58 ++++++++++++++++++++++++++++-------- 2 files changed, 58 insertions(+), 17 deletions(-) diff --git a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp index 6e6e843..e06ac9a 100644 --- a/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp +++ b/mlir/lib/Dialect/Tosa/IR/TosaOps.cpp @@ -955,25 +955,34 @@ LogicalResult tosa::ReshapeOp::inferReturnTypeComponents( } mlir::LogicalResult tosa::ReshapeOp::verify() { - ShapedType inputType = llvm::cast(getInput1().getType()); - ShapedType outputType = llvm::cast(getType()); + TensorType inputType = getInput1().getType(); + RankedTensorType outputType = getType(); if (hasZeroDimension(inputType) || hasZeroDimension(outputType)) return emitOpError() << "tensor has a dimension with size zero. Each " "dimension of a tensor must have size >= 1"; + if ((int64_t) getNewShape().size() != outputType.getRank()) + return emitOpError() << "new shape does not match result rank"; + + for (auto [newShapeDim, outputShapeDim] : + zip(getNewShape(), outputType.getShape())) + if (newShapeDim != -1 && outputShapeDim != ShapedType::kDynamic && + newShapeDim != outputShapeDim) + return emitOpError() << "new shape is inconsistent with result shape"; + if (inputType.hasStaticShape() && outputType.hasStaticShape()) { int64_t inputElementsNum = inputType.getNumElements(); int64_t outputElementsNum = outputType.getNumElements(); if (inputElementsNum != outputElementsNum) { - return emitOpError() << "Cannot reshape " << inputElementsNum + return emitOpError() << "cannot reshape " << inputElementsNum << " elements into " << outputElementsNum; } } int missingDims = llvm::count(getNewShape(), -1); if (missingDims > 1) - return emitOpError() << "At most one target dimension can be -1"; + return emitOpError() << "expected at most one target dimension to be -1"; return mlir::success(); } diff --git a/mlir/test/Dialect/Tosa/invalid.mlir b/mlir/test/Dialect/Tosa/invalid.mlir index 38ba48f..730ac41 100644 --- a/mlir/test/Dialect/Tosa/invalid.mlir +++ b/mlir/test/Dialect/Tosa/invalid.mlir @@ -243,38 +243,70 @@ func.func @test_reshape_type_mismatch(%arg0 : tensor<13x21x3xf32>) -> () { // ----- -func.func @test_reverse_axis_out_of_range(%arg0 : tensor<13x21x3xf32>) -> () { - // expected-error@+1 {{'tosa.reverse' op expect input tensor rank (3) to be larger than reverse axis (5)}} - %0 = tosa.reverse %arg0 {axis = 5 : i32} : (tensor<13x21x3xf32>) -> tensor +func.func @test_reshape_static_zero_dim_input(%arg0 : tensor<13x0x3xf32>) -> () { + // expected-error@+1 {{'tosa.reshape' op tensor has a dimension with size zero. 
Each dimension of a tensor must have size >= 1}} + %0 = "tosa.reshape"(%arg0) {new_shape = array} : (tensor<13x0x3xf32>) -> tensor<13x0x3xf32> return } // ----- -func.func @test_const_attribute_type_mismatch() -> tensor<100x100xf32> { - // expected-error@+1 {{'tosa.const' op failed to verify that all of {value, output} have same shape}} - %0 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x1xf32>} : () -> tensor<100x100xf32> - return %0 : tensor<100x100xf32> +func.func @test_reshape_zero_dim_input(%arg0 : tensor) -> () { + // expected-error@+1 {{'tosa.reshape' op tensor has a dimension with size zero. Each dimension of a tensor must have size >= 1}} + %0 = "tosa.reshape"(%arg0) {new_shape = array} : (tensor) -> tensor<13x0x3xf32> + return } // ----- -func.func @test_reshape_static_zero_dim_input(%arg0 : tensor<13x0x3xf32>) -> () { - // expected-error@+1 {{'tosa.reshape' op tensor has a dimension with size zero. Each dimension of a tensor must have size >= 1}} - %0 = "tosa.reshape"(%arg0) {new_shape = array} : (tensor<13x0x3xf32>) -> tensor<13x0x3xf32> +func.func @test_reshape_rank_mismatch(%arg0 : tensor) -> () { + // expected-error@+1 {{'tosa.reshape' op new shape does not match result rank}} + %0 = "tosa.reshape"(%arg0) {new_shape = array} : (tensor) -> tensor return } // ----- -func.func @test_reshape_zero_dim_input(%arg0 : tensor) -> () { - // expected-error@+1 {{'tosa.reshape' op tensor has a dimension with size zero. Each dimension of a tensor must have size >= 1}} - %0 = "tosa.reshape"(%arg0) {new_shape = array} : (tensor) -> tensor<13x0x3xf32> +func.func @test_reshape_inconsistent_result_type(%arg0 : tensor) -> () { + // expected-error@+1 {{'tosa.reshape' op new shape is inconsistent with result shape}} + %0 = "tosa.reshape"(%arg0) {new_shape = array} : (tensor) -> tensor + return +} + +// ----- + +func.func @test_reshape_invalid_size(%arg0 : tensor<2x4xf32>) -> () { + // expected-error@+1 {{'tosa.reshape' op cannot reshape 8 elements into 15}} + %0 = "tosa.reshape"(%arg0) {new_shape = array} : (tensor<2x4xf32>) -> tensor<3x5xf32> + return +} + +// ----- + +func.func @test_reshape_invalid_placeholders(%arg0 : tensor) -> () { + // expected-error@+1 {{'tosa.reshape' op expected at most one target dimension to be -1}} + %0 = "tosa.reshape"(%arg0) {new_shape = array} : (tensor) -> tensor<2x?x?xf32> return } // ----- +func.func @test_reverse_axis_out_of_range(%arg0 : tensor<13x21x3xf32>) -> () { + // expected-error@+1 {{'tosa.reverse' op expect input tensor rank (3) to be larger than reverse axis (5)}} + %0 = tosa.reverse %arg0 {axis = 5 : i32} : (tensor<13x21x3xf32>) -> tensor + return +} + +// ----- + +func.func @test_const_attribute_type_mismatch() -> tensor<100x100xf32> { + // expected-error@+1 {{'tosa.const' op failed to verify that all of {value, output} have same shape}} + %0 = "tosa.const"() {value = dense<0.000000e+00> : tensor<1x1xf32>} : () -> tensor<100x100xf32> + return %0 : tensor<100x100xf32> +} + +// ----- + func.func @test_conv2d_static_zero_dim_input(%arg0: tensor<1x29x0x4xf32>, %arg1: tensor<16x3x3x4xf32>, %arg2: tensor<16xf32>) -> tensor<1x27x27x16xf32> { // expected-error@+1 {{'tosa.conv2d' op tensor has a dimension with size zero. 
Each dimension of a tensor must have size >= 1}} %0 = "tosa.conv2d"(%arg0, %arg1, %arg2) {dilation = array, pad = array, stride = array} -- cgit v1.1 From d83233f597f6d512bf7109bb4c33a7fdd2f8fd31 Mon Sep 17 00:00:00 2001 From: Louis Dionne Date: Wed, 3 Apr 2024 13:50:55 -0400 Subject: [libc++] Mark some recent LWG issues and papers as done (#87502) Justifications: - LWG3950: Done in #66206 - LWG3975: Wording changes only - LWG4011: Wording changes only - LWG4030: Wording changes only - LWG4043: Wording changes only - LWG3036 and P2875R4: We implemented neither, but the latter reverts the former, so now we implement both without doing anything! --- libcxx/docs/Status/Cxx23.rst | 1 + libcxx/docs/Status/Cxx23Issues.csv | 2 +- libcxx/docs/Status/Cxx2cIssues.csv | 10 +++++----- libcxx/docs/Status/Cxx2cPapers.csv | 2 +- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/libcxx/docs/Status/Cxx23.rst b/libcxx/docs/Status/Cxx23.rst index 3e6e33f..23d30c8 100644 --- a/libcxx/docs/Status/Cxx23.rst +++ b/libcxx/docs/Status/Cxx23.rst @@ -64,3 +64,4 @@ Library Working Group Issues Status .. [#note-LWG3750] LWG3750 Only ``__cpp_lib_format_ranges`` is fully implemented. .. [#note-LWG3798] LWG3798: ``join_with_view``, ``zip_transform_view``, and ``adjacent_transform_view`` haven't been done yet since these types aren't implemented yet. + .. [#note-LWG3036] LWG3036: This issue was reverted by P2875R4 diff --git a/libcxx/docs/Status/Cxx23Issues.csv b/libcxx/docs/Status/Cxx23Issues.csv index ebdc4a74..0229771 100644 --- a/libcxx/docs/Status/Cxx23Issues.csv +++ b/libcxx/docs/Status/Cxx23Issues.csv @@ -16,7 +16,7 @@ "`2820 `__","Clarify ```` macros","November 2020","|Nothing To Do|","" "`3120 `__","Unclear behavior of ``monotonic_buffer_resource::release()``","November 2020","","" "`3170 `__","``is_always_equal`` added to ``std::allocator`` makes the standard library treat derived types as always equal","November 2020","|Complete|","18.0" -"`3036 `__","``polymorphic_allocator::destroy`` is extraneous","November 2020","","" +"`3036 `__","``polymorphic_allocator::destroy`` is extraneous","November 2020","|Nothing To Do| [#note-LWG3036]_","" "`3171 `__","LWG2989 breaks ``directory_entry`` stream insertion","November 2020","|Complete|","14.0" "`3306 `__","``ranges::advance`` violates its preconditions","November 2020","|Complete|","14.0","|ranges|" "`3403 `__","Domain of ``ranges::ssize(E)`` doesn't ``match ranges::size(E)``","November 2020","","","|ranges|" diff --git a/libcxx/docs/Status/Cxx2cIssues.csv b/libcxx/docs/Status/Cxx2cIssues.csv index f471c43..8a4bf2e 100644 --- a/libcxx/docs/Status/Cxx2cIssues.csv +++ b/libcxx/docs/Status/Cxx2cIssues.csv @@ -42,22 +42,22 @@ "","","","","","" "`3767 `__","``codecvt`` incorrectly added to locale","Tokyo March 2024","","","" "`3919 `__","``enumerate_view`` may invoke UB for sized common non-forward underlying ranges","Tokyo March 2024","","","|ranges|" -"`3950 `__","``std::basic_string_view`` comparison operators are overspecified","Tokyo March 2024","","","" -"`3975 `__","Specializations of ``basic_format_context`` should not be permitted","Tokyo March 2024","","","|format|" +"`3950 `__","``std::basic_string_view`` comparison operators are overspecified","Tokyo March 2024","|Complete|","18.0","" +"`3975 `__","Specializations of ``basic_format_context`` should not be permitted","Tokyo March 2024","|Nothing To Do|","","|format|" "`3984 `__","``ranges::to``'s recursion branch may be ill-formed","Tokyo March 2024","","","|ranges|" -"`4011 `__","``""Effects: 
Equivalent to return""`` in ``[span.elem]``","Tokyo March 2024","","","" +"`4011 `__","``""Effects: Equivalent to return""`` in ``[span.elem]``","Tokyo March 2024","|Nothing To Do|","","" "`4012 `__","``common_view::begin/end`` are missing the ``simple-view`` check","Tokyo March 2024","","","|ranges|" "`4013 `__","``lazy_split_view::outer-iterator::value_type`` should not provide default constructor","Tokyo March 2024","","","|ranges|" "`4016 `__","container-insertable checks do not match what container-inserter does","Tokyo March 2024","","","" "`4023 `__","Preconditions of ``std::basic_streambuf::setg/setp``","Tokyo March 2024","","","" "`4025 `__","Move assignment operator of ``std::expected`` should not be conditionally deleted","Tokyo March 2024","","","" -"`4030 `__","Clarify whether arithmetic expressions in ``[numeric.sat.func]`` are mathematical or C++","Tokyo March 2024","","","" +"`4030 `__","Clarify whether arithmetic expressions in ``[numeric.sat.func]`` are mathematical or C++","Tokyo March 2024","|Nothing To Do|","","" "`4031 `__","``bad_expected_access`` member functions should be ``noexcept``","Tokyo March 2024","","","" "`4035 `__","``single_view`` should provide ``empty``","Tokyo March 2024","","","|ranges|" "`4036 `__","``__alignof_is_defined`` is only implicitly specified in C++ and not yet deprecated","Tokyo March 2024","","","" "`4037 `__","Static data members of ``ctype_base`` are not yet required to be usable in constant expressions","Tokyo March 2024","","","" "`4038 `__","``std::text_encoding::aliases_view`` should have constexpr iterators","Tokyo March 2024","","","" -"`4043 `__","""ASCII"" is not a registered character encoding","Tokyo March 2024","","","" +"`4043 `__","""ASCII"" is not a registered character encoding","Tokyo March 2024","|Nothing To Do|","","" "`4045 `__","``tuple`` can create dangling references from ``tuple-like``","Tokyo March 2024","","","" "`4053 `__","Unary call to ``std::views::repeat`` does not decay the argument","Tokyo March 2024","","","|ranges|" "`4054 `__","Repeating a ``repeat_view`` should repeat the view","Tokyo March 2024","","","|ranges|" diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index efccd1694..6e82086 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -46,7 +46,7 @@ "`P2264R7 `__","LWG","Make ``assert()`` macro user friendly for C and C++","Kona November 2023","","","" "`P1673R13 `__","LWG","A free function linear algebra interface based on the BLAS","Kona November 2023","","","" "","","","","","","" -"`P2875R4 `__","LWG","Undeprecate ``polymorphic_allocator::destroy`` for C++26","Tokyo March 2024","","","" +"`P2875R4 `__","LWG","Undeprecate ``polymorphic_allocator::destroy`` for C++26","Tokyo March 2024","|Complete|","15.0","" "`P2867R2 `__","LWG","Remove Deprecated ``strstreams`` From C++26","Tokyo March 2024","","","" "`P2869R4 `__","LWG","Remove Deprecated ``shared_ptr`` Atomic Access APIs from C++26","Tokyo March 2024","","","" "`P2872R3 `__","LWG","Remove ``wstring_convert`` From C++26","Tokyo March 2024","","","" -- cgit v1.1 From d5ec49ff3dc26cdbe350e9cafc6b8e331fff7911 Mon Sep 17 00:00:00 2001 From: Chenguang Wang Date: Wed, 3 Apr 2024 10:56:55 -0700 Subject: [mlir] Initialize DefaultTimingManager::out. (#87522) `DefaultTimingManager::clear()` uses `out` to initialize `TimerImpl`, but the `out` is `nullptr` by default. 
This means that if `DefaultTimingManager::setOutput()` is never called, the `DefaultTimingManager` destructor may trigger a SIGSEGV. --- mlir/lib/Support/Timing.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/mlir/lib/Support/Timing.cpp b/mlir/lib/Support/Timing.cpp index 1d6796e..ac16eb7 100644 --- a/mlir/lib/Support/Timing.cpp +++ b/mlir/lib/Support/Timing.cpp @@ -499,7 +499,8 @@ public: } // namespace mlir DefaultTimingManager::DefaultTimingManager() - : impl(std::make_unique()) { + : impl(std::make_unique()), + out(std::make_unique(llvm::errs())) { clear(); // initializes the root timer } -- cgit v1.1 From a94a3cd3d6d4ca6cadaafc29c8097bd2fe078b9d Mon Sep 17 00:00:00 2001 From: Lei Wang Date: Wed, 3 Apr 2024 10:58:17 -0700 Subject: Always check the function attribute to determine checksum mismatch for available_externally functions (#87279) This is to fix an assertion error. Apparently, `pseudo_probe_desc` can still be available for import functions, and its checksum-mismatch state can differ from the import function's `profile-checksum-mismatch` attribute. This happens when an unstable-IR or ODR-violation issue occurs: the definitions of the same function in different translation units can differ and therefore produce different checksums. During link-time deduplication, the internal function definition (the one the checksum in the desc is computed from) is substituted by the `available_externally` definition, which causes the inconsistency. Hence, we fix this by always checking the state of the new `available_externally` definition, which is saved in the function attribute. --- .../Transforms/Utils/SampleProfileLoaderBaseImpl.h | 26 ++++++++++++------ .../pseudo-probe-callee-profile-mismatch.ll | 9 +++++--- 2 files changed, 25 insertions(+), 10 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h index d898ee5..581d354 100644 --- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h +++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h @@ -129,16 +129,28 @@ public: bool profileIsValid(const Function &F, const FunctionSamples &Samples) const { const auto *Desc = getDesc(F); - assert((LTOPhase != ThinOrFullLTOPhase::ThinLTOPostLink || !Desc || + bool IsAvailableExternallyLinkage = + GlobalValue::isAvailableExternallyLinkage(F.getLinkage()); + // Always check the function attribute to determine checksum mismatch for + // `available_externally` functions even if their desc are available. This + // is because the desc is computed based on the original internal function + // and it's substituted by the `available_externally` function during link + // time. However, when unstable IR or ODR violation issue occurs, the + // definitions of the same function across different translation units could + // be different and result in different checksums. So we should use the + // state from the new (available_externally) function, which is saved in its + // attribute.
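+ // (Illustration: if two TUs define 'foo' differently because of an ODR violation, the desc may describe one TU's copy while the imported available_externally body comes from the other; the attribute on the imported body reflects the definition actually being compiled, so it is the one to trust.)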
+ assert((LTOPhase != ThinOrFullLTOPhase::ThinLTOPostLink || + IsAvailableExternallyLinkage || !Desc || profileIsHashMismatched(*Desc, Samples) == F.hasFnAttribute("profile-checksum-mismatch")) && - "In post-link, profile checksum matching state doesn't match " - "function 'profile-checksum-mismatch' attribute."); + "In post-link, profile checksum matching state doesn't match the " + "internal function's 'profile-checksum-mismatch' attribute."); (void)LTOPhase; - // The desc for import function is unavailable. Check the function attribute - // for mismatch. - return (!Desc && !F.hasFnAttribute("profile-checksum-mismatch")) || - (Desc && !profileIsHashMismatched(*Desc, Samples)); + if (IsAvailableExternallyLinkage || !Desc) + return !F.hasFnAttribute("profile-checksum-mismatch"); + + return Desc && !profileIsHashMismatched(*Desc, Samples); } }; diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll index 4881937..43be142 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll @@ -1,7 +1,9 @@ ; REQUIRES: x86_64-linux ; REQUIRES: asserts -; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-callee-profile-mismatch.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline 2>&1 | FileCheck %s +; RUN: opt < %s -passes='thinlto' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/pseudo-probe-callee-profile-mismatch.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline 2>&1 | FileCheck %s +; There is no profile-checksum-mismatch attr, even the checksum is mismatched in the pseudo_probe_desc, it doesn't run the matching. 
+; CHECK-NOT: Run stale profile matching for main ; CHECK: Run stale profile matching for bar ; CHECK: Callsite with callee:baz is matched from 4 to 2 @@ -14,7 +16,7 @@ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" -define i32 @main() #0 { +define available_externally i32 @main() #0 { %1 = call i32 @bar(), !dbg !13 ret i32 0 } @@ -47,7 +49,8 @@ attributes #1 = { "profile-checksum-mismatch" "use-sample-profile" } !9 = distinct !DICompileUnit(language: DW_LANG_C11, file: !10, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None) !10 = !DIFile(filename: "test2.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "553093afc026f9c73562eb3b0c5b7532") !11 = !{i32 2, !"Debug Info Version", i32 3} -!12 = !{i64 -2624081020897602054, i64 281582081721716, !"main"} +; Make a checksum mismatch in the pseudo_probe_desc +!12 = !{i64 -2624081020897602054, i64 123456, !"main"} !13 = !DILocation(line: 8, column: 10, scope: !14) !14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 186646591) !15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 7, column: 40) -- cgit v1.1 From 8a5a1b770413bb62ff27cd8c2aea3d04b3a95bbe Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 3 Apr 2024 10:58:39 -0700 Subject: Revert "Revert "[clang][UBSan] Add implicit conversion check for bitfields"" (#87529) Reverts llvm/llvm-project#87518 Revert is not needed as the regression was fixed with 1189e87951e59a81ee097eae847c06008276fef1. I assumed the crash and warning are different issues, but according to https://lab.llvm.org/buildbot/#/builders/240/builds/26629 fixing warning resolves the crash. --- clang/docs/ReleaseNotes.rst | 7 + clang/docs/UndefinedBehaviorSanitizer.rst | 19 +- clang/include/clang/Basic/Sanitizers.def | 20 +- clang/lib/CodeGen/CGExpr.cpp | 37 ++- clang/lib/CodeGen/CGExprScalar.cpp | 257 ++++++++++++++++++--- clang/lib/CodeGen/CodeGenFunction.h | 15 ++ clang/test/CodeGen/ubsan-bitfield-conversion.c | 61 +++++ .../test/CodeGenCXX/ubsan-bitfield-conversion.cpp | 94 ++++++++ clang/test/Driver/fsanitize.c | 28 +-- compiler-rt/lib/ubsan/ubsan_handlers.cpp | 27 ++- compiler-rt/lib/ubsan/ubsan_handlers.h | 1 + 11 files changed, 493 insertions(+), 73 deletions(-) create mode 100644 clang/test/CodeGen/ubsan-bitfield-conversion.c create mode 100644 clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8fc9253..e4c0e49 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -198,6 +198,10 @@ Non-comprehensive list of changes in this release New Compiler Flags ------------------ +- ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and + sign change. +- ``-fsanitize=implicit-integer-conversion`` a group that replaces the previous + group ``-fsanitize=implicit-conversion``. - ``-Wmissing-designated-field-initializers``, grouped under ``-Wmissing-field-initializers``. This diagnostic can be disabled to make ``-Wmissing-field-initializers`` behave @@ -211,6 +215,9 @@ Modified Compiler Flags - Added a new diagnostic flag ``-Wreturn-mismatch`` which is grouped under ``-Wreturn-type``, and moved some of the diagnostics previously controlled by ``-Wreturn-type`` under this new flag. Fixes #GH72116. 
+- ``-fsanitize=implicit-conversion`` is now a group for both + ``-fsanitize=implicit-integer-conversion`` and + ``-fsanitize=implicit-bitfield-conversion``. - Added ``-Wcast-function-type-mismatch`` under the ``-Wcast-function-type`` warning group. Moved the diagnostic previously controlled by diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst index 8f58c92..531d56e 100644 --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -148,6 +148,11 @@ Available checks are: Issues caught by this sanitizer are not undefined behavior, but are often unintentional. - ``-fsanitize=integer-divide-by-zero``: Integer division by zero. + - ``-fsanitize=implicit-bitfield-conversion``: Implicit conversion from + integer of larger bit width to smaller bitfield, if that results in data + loss. This includes unsigned/signed truncations and sign changes, similarly + to how the ``-fsanitize=implicit-integer-conversion`` group works, but + explicitly for bitfields. - ``-fsanitize=nonnull-attribute``: Passing null pointer as a function parameter which is declared to never be null. - ``-fsanitize=null``: Use of a null pointer or creation of a null @@ -193,8 +198,8 @@ Available checks are: signed division overflow (``INT_MIN/-1``). Note that checks are still added even when ``-fwrapv`` is enabled. This sanitizer does not check for lossy implicit conversions performed before the computation (see - ``-fsanitize=implicit-conversion``). Both of these two issues are handled - by ``-fsanitize=implicit-conversion`` group of checks. + ``-fsanitize=implicit-integer-conversion``). Both of these two issues are handled + by ``-fsanitize=implicit-integer-conversion`` group of checks. - ``-fsanitize=unreachable``: If control flow reaches an unreachable program point. - ``-fsanitize=unsigned-integer-overflow``: Unsigned integer overflow, where @@ -202,7 +207,7 @@ Available checks are: type. Unlike signed integer overflow, this is not undefined behavior, but it is often unintentional. This sanitizer does not check for lossy implicit conversions performed before such a computation - (see ``-fsanitize=implicit-conversion``). + (see ``-fsanitize=implicit-integer-conversion``). - ``-fsanitize=vla-bound``: A variable-length array whose bound does not evaluate to a positive value. - ``-fsanitize=vptr``: Use of an object whose vptr indicates that it is of @@ -224,11 +229,15 @@ You can also use the following check groups: - ``-fsanitize=implicit-integer-arithmetic-value-change``: Catches implicit conversions that change the arithmetic value of the integer. Enables ``implicit-signed-integer-truncation`` and ``implicit-integer-sign-change``. - - ``-fsanitize=implicit-conversion``: Checks for suspicious - behavior of implicit conversions. Enables + - ``-fsanitize=implicit-integer-conversion``: Checks for suspicious + behavior of implicit integer conversions. Enables ``implicit-unsigned-integer-truncation``, ``implicit-signed-integer-truncation``, and ``implicit-integer-sign-change``. + - ``-fsanitize=implicit-conversion``: Checks for suspicious + behavior of implicit conversions. Enables + ``implicit-integer-conversion``, and + ``implicit-bitfield-conversion``. - ``-fsanitize=integer``: Checks for undefined or suspicious integer behavior (e.g. unsigned integer overflow). 
Enables ``signed-integer-overflow``, ``unsigned-integer-overflow``, diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def index c2137e3..b228ffd07 100644 --- a/clang/include/clang/Basic/Sanitizers.def +++ b/clang/include/clang/Basic/Sanitizers.def @@ -163,24 +163,24 @@ SANITIZER_GROUP("implicit-integer-arithmetic-value-change", ImplicitIntegerArithmeticValueChange, ImplicitIntegerSignChange | ImplicitSignedIntegerTruncation) -SANITIZER("objc-cast", ObjCCast) +SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion, + ImplicitIntegerArithmeticValueChange | + ImplicitUnsignedIntegerTruncation) -// FIXME: -//SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion, -// ImplicitIntegerArithmeticValueChange | -// ImplicitUnsignedIntegerTruncation) -//SANITIZER_GROUP("implicit-conversion", ImplicitConversion, -// ImplicitIntegerConversion) +// Implicit bitfield sanitizers +SANITIZER("implicit-bitfield-conversion", ImplicitBitfieldConversion) SANITIZER_GROUP("implicit-conversion", ImplicitConversion, - ImplicitIntegerArithmeticValueChange | - ImplicitUnsignedIntegerTruncation) + ImplicitIntegerConversion | + ImplicitBitfieldConversion) SANITIZER_GROUP("integer", Integer, - ImplicitConversion | IntegerDivideByZero | Shift | + ImplicitIntegerConversion | IntegerDivideByZero | Shift | SignedIntegerOverflow | UnsignedIntegerOverflow | UnsignedShiftBase) +SANITIZER("objc-cast", ObjCCast) + SANITIZER("local-bounds", LocalBounds) SANITIZER_GROUP("bounds", Bounds, ArrayBounds | LocalBounds) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 5443235..0c7f48f 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5580,11 +5580,44 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { break; } - RValue RV = EmitAnyExpr(E->getRHS()); + // TODO: Can we de-duplicate this code with the corresponding code in + // CGExprScalar, similar to the way EmitCompoundAssignmentLValue works? + RValue RV; + llvm::Value *Previous = nullptr; + QualType SrcType = E->getRHS()->getType(); + // Check if LHS is a bitfield, if RHS contains an implicit cast expression + // we want to extract that value and potentially (if the bitfield sanitizer + // is enabled) use it to check for an implicit conversion. + if (E->getLHS()->refersToBitField()) { + llvm::Value *RHS = + EmitWithOriginalRHSBitfieldAssignment(E, Previous, &SrcType); + RV = RValue::get(RHS); + } else + RV = EmitAnyExpr(E->getRHS()); + LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store); + if (RV.isScalar()) EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc()); - EmitStoreThroughLValue(RV, LV); + + if (LV.isBitField()) { + llvm::Value *Result = nullptr; + // If bitfield sanitizers are enabled we want to use the result + // to check whether a truncation or sign change has occurred. + if (SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) + EmitStoreThroughBitfieldLValue(RV, LV, &Result); + else + EmitStoreThroughBitfieldLValue(RV, LV); + + // If the expression contained an implicit conversion, make sure + // to use the value before the scalar conversion. + llvm::Value *Src = Previous ? 
Previous : RV.getScalarVal(); + QualType DstType = E->getLHS()->getType(); + EmitBitfieldConversionCheck(Src, SrcType, Result, DstType, + LV.getBitFieldInfo(), E->getExprLoc()); + } else + EmitStoreThroughLValue(RV, LV); + if (getLangOpts().OpenMP) CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this, E->getLHS()); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index 397b497..a4ab8a11 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -15,6 +15,7 @@ #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" +#include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" @@ -308,6 +309,7 @@ public: llvm::Type *DstTy, SourceLocation Loc); /// Known implicit conversion check kinds. + /// This is used for bitfield conversion checks as well. /// Keep in sync with the enum of the same name in ubsan_handlers.h enum ImplicitConversionCheckKind : unsigned char { ICCK_IntegerTruncation = 0, // Legacy, was only used by clang 7. @@ -1103,6 +1105,21 @@ void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, {Src, Dst}); } +static llvm::Value *EmitIsNegativeTestHelper(Value *V, QualType VType, + const char *Name, + CGBuilderTy &Builder) { + bool VSigned = VType->isSignedIntegerOrEnumerationType(); + llvm::Type *VTy = V->getType(); + if (!VSigned) { + // If the value is unsigned, then it is never negative. + return llvm::ConstantInt::getFalse(VTy->getContext()); + } + llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0); + return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero, + llvm::Twine(Name) + "." + V->getName() + + ".negativitycheck"); +} + // Should be called within CodeGenFunction::SanitizerScope RAII scope. // Returns 'i1 false' when the conversion Src -> Dst changed the sign. static std::pair Value * { - // Is this value a signed type? - bool VSigned = VType->isSignedIntegerOrEnumerationType(); - llvm::Type *VTy = V->getType(); - if (!VSigned) { - // If the value is unsigned, then it is never negative. - // FIXME: can we encounter non-scalar VTy here? - return llvm::ConstantInt::getFalse(VTy->getContext()); - } - // Get the zero of the same type with which we will be comparing. - llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0); - // %V.isnegative = icmp slt %V, 0 - // I.e is %V *strictly* less than zero, does it have negative value? - return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero, - llvm::Twine(Name) + "." + V->getName() + - ".negativitycheck"); - }; - // 1. Was the old Value negative? - llvm::Value *SrcIsNegative = EmitIsNegativeTest(Src, SrcType, "src"); + llvm::Value *SrcIsNegative = + EmitIsNegativeTestHelper(Src, SrcType, "src", Builder); // 2. Is the new Value negative? - llvm::Value *DstIsNegative = EmitIsNegativeTest(Dst, DstType, "dst"); + llvm::Value *DstIsNegative = + EmitIsNegativeTestHelper(Dst, DstType, "dst", Builder); // 3. Now, was the 'negativity status' preserved during the conversion? // NOTE: conversion from negative to zero is considered to change the sign. // (We want to get 'false' when the conversion changed the sign) @@ -1245,6 +1244,136 @@ void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, {Src, Dst}); } +// Should be called within CodeGenFunction::SanitizerScope RAII scope. +// Returns 'i1 false' when the truncation Src -> Dst was lossy. 
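+// Illustration: storing the 32-bit value 10 into a signed 3-bit bitfield leaves 2 in the field, so re-extending the stored value and comparing it against the original source yields 'i1 false' and the check fires.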
+static std::pair> +EmitBitfieldTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst, + QualType DstType, CGBuilderTy &Builder) { + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + + ScalarExprEmitter::ImplicitConversionCheckKind Kind; + if (!SrcSigned && !DstSigned) + Kind = ScalarExprEmitter::ICCK_UnsignedIntegerTruncation; + else + Kind = ScalarExprEmitter::ICCK_SignedIntegerTruncation; + + llvm::Value *Check = nullptr; + // 1. Extend the truncated value back to the same width as the Src. + Check = Builder.CreateIntCast(Dst, Src->getType(), DstSigned, "bf.anyext"); + // 2. Equality-compare with the original source value + Check = Builder.CreateICmpEQ(Check, Src, "bf.truncheck"); + // If the comparison result is 'i1 false', then the truncation was lossy. + + return std::make_pair( + Kind, std::make_pair(Check, SanitizerKind::ImplicitBitfieldConversion)); +} + +// Should be called within CodeGenFunction::SanitizerScope RAII scope. +// Returns 'i1 false' when the conversion Src -> Dst changed the sign. +static std::pair> +EmitBitfieldSignChangeCheckHelper(Value *Src, QualType SrcType, Value *Dst, + QualType DstType, CGBuilderTy &Builder) { + // 1. Was the old Value negative? + llvm::Value *SrcIsNegative = + EmitIsNegativeTestHelper(Src, SrcType, "bf.src", Builder); + // 2. Is the new Value negative? + llvm::Value *DstIsNegative = + EmitIsNegativeTestHelper(Dst, DstType, "bf.dst", Builder); + // 3. Now, was the 'negativity status' preserved during the conversion? + // NOTE: conversion from negative to zero is considered to change the sign. + // (We want to get 'false' when the conversion changed the sign) + // So we should just equality-compare the negativity statuses. + llvm::Value *Check = nullptr; + Check = + Builder.CreateICmpEQ(SrcIsNegative, DstIsNegative, "bf.signchangecheck"); + // If the comparison result is 'false', then the conversion changed the sign. + return std::make_pair( + ScalarExprEmitter::ICCK_IntegerSignChange, + std::make_pair(Check, SanitizerKind::ImplicitBitfieldConversion)); +} + +void CodeGenFunction::EmitBitfieldConversionCheck(Value *Src, QualType SrcType, + Value *Dst, QualType DstType, + const CGBitFieldInfo &Info, + SourceLocation Loc) { + + if (!SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) + return; + + // We only care about int->int conversions here. + // We ignore conversions to/from pointer and/or bool. + if (!PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(SrcType, + DstType)) + return; + + if (DstType->isBooleanType() || SrcType->isBooleanType()) + return; + + // This should be truncation of integral types. + assert(isa(Src->getType()) && + isa(Dst->getType()) && "non-integer llvm type"); + + // TODO: Calculate src width to avoid emitting code + // for unecessary cases. + unsigned SrcBits = ConvertType(SrcType)->getScalarSizeInBits(); + unsigned DstBits = Info.Size; + + bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); + bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); + + CodeGenFunction::SanitizerScope SanScope(this); + + std::pair> + Check; + + // Truncation + bool EmitTruncation = DstBits < SrcBits; + // If Dst is signed and Src unsigned, we want to be more specific + // about the CheckKind we emit, in this case we want to emit + // ICCK_SignedIntegerTruncationOrSignChange. 
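+ // Illustration: assigning the unsigned 32-bit value 255 to a signed 4-bit bitfield both truncates and changes the sign (the field reads back as -1), which is why the combined kind is reported for unsigned-to-signed truncating stores.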
+ bool EmitTruncationFromUnsignedToSigned = + EmitTruncation && DstSigned && !SrcSigned; + // Sign change + bool SameTypeSameSize = SrcSigned == DstSigned && SrcBits == DstBits; + bool BothUnsigned = !SrcSigned && !DstSigned; + bool LargerSigned = (DstBits > SrcBits) && DstSigned; + // We can avoid emitting sign change checks in some obvious cases + // 1. If Src and Dst have the same signedness and size + // 2. If both are unsigned sign check is unecessary! + // 3. If Dst is signed and bigger than Src, either + // sign-extension or zero-extension will make sure + // the sign remains. + bool EmitSignChange = !SameTypeSameSize && !BothUnsigned && !LargerSigned; + + if (EmitTruncation) + Check = + EmitBitfieldTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder); + else if (EmitSignChange) { + assert(((SrcBits != DstBits) || (SrcSigned != DstSigned)) && + "either the widths should be different, or the signednesses."); + Check = + EmitBitfieldSignChangeCheckHelper(Src, SrcType, Dst, DstType, Builder); + } else + return; + + ScalarExprEmitter::ImplicitConversionCheckKind CheckKind = Check.first; + if (EmitTruncationFromUnsignedToSigned) + CheckKind = ScalarExprEmitter::ICCK_SignedIntegerTruncationOrSignChange; + + llvm::Constant *StaticArgs[] = { + EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(SrcType), + EmitCheckTypeDescriptor(DstType), + llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind), + llvm::ConstantInt::get(Builder.getInt32Ty(), Info.Size)}; + + EmitCheck(Check.second, SanitizerHandler::ImplicitConversion, StaticArgs, + {Src, Dst}); +} + Value *ScalarExprEmitter::EmitScalarCast(Value *Src, QualType SrcType, QualType DstType, llvm::Type *SrcTy, llvm::Type *DstTy, @@ -2620,6 +2749,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::PHINode *atomicPHI = nullptr; llvm::Value *value; llvm::Value *input; + llvm::Value *Previous = nullptr; + QualType SrcType = E->getType(); int amount = (isInc ? 1 : -1); bool isSubtraction = !isInc; @@ -2708,7 +2839,8 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, "base or promoted) will be signed, or the bitwidths will match."); } if (CGF.SanOpts.hasOneOf( - SanitizerKind::ImplicitIntegerArithmeticValueChange) && + SanitizerKind::ImplicitIntegerArithmeticValueChange | + SanitizerKind::ImplicitBitfieldConversion) && canPerformLossyDemotionCheck) { // While `x += 1` (for `x` with width less than int) is modeled as // promotion+arithmetics+demotion, and we can catch lossy demotion with @@ -2719,13 +2851,26 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // the increment/decrement in the wider type, and finally // perform the demotion. This will catch lossy demotions. + // We have a special case for bitfields defined using all the bits of the + // type. In this case we need to do the same trick as for the integer + // sanitizer checks, i.e., promotion -> increment/decrement -> demotion. + value = EmitScalarConversion(value, type, promotedType, E->getExprLoc()); Value *amt = llvm::ConstantInt::get(value->getType(), amount, true); value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec"); // Do pass non-default ScalarConversionOpts so that sanitizer check is - // emitted. + // emitted if LV is not a bitfield, otherwise the bitfield sanitizer + // checks will take care of the conversion. 
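+ // Illustration: for 'struct S { int a : 3; } s; ++s.a;' the increment is performed in 'int', and the demotion back to the 3-bit field is verified by the bitfield conversion check emitted at the store below rather than by the integer sanitizer here.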
+ ScalarConversionOpts Opts; + if (!LV.isBitField()) + Opts = ScalarConversionOpts(CGF.SanOpts); + else if (CGF.SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) { + Previous = value; + SrcType = promotedType; + } + value = EmitScalarConversion(value, promotedType, type, E->getExprLoc(), - ScalarConversionOpts(CGF.SanOpts)); + Opts); // Note that signed integer inc/dec with width less than int can't // overflow because of promotion rules; we're just eliding a few steps @@ -2910,9 +3055,12 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } // Store the updated result through the lvalue. - if (LV.isBitField()) + if (LV.isBitField()) { + Value *Src = Previous ? Previous : value; CGF.EmitStoreThroughBitfieldLValue(RValue::get(value), LV, &value); - else + CGF.EmitBitfieldConversionCheck(Src, SrcType, value, E->getType(), + LV.getBitFieldInfo(), E->getExprLoc()); + } else CGF.EmitStoreThroughLValue(RValue::get(value), LV); // If this is a postinc, return the value read from memory, otherwise use the @@ -3417,8 +3565,15 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // Convert the result back to the LHS type, // potentially with Implicit Conversion sanitizer check. - Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, - ScalarConversionOpts(CGF.SanOpts)); + // If LHSLV is a bitfield, use default ScalarConversionOpts + // to avoid emit any implicit integer checks. + Value *Previous = nullptr; + if (LHSLV.isBitField()) { + Previous = Result; + Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc); + } else + Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, + ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); @@ -3437,9 +3592,14 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // specially because the result is altered by the store, i.e., [C99 6.5.16p1] // 'An assignment expression has the value of the left operand after the // assignment...'. - if (LHSLV.isBitField()) + if (LHSLV.isBitField()) { + Value *Src = Previous ? Previous : Result; + QualType SrcType = E->getRHS()->getType(); + QualType DstType = E->getLHS()->getType(); CGF.EmitStoreThroughBitfieldLValue(RValue::get(Result), LHSLV, &Result); - else + CGF.EmitBitfieldConversionCheck(Src, SrcType, Result, DstType, + LHSLV.getBitFieldInfo(), E->getExprLoc()); + } else CGF.EmitStoreThroughLValue(RValue::get(Result), LHSLV); if (CGF.getLangOpts().OpenMP) @@ -4551,6 +4711,24 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, E->getExprLoc()); } +llvm::Value *CodeGenFunction::EmitWithOriginalRHSBitfieldAssignment( + const BinaryOperator *E, Value *Previous, QualType *SrcType) { + // In case we have the integer or bitfield sanitizer checks enabled + // we want to get the expression before scalar conversion. + if (auto *ICE = dyn_cast(E->getRHS())) { + CastKind Kind = ICE->getCastKind(); + if (Kind == CK_IntegralCast) { + *SrcType = ICE->getSubExpr()->getType(); + Previous = EmitScalarExpr(ICE->getSubExpr()); + // Pass default ScalarConversionOpts to avoid emitting + // integer sanitizer checks as E refers to bitfield. 
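+ // Illustration: for 's.a = x' with 'x' of type 'int' and 'a' a bitfield, the RHS is evaluated at full width before the implicit integral cast so that the later bitfield check can compare the stored, demoted result against the original value.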
+ return EmitScalarConversion(Previous, *SrcType, ICE->getType(), + ICE->getExprLoc()); + } + } + return EmitScalarExpr(E->getRHS()); +} + Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { bool Ignore = TestAndClearIgnoreResultAssign(); @@ -4579,7 +4757,16 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { case Qualifiers::OCL_None: // __block variables need to have the rhs evaluated first, plus // this should improve codegen just a little. - RHS = Visit(E->getRHS()); + Value *Previous = nullptr; + QualType SrcType = E->getRHS()->getType(); + // Check if LHS is a bitfield, if RHS contains an implicit cast expression + // we want to extract that value and potentially (if the bitfield sanitizer + // is enabled) use it to check for an implicit conversion. + if (E->getLHS()->refersToBitField()) + RHS = CGF.EmitWithOriginalRHSBitfieldAssignment(E, Previous, &SrcType); + else + RHS = Visit(E->getRHS()); + LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); // Store the value into the LHS. Bit-fields are handled specially @@ -4588,6 +4775,12 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { // the assignment...'. if (LHS.isBitField()) { CGF.EmitStoreThroughBitfieldLValue(RValue::get(RHS), LHS, &RHS); + // If the expression contained an implicit conversion, make sure + // to use the value before the scalar conversion. + Value *Src = Previous ? Previous : RHS; + QualType DstType = E->getLHS()->getType(); + CGF.EmitBitfieldConversionCheck(Src, SrcType, RHS, DstType, + LHS.getBitFieldInfo(), E->getExprLoc()); } else { CGF.EmitNullabilityCheck(LHS, RHS, E->getExprLoc()); CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index e2a7e28..99a7f51 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2786,6 +2786,21 @@ public: /// expression and compare the result against zero, returning an Int1Ty value. llvm::Value *EvaluateExprAsBool(const Expr *E); + /// Retrieve the implicit cast expression of the rhs in a binary operator + /// expression by passing pointers to Value and QualType + /// This is used for implicit bitfield conversion checks, which + /// must compare with the value before potential truncation. + llvm::Value *EmitWithOriginalRHSBitfieldAssignment(const BinaryOperator *E, + llvm::Value *Previous, + QualType *SrcType); + + /// Emit a check that an [implicit] conversion of a bitfield. It is not UB, + /// so we use the value after conversion. + void EmitBitfieldConversionCheck(llvm::Value *Src, QualType SrcType, + llvm::Value *Dst, QualType DstType, + const CGBitFieldInfo &Info, + SourceLocation Loc); + /// EmitIgnoredExpr - Emit an expression in a context which ignores the result. 
void EmitIgnoredExpr(const Expr *E); diff --git a/clang/test/CodeGen/ubsan-bitfield-conversion.c b/clang/test/CodeGen/ubsan-bitfield-conversion.c new file mode 100644 index 0000000..ea9bdd7 --- /dev/null +++ b/clang/test/CodeGen/ubsan-bitfield-conversion.c @@ -0,0 +1,61 @@ +// RUN: %clang -fsanitize=implicit-bitfield-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION +// RUN: %clang -fsanitize=implicit-integer-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK +// RUN: %clang -fsanitize=implicit-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION + +typedef struct _xx { + int x1:3; + char x2:2; +} xx, *pxx; + +xx vxx; + +// CHECK-LABEL: define{{.*}} void @foo1 +void foo1(int x) { + vxx.x1 = x; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @foo2 +void foo2(int x) { + vxx.x2 = x; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @foo3 +void foo3() { + vxx.x1++; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @foo4 +void foo4(int x) { + vxx.x1 += x; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} \ No newline at end of file diff --git a/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp b/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp new file mode 100644 index 0000000..92f6e24 --- /dev/null +++ b/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp @@ -0,0 +1,94 @@ +// RUN: %clang -x c++ -fsanitize=implicit-bitfield-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION +// RUN: %clang -x c++ -fsanitize=implicit-integer-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK +// RUN: %clang -x c++ -fsanitize=implicit-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION + +struct S { + int a:3; + char b:2; +}; + +class C : public S { + public: + short c:3; +}; + +S s; +C c; + +// CHECK-LABEL: define{{.*}} void @{{.*foo1.*}} +void foo1(int x) { + s.a = x; + // CHECK: store i8 %{{.*}} + // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + c.a = x; + // CHECK: store i8 %{{.*}} + // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @{{.*foo2.*}} +void foo2(int x) { + s.b = x; + // CHECK: store i8 %{{.*}} + // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + c.b = x; + // CHECK: store i8 %{{.*}} + // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 + // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @{{.*foo3.*}} +void foo3() { + s.a++; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + c.a++; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} + +// CHECK-LABEL: define{{.*}} void @{{.*foo4.*}} +void foo4(int x) { + s.a += x; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + c.a += x; + // CHECK: store i8 %{{.*}} + // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 + // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 
[[BFRESULTSHL]], 5 + // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 + // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion + // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 + // CHECK-BITFIELD-CONVERSION: [[CONT]]: + // CHECK-NEXT: ret void +} \ No newline at end of file diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 1671825..571f79a 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -35,20 +35,20 @@ // RUN: %clang --target=%itanium_abi_triple -fsanitize=integer %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-INTEGER -implicit-check-not="-fsanitize-address-use-after-scope" // CHECK-INTEGER: "-fsanitize={{((signed-integer-overflow|unsigned-integer-overflow|integer-divide-by-zero|shift-base|shift-exponent|implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change|unsigned-shift-base),?){9}"}} -// RUN: %clang -fsanitize=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-RECOVER -// RUN: %clang -fsanitize=implicit-conversion -fsanitize-recover=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-RECOVER -// RUN: %clang -fsanitize=implicit-conversion -fno-sanitize-recover=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-NORECOVER -// RUN: %clang -fsanitize=implicit-conversion -fsanitize-trap=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-TRAP -// CHECK-implicit-conversion: "-fsanitize={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-RECOVER: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-RECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-RECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-NORECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // ??? 
-// CHECK-implicit-conversion-NORECOVER-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-NORECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-TRAP: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-TRAP-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-conversion-TRAP-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// RUN: %clang -fsanitize=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-RECOVER +// RUN: %clang -fsanitize=implicit-integer-conversion -fsanitize-recover=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-RECOVER +// RUN: %clang -fsanitize=implicit-integer-conversion -fno-sanitize-recover=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-NORECOVER +// RUN: %clang -fsanitize=implicit-integer-conversion -fsanitize-trap=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-TRAP +// CHECK-implicit-integer-conversion: "-fsanitize={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-RECOVER: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-RECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-RECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // ??? 
+// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-TRAP: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-TRAP-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-integer-conversion-TRAP-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // RUN: %clang -fsanitize=implicit-integer-arithmetic-value-change %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-arithmetic-value-change,CHECK-implicit-integer-arithmetic-value-change-RECOVER // RUN: %clang -fsanitize=implicit-integer-arithmetic-value-change -fsanitize-recover=implicit-integer-arithmetic-value-change %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-arithmetic-value-change,CHECK-implicit-integer-arithmetic-value-change-RECOVER diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp index 0f16507..27d0165 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp +++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp @@ -555,13 +555,11 @@ static void handleImplicitConversion(ImplicitConversionData *Data, ReportOptions Opts, ValueHandle Src, ValueHandle Dst) { SourceLocation Loc = Data->Loc.acquire(); - ErrorType ET = ErrorType::GenericUB; - const TypeDescriptor &SrcTy = Data->FromType; const TypeDescriptor &DstTy = Data->ToType; - bool SrcSigned = SrcTy.isSignedIntegerTy(); bool DstSigned = DstTy.isSignedIntegerTy(); + ErrorType ET = ErrorType::GenericUB; switch (Data->Kind) { case ICCK_IntegerTruncation: { // Legacy, no longer used. @@ -594,14 +592,23 @@ static void handleImplicitConversion(ImplicitConversionData *Data, ScopedReport R(Opts, Loc, ET); + // In the case we have a bitfield, we want to explicitly say so in the + // error message. // FIXME: is it possible to dump the values as hex with fixed width? - - Diag(Loc, DL_Error, ET, - "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " - "type %4 changed the value to %5 (%6-bit, %7signed)") - << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() - << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) - << DstTy.getIntegerBitWidth() << (DstSigned ? "" : "un"); + if (Data->BitfieldBits) + Diag(Loc, DL_Error, ET, + "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " + "type %4 changed the value to %5 (%6-bit bitfield, %7signed)") + << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() + << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) + << Data->BitfieldBits << (DstSigned ? "" : "un"); + else + Diag(Loc, DL_Error, ET, + "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " + "type %4 changed the value to %5 (%6-bit, %7signed)") + << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() + << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) + << DstTy.getIntegerBitWidth() << (DstSigned ? 
"" : "un"); } void __ubsan::__ubsan_handle_implicit_conversion(ImplicitConversionData *Data, diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.h b/compiler-rt/lib/ubsan/ubsan_handlers.h index 3bd5046..bae661a 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.h +++ b/compiler-rt/lib/ubsan/ubsan_handlers.h @@ -147,6 +147,7 @@ struct ImplicitConversionData { const TypeDescriptor &FromType; const TypeDescriptor &ToType; /* ImplicitConversionCheckKind */ unsigned char Kind; + unsigned int BitfieldBits; }; /// \brief Implict conversion that changed the value. -- cgit v1.1 From 42c7bc04c30b427414a2d957776b1655abb27b6e Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 3 Apr 2024 19:10:14 +0100 Subject: [AArch64][ARM] Make neon fp16 generic intrinsics always available. (#87467) By generic intrinsics this mean things like dup, ext, zip and bsl that can always be executed with integer s16 operations and do not require fullfp16. This makes them always available, and brings them inline with GCC. https://godbolt.org/z/azs8eMv54 The relevant test cases have been moved into their own files, to allow them to be tested with armv8-a and armv8.2-a+fp16. --- clang/include/clang/Basic/arm_neon.td | 31 +- clang/lib/CodeGen/CGBuiltin.cpp | 10 - .../aarch64-v8.2a-neon-intrinsics-generic.c | 485 +++++++++++++++++ clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c | 472 ---------------- .../CodeGen/arm-v8.2a-neon-intrinsics-generic.c | 600 +++++++++++++++++++++ clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c | 178 ------ 6 files changed, 1100 insertions(+), 676 deletions(-) create mode 100644 clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c create mode 100644 clang/test/CodeGen/arm-v8.2a-neon-intrinsics-generic.c diff --git a/clang/include/clang/Basic/arm_neon.td b/clang/include/clang/Basic/arm_neon.td index f16de97..7edac5a 100644 --- a/clang/include/clang/Basic/arm_neon.td +++ b/clang/include/clang/Basic/arm_neon.td @@ -1758,24 +1758,21 @@ let TargetGuard = "fullfp16" in { // Mul lane def VMUL_LANEH : IOpInst<"vmul_lane", "..qI", "hQh", OP_MUL_LN>; def VMUL_NH : IOpInst<"vmul_n", "..1", "hQh", OP_MUL_N>; +} - // Data processing intrinsics - section 5 - - // Logical operations - let isHiddenLInst = 1 in - def VBSLH : SInst<"vbsl", ".U..", "hQh">; - - // Transposition operations - def VZIPH : WInst<"vzip", "2..", "hQh">; - def VUZPH : WInst<"vuzp", "2..", "hQh">; - def VTRNH : WInst<"vtrn", "2..", "hQh">; - - // Vector Extract - def VEXTH : WInst<"vext", "...I", "hQh">; +// Data processing intrinsics - section 5. Do not require fullfp16. - // Reverse vector elements - def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>; -} +// Logical operations +let isHiddenLInst = 1 in +def VBSLH : SInst<"vbsl", ".U..", "hQh">; +// Transposition operations +def VZIPH : WInst<"vzip", "2..", "hQh">; +def VUZPH : WInst<"vuzp", "2..", "hQh">; +def VTRNH : WInst<"vtrn", "2..", "hQh">; +// Vector Extract +def VEXTH : WInst<"vext", "...I", "hQh">; +// Reverse vector elements +def VREV64H : WOpInst<"vrev64", "..", "hQh", OP_REV64>; // ARMv8.2-A FP16 vector intrinsics for A64 only. 
let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in { @@ -1857,7 +1854,9 @@ let ArchGuard = "defined(__aarch64__)", TargetGuard = "fullfp16" in { def VMINVH : SInst<"vminv", "1.", "hQh">; def FMAXNMVH : SInst<"vmaxnmv", "1.", "hQh">; def FMINNMVH : SInst<"vminnmv", "1.", "hQh">; +} +let ArchGuard = "defined(__aarch64__)" in { // Permutation def VTRN1H : SOpInst<"vtrn1", "...", "hQh", OP_TRN1>; def VZIP1H : SOpInst<"vzip1", "...", "hQh", OP_ZIP1>; diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 483f9c2..2537e71 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -7281,8 +7281,6 @@ static const std::pair NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_vabdq_f16, NEON::BI__builtin_neon_vabdq_v, }, { NEON::BI__builtin_neon_vabs_f16, NEON::BI__builtin_neon_vabs_v, }, { NEON::BI__builtin_neon_vabsq_f16, NEON::BI__builtin_neon_vabsq_v, }, - { NEON::BI__builtin_neon_vbsl_f16, NEON::BI__builtin_neon_vbsl_v, }, - { NEON::BI__builtin_neon_vbslq_f16, NEON::BI__builtin_neon_vbslq_v, }, { NEON::BI__builtin_neon_vcage_f16, NEON::BI__builtin_neon_vcage_v, }, { NEON::BI__builtin_neon_vcageq_f16, NEON::BI__builtin_neon_vcageq_v, }, { NEON::BI__builtin_neon_vcagt_f16, NEON::BI__builtin_neon_vcagt_v, }, @@ -7301,8 +7299,6 @@ static const std::pair NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_vclezq_f16, NEON::BI__builtin_neon_vclezq_v, }, { NEON::BI__builtin_neon_vcltz_f16, NEON::BI__builtin_neon_vcltz_v, }, { NEON::BI__builtin_neon_vcltzq_f16, NEON::BI__builtin_neon_vcltzq_v, }, - { NEON::BI__builtin_neon_vext_f16, NEON::BI__builtin_neon_vext_v, }, - { NEON::BI__builtin_neon_vextq_f16, NEON::BI__builtin_neon_vextq_v, }, { NEON::BI__builtin_neon_vfma_f16, NEON::BI__builtin_neon_vfma_v, }, { NEON::BI__builtin_neon_vfma_lane_f16, NEON::BI__builtin_neon_vfma_lane_v, }, { NEON::BI__builtin_neon_vfma_laneq_f16, NEON::BI__builtin_neon_vfma_laneq_v, }, @@ -7405,12 +7401,6 @@ static const std::pair NEONEquivalentIntrinsicMap[] = { { NEON::BI__builtin_neon_vst4_lane_bf16, NEON::BI__builtin_neon_vst4_lane_v }, { NEON::BI__builtin_neon_vst4q_bf16, NEON::BI__builtin_neon_vst4q_v }, { NEON::BI__builtin_neon_vst4q_lane_bf16, NEON::BI__builtin_neon_vst4q_lane_v }, - { NEON::BI__builtin_neon_vtrn_f16, NEON::BI__builtin_neon_vtrn_v, }, - { NEON::BI__builtin_neon_vtrnq_f16, NEON::BI__builtin_neon_vtrnq_v, }, - { NEON::BI__builtin_neon_vuzp_f16, NEON::BI__builtin_neon_vuzp_v, }, - { NEON::BI__builtin_neon_vuzpq_f16, NEON::BI__builtin_neon_vuzpq_v, }, - { NEON::BI__builtin_neon_vzip_f16, NEON::BI__builtin_neon_vzip_v, }, - { NEON::BI__builtin_neon_vzipq_f16, NEON::BI__builtin_neon_vzipq_v, }, // The mangling rules cause us to have one ID for each type for vldap1(q)_lane // and vstl1(q)_lane, but codegen is equivalent for all of them. Choose an // arbitrary one to be handled as tha canonical variation. 
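The effect of the change above is easiest to see from the user's side. As the commit message notes, these "generic" fp16 intrinsics only rearrange 16-bit lanes, so code such as the following sketch is expected to compile even when the fullfp16 extension is unavailable (for example a plain armv8-a target, matching the -target-feature -fullfp16 RUN lines in the new tests below). The helper names and usage here are illustrative only and are not part of this patch:

#include <arm_neon.h>

// Bitwise-select between two fp16 vectors under an integer mask, then rotate
// the result by two lanes. vbsl_f16 and vext_f16 are pure lane shuffles, so
// no fp16 arithmetic (and hence no fullfp16 hardware) is required.
float16x4_t demo_select_and_rotate(uint16x4_t mask, float16x4_t b, float16x4_t c) {
  float16x4_t sel = vbsl_f16(mask, b, c);
  return vext_f16(sel, sel, 2);
}

// Interleave two fp16 vectors; vzip_f16 returns a float16x4x2_t pair.
float16x4x2_t demo_interleave(float16x4_t a, float16x4_t b) {
  return vzip_f16(a, b);
}

Before this patch these declarations sat behind the fullfp16 target guard in arm_neon.td, so such code was rejected without +fullfp16; afterwards only the arithmetic fp16 intrinsics remain guarded.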
diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c new file mode 100644 index 0000000..7839180 --- /dev/null +++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics-generic.c @@ -0,0 +1,485 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature -fullfp16 -target-feature +v8a\ +// RUN: -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -passes=mem2reg \ +// RUN: | FileCheck %s +// RUN: %clang_cc1 -triple arm64-none-linux-gnu -target-feature +neon -target-feature +fullfp16 -target-feature +v8.2a\ +// RUN: -flax-vector-conversions=none -S -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -passes=mem2reg \ +// RUN: | FileCheck %s + +// REQUIRES: aarch64-registered-target + +#include + +// CHECK-LABEL: define {{[^@]+}}@test_vbsl_f16 +// CHECK-SAME: (<4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8> +// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[A]], [[VBSL1_I]] +// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[A]], +// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]] +// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half> +// CHECK-NEXT: ret <4 x half> [[TMP4]] +// +float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) { + return vbsl_f16(a, b, c); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vbslq_f16 +// CHECK-SAME: (<8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8> +// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> +// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[A]], [[VBSL1_I]] +// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[A]], +// CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] +// CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] +// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half> +// CHECK-NEXT: ret <8 x half> [[TMP4]] +// +float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) { + return vbslq_f16(a, b, c); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vzip_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x 
half> [[B]], <4 x i32> +// CHECK-NEXT: store <4 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1 +// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-NEXT: store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0 +// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]] +// +float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { + return vzip_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vzipq_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-NEXT: store <8 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1 +// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-NEXT: store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0 +// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]] +// +float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { + return vzipq_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vuzp_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-NEXT: store <4 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1 +// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-NEXT: store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0 +// CHECK-NEXT: 
store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]] +// +float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { + return vuzp_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vuzpq_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-NEXT: store <8 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1 +// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-NEXT: store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0 +// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]] +// +float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { + return vuzpq_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vtrn_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-NEXT: store <4 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1 +// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-NEXT: store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0 +// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8 +// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]] +// +float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { + return vtrn_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vtrnq_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 +// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 +// CHECK-NEXT: [[TMP0:%.*]] = 
bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-NEXT: store <8 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 16 +// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1 +// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-NEXT: store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 16 +// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16 +// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0 +// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0 +// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16 +// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16 +// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]] +// +float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { + return vtrnq_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vmov_n_f16 +// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NEXT: ret <4 x half> [[VECINIT3]] +// +float16x4_t test_vmov_n_f16(float16_t a) { + return vmov_n_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vmovq_n_f16 +// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 +// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 +// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 +// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 +// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 +// CHECK-NEXT: ret <8 x half> [[VECINIT7]] +// +float16x8_t test_vmovq_n_f16(float16_t a) { + return vmovq_n_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vdup_n_f16 +// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NEXT: ret <4 x half> [[VECINIT3]] +// +float16x4_t test_vdup_n_f16(float16_t a) { + return vdup_n_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vdupq_n_f16 +// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 +// CHECK-NEXT: [[VECINIT1:%.*]] 
= insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 +// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 +// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 +// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 +// CHECK-NEXT: ret <8 x half> [[VECINIT7]] +// +float16x8_t test_vdupq_n_f16(float16_t a) { + return vdupq_n_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vdup_lane_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> +// CHECK-NEXT: ret <4 x half> [[LANE]] +// +float16x4_t test_vdup_lane_f16(float16x4_t a) { + return vdup_lane_f16(a, 3); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vdupq_lane_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> +// CHECK-NEXT: ret <8 x half> [[LANE]] +// +float16x8_t test_vdupq_lane_f16(float16x4_t a) { + return vdupq_lane_f16(a, 3); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vdup_laneq_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> +// CHECK-NEXT: ret <4 x half> [[LANE]] +// +float16x4_t test_vdup_laneq_f16(float16x8_t a) { + return vdup_laneq_f16(a, 1); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vdupq_laneq_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> +// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> +// CHECK-NEXT: ret <8 x half> [[LANE]] +// +float16x8_t test_vdupq_laneq_f16(float16x8_t a) { + return vdupq_laneq_f16(a, 7); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vext_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> +// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> +// CHECK-NEXT: ret <4 x half> [[VEXT]] +// +float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) { + return vext_f16(a, b, 2); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vextq_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) 
#[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> +// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> +// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> +// CHECK-NEXT: ret <8 x half> [[VEXT]] +// +float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) { + return vextq_f16(a, b, 5); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vrev64_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vrev64_f16(float16x4_t a) { + return vrev64_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vrev64q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vrev64q_f16(float16x8_t a) { + return vrev64q_f16(a); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vzip1_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vzip1_f16(float16x4_t a, float16x4_t b) { + return vzip1_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vzip1q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vzip1q_f16(float16x8_t a, float16x8_t b) { + return vzip1q_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vzip2_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vzip2_f16(float16x4_t a, float16x4_t b) { + return vzip2_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vzip2q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vzip2q_f16(float16x8_t a, float16x8_t b) { + return vzip2q_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vuzp1_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vuzp1_f16(float16x4_t a, float16x4_t b) { + return vuzp1_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vuzp1q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-NEXT: ret <8 x half> 
[[SHUFFLE_I]] +// +float16x8_t test_vuzp1q_f16(float16x8_t a, float16x8_t b) { + return vuzp1q_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vuzp2_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vuzp2_f16(float16x4_t a, float16x4_t b) { + return vuzp2_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vuzp2q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vuzp2q_f16(float16x8_t a, float16x8_t b) { + return vuzp2q_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vtrn1_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vtrn1_f16(float16x4_t a, float16x4_t b) { + return vtrn1_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vtrn1q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vtrn1q_f16(float16x8_t a, float16x8_t b) { + return vtrn1q_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vtrn2_f16 +// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vtrn2_f16(float16x4_t a, float16x4_t b) { + return vtrn2_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vtrn2q_f16 +// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) { + return vtrn2q_f16(a, b); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vduph_laneq_f16 +// CHECK-SAME: (<8 x half> noundef [[VEC:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x half> [[VEC]], i32 7 +// CHECK-NEXT: ret half [[VGETQ_LANE]] +// +float16_t test_vduph_laneq_f16(float16x8_t vec) { + return vduph_laneq_f16(vec, 7); +} + +// CHECK-LABEL: define {{[^@]+}}@test_vduph_lane_f16 +// CHECK-SAME: (<4 x half> noundef [[VEC:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x half> [[VEC]], i32 3 +// CHECK-NEXT: ret half [[VGET_LANE]] +// +float16_t test_vduph_lane_f16(float16x4_t vec) { + return vduph_lane_f16(vec, 3); +} diff --git a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c index 4163e6e..617d515 100644 --- a/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c +++ b/clang/test/CodeGen/aarch64-v8.2a-neon-intrinsics.c @@ -2004,475 +2004,3 @@ float16_t test_vminnmv_f16(float16x4_t a) { float16_t 
test_vminnmvq_f16(float16x8_t a) { return vminnmvq_f16(a); } - -// CHECK-LABEL: define {{[^@]+}}@test_vbsl_f16 -// CHECK-SAME: (<4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8> -// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> -// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> -// CHECK-NEXT: [[VBSL3_I:%.*]] = and <4 x i16> [[A]], [[VBSL1_I]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i16> [[A]], -// CHECK-NEXT: [[VBSL4_I:%.*]] = and <4 x i16> [[TMP3]], [[VBSL2_I]] -// CHECK-NEXT: [[VBSL5_I:%.*]] = or <4 x i16> [[VBSL3_I]], [[VBSL4_I]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[VBSL5_I]] to <4 x half> -// CHECK-NEXT: ret <4 x half> [[TMP4]] -// -float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) { - return vbsl_f16(a, b, c); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vbslq_f16 -// CHECK-SAME: (<8 x i16> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8> -// CHECK-NEXT: [[VBSL1_I:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x i16> -// CHECK-NEXT: [[VBSL2_I:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> -// CHECK-NEXT: [[VBSL3_I:%.*]] = and <8 x i16> [[A]], [[VBSL1_I]] -// CHECK-NEXT: [[TMP3:%.*]] = xor <8 x i16> [[A]], -// CHECK-NEXT: [[VBSL4_I:%.*]] = and <8 x i16> [[TMP3]], [[VBSL2_I]] -// CHECK-NEXT: [[VBSL5_I:%.*]] = or <8 x i16> [[VBSL3_I]], [[VBSL4_I]] -// CHECK-NEXT: [[TMP4:%.*]] = bitcast <8 x i16> [[VBSL5_I]] to <8 x half> -// CHECK-NEXT: ret <8 x half> [[TMP4]] -// -float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) { - return vbslq_f16(a, b, c); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vzip_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 -// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> -// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> -// CHECK-NEXT: store <4 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1 -// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> -// CHECK-NEXT: store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0 -// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]] -// -float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { - return 
vzip_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vzipq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 -// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> -// CHECK-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> -// CHECK-NEXT: store <8 x half> [[VZIP_I]], ptr [[RETVAL_I]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1 -// CHECK-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> -// CHECK-NEXT: store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0 -// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16 -// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]] -// -float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { - return vzipq_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vuzp_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 -// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> -// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> -// CHECK-NEXT: store <4 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1 -// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> -// CHECK-NEXT: store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0 -// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]] -// -float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { - return vuzp_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vuzpq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 -// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> -// CHECK-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> -// CHECK-NEXT: store <8 x half> [[VUZP_I]], ptr [[RETVAL_I]], align 
16 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1 -// CHECK-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> -// CHECK-NEXT: store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0 -// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16 -// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]] -// -float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { - return vuzpq_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vtrn_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T:%.*]], align 8 -// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X4X2_T]], align 8 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> -// CHECK-NEXT: store <4 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 8 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[RETVAL_I]], i32 1 -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> -// CHECK-NEXT: store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 8 -// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL_I]], align 8 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X4X2_T]] [[TMP3]], 0 -// CHECK-NEXT: store [2 x <4 x half>] [[TMP5]], ptr [[TMP4]], align 8 -// CHECK-NEXT: [[TMP6:%.*]] = load [[STRUCT_FLOAT16X4X2_T]], ptr [[RETVAL]], align 8 -// CHECK-NEXT: ret [[STRUCT_FLOAT16X4X2_T]] [[TMP6]] -// -float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { - return vtrn_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vtrnq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[RETVAL_I:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T:%.*]], align 16 -// CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FLOAT16X8X2_T]], align 16 -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> -// CHECK-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> -// CHECK-NEXT: store <8 x half> [[VTRN_I]], ptr [[RETVAL_I]], align 16 -// CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[RETVAL_I]], i32 1 -// CHECK-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> -// CHECK-NEXT: store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 16 -// CHECK-NEXT: [[TMP3:%.*]] = load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL_I]], align 16 -// CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], i32 0, i32 0 -// CHECK-NEXT: [[TMP5:%.*]] = extractvalue [[STRUCT_FLOAT16X8X2_T]] [[TMP3]], 0 -// CHECK-NEXT: store [2 x <8 x half>] [[TMP5]], ptr [[TMP4]], align 16 -// CHECK-NEXT: [[TMP6:%.*]] = 
load [[STRUCT_FLOAT16X8X2_T]], ptr [[RETVAL]], align 16 -// CHECK-NEXT: ret [[STRUCT_FLOAT16X8X2_T]] [[TMP6]] -// -float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { - return vtrnq_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vmov_n_f16 -// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: ret <4 x half> [[VECINIT3]] -// -float16x4_t test_vmov_n_f16(float16_t a) { - return vmov_n_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vmovq_n_f16 -// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 -// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 -// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 -// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 -// CHECK-NEXT: ret <8 x half> [[VECINIT7]] -// -float16x8_t test_vmovq_n_f16(float16_t a) { - return vmovq_n_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdup_n_f16 -// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: ret <4 x half> [[VECINIT3]] -// -float16x4_t test_vdup_n_f16(float16_t a) { - return vdup_n_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdupq_n_f16 -// CHECK-SAME: (half noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 -// CHECK-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 -// CHECK-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 -// CHECK-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 -// CHECK-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 -// CHECK-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 -// CHECK-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 -// CHECK-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 -// CHECK-NEXT: ret <8 x half> [[VECINIT7]] -// -float16x8_t test_vdupq_n_f16(float16_t a) { - return vdupq_n_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdup_lane_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = 
bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> -// CHECK-NEXT: ret <4 x half> [[LANE]] -// -float16x4_t test_vdup_lane_f16(float16x4_t a) { - return vdup_lane_f16(a, 3); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdupq_lane_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> -// CHECK-NEXT: ret <8 x half> [[LANE]] -// -float16x8_t test_vdupq_lane_f16(float16x4_t a) { - return vdupq_lane_f16(a, 3); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdup_laneq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <4 x i32> -// CHECK-NEXT: ret <4 x half> [[LANE]] -// -float16x4_t test_vdup_laneq_f16(float16x8_t a) { - return vdup_laneq_f16(a, 1); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vdupq_laneq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK-NEXT: [[LANE:%.*]] = shufflevector <8 x half> [[TMP1]], <8 x half> [[TMP1]], <8 x i32> -// CHECK-NEXT: ret <8 x half> [[LANE]] -// -float16x8_t test_vdupq_laneq_f16(float16x8_t a) { - return vdupq_laneq_f16(a, 7); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vext_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> -// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> -// CHECK-NEXT: ret <4 x half> [[VEXT]] -// -float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) { - return vext_f16(a, b, 2); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vextq_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> -// CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> -// CHECK-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> -// CHECK-NEXT: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> -// CHECK-NEXT: ret <8 x half> [[VEXT]] -// -float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) { - return vextq_f16(a, b, 5); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vrev64_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vrev64_f16(float16x4_t a) { - return vrev64_f16(a); -} - 
-// CHECK-LABEL: define {{[^@]+}}@test_vrev64q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vrev64q_f16(float16x8_t a) { - return vrev64q_f16(a); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vzip1_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vzip1_f16(float16x4_t a, float16x4_t b) { - return vzip1_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vzip1q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vzip1q_f16(float16x8_t a, float16x8_t b) { - return vzip1q_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vzip2_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vzip2_f16(float16x4_t a, float16x4_t b) { - return vzip2_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vzip2q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vzip2q_f16(float16x8_t a, float16x8_t b) { - return vzip2q_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vuzp1_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vuzp1_f16(float16x4_t a, float16x4_t b) { - return vuzp1_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vuzp1q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vuzp1q_f16(float16x8_t a, float16x8_t b) { - return vuzp1q_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vuzp2_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vuzp2_f16(float16x4_t a, float16x4_t b) { - return vuzp2_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vuzp2q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vuzp2q_f16(float16x8_t a, float16x8_t b) { - return vuzp2q_f16(a, b); -} - 
-// CHECK-LABEL: define {{[^@]+}}@test_vtrn1_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vtrn1_f16(float16x4_t a, float16x4_t b) { - return vtrn1_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vtrn1q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vtrn1q_f16(float16x8_t a, float16x8_t b) { - return vtrn1q_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vtrn2_f16 -// CHECK-SAME: (<4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> -// CHECK-NEXT: ret <4 x half> [[SHUFFLE_I]] -// -float16x4_t test_vtrn2_f16(float16x4_t a, float16x4_t b) { - return vtrn2_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vtrn2q_f16 -// CHECK-SAME: (<8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> -// CHECK-NEXT: ret <8 x half> [[SHUFFLE_I]] -// -float16x8_t test_vtrn2q_f16(float16x8_t a, float16x8_t b) { - return vtrn2q_f16(a, b); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vduph_laneq_f16 -// CHECK-SAME: (<8 x half> noundef [[VEC:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VGETQ_LANE:%.*]] = extractelement <8 x half> [[VEC]], i32 7 -// CHECK-NEXT: ret half [[VGETQ_LANE]] -// -float16_t test_vduph_laneq_f16(float16x8_t vec) { - return vduph_laneq_f16(vec, 7); -} - -// CHECK-LABEL: define {{[^@]+}}@test_vduph_lane_f16 -// CHECK-SAME: (<4 x half> noundef [[VEC:%.*]]) #[[ATTR0]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[VGET_LANE:%.*]] = extractelement <4 x half> [[VEC]], i32 3 -// CHECK-NEXT: ret half [[VGET_LANE]] -// -float16_t test_vduph_lane_f16(float16x4_t vec) { - return vduph_lane_f16(vec, 3); -} diff --git a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics-generic.c b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics-generic.c new file mode 100644 index 0000000..f8d8333 --- /dev/null +++ b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics-generic.c @@ -0,0 +1,600 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -triple armv8.2a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature -fullfp16 \ +// RUN: -S -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -passes=sroa \ +// RUN: | FileCheck %s --check-prefixes=CHECK-NOFP16 +// RUN: %clang_cc1 -triple armv8a-linux-gnu -target-abi apcs-gnu -target-feature +neon -target-feature +fullfp16 \ +// RUN: -S -disable-O0-optnone -emit-llvm -o - %s \ +// RUN: | opt -S -passes=sroa \ +// RUN: | FileCheck %s --check-prefixes=CHECK-FP16 + +// REQUIRES: arm-registered-target + +#include + +// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vbsl_f16( +// CHECK-NOFP16-SAME: <4 x i16> noundef [[A:%.*]], <2 x i32> noundef [[B_COERCE:%.*]], <2 x i32> noundef [[C_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> +// 
CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[C_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP6]], <8 x i8> [[TMP7]], <8 x i8> [[TMP8]]) +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = bitcast <4 x half> [[TMP9]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP11:%.*]] = bitcast <2 x i32> [[TMP10]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP12:%.*]] = bitcast <4 x half> [[TMP11]] to <2 x i32> +// CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP12]] +// +// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vbsl_f16( +// CHECK-FP16-SAME: <4 x i16> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]], <4 x half> noundef [[C:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[C]] to <8 x i8> +// CHECK-FP16-NEXT: [[VBSL_V_I:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) +// CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL_V_I]] to <4 x half> +// CHECK-FP16-NEXT: ret <4 x half> [[TMP3]] +// +float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) { + return vbsl_f16(a, b, c); +} + +// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vbslq_f16( +// CHECK-NOFP16-SAME: <8 x i16> noundef [[A:%.*]], <4 x i32> noundef [[B_COERCE:%.*]], <4 x i32> noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[C_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP6]], <16 x i8> [[TMP7]], <16 x i8> [[TMP8]]) +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = bitcast <8 x half> [[TMP9]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP11:%.*]] = bitcast <4 x i32> [[TMP10]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP12:%.*]] = bitcast <8 x half> [[TMP11]] to <4 x i32> +// CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP12]] +// +// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vbslq_f16( +// CHECK-FP16-SAME: <8 x i16> noundef [[A:%.*]], <8 x half> 
noundef [[B:%.*]], <8 x half> noundef [[C:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x i16> [[A]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[C]] to <16 x i8> +// CHECK-FP16-NEXT: [[VBSLQ_V_I:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) +// CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSLQ_V_I]] to <8 x half> +// CHECK-FP16-NEXT: ret <8 x half> [[TMP3]] +// +float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) { + return vbslq_f16(a, b, c); +} + +// CHECK-NOFP16-LABEL: define dso_local void @test_vzip_f16( +// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32> +// CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> +// CHECK-NOFP16-NEXT: store <4 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]] +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1 +// CHECK-NOFP16-NEXT: [[VZIP3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> +// CHECK-NOFP16-NEXT: store <4 x i16> [[VZIP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META3]] +// CHECK-NOFP16-NEXT: ret void +// +// CHECK-FP16-LABEL: define dso_local void @test_vzip_f16( +// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-FP16-NEXT: [[VZIP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-FP16-NEXT: store <4 x half> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META3]] +// CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1 +// CHECK-FP16-NEXT: [[VZIP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-FP16-NEXT: store <4 x half> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META3]] +// CHECK-FP16-NEXT: ret void +// +float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { + return 
vzip_f16(a, b); +} + +// CHECK-NOFP16-LABEL: define dso_local void @test_vzipq_f16( +// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> +// CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> +// CHECK-NOFP16-NEXT: store <8 x i16> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]] +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1 +// CHECK-NOFP16-NEXT: [[VZIP3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> +// CHECK-NOFP16-NEXT: store <8 x i16> [[VZIP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META6]] +// CHECK-NOFP16-NEXT: ret void +// +// CHECK-FP16-LABEL: define dso_local void @test_vzipq_f16( +// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-FP16-NEXT: [[VZIP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-FP16-NEXT: store <8 x half> [[VZIP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META6]] +// CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1 +// CHECK-FP16-NEXT: [[VZIP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-FP16-NEXT: store <8 x half> [[VZIP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META6]] +// CHECK-FP16-NEXT: ret void +// +float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { + return vzipq_f16(a, b); +} + +// CHECK-NOFP16-LABEL: define dso_local void @test_vuzp_f16( +// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32> +// 
CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> +// CHECK-NOFP16-NEXT: store <4 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]] +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1 +// CHECK-NOFP16-NEXT: [[VUZP3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> +// CHECK-NOFP16-NEXT: store <4 x i16> [[VUZP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META9]] +// CHECK-NOFP16-NEXT: ret void +// +// CHECK-FP16-LABEL: define dso_local void @test_vuzp_f16( +// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-FP16-NEXT: [[VUZP_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-FP16-NEXT: store <4 x half> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META9]] +// CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1 +// CHECK-FP16-NEXT: [[VUZP1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-FP16-NEXT: store <4 x half> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META9]] +// CHECK-FP16-NEXT: ret void +// +float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { + return vuzp_f16(a, b); +} + +// CHECK-NOFP16-LABEL: define dso_local void @test_vuzpq_f16( +// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> +// CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> 
+// CHECK-NOFP16-NEXT: store <8 x i16> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]] +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1 +// CHECK-NOFP16-NEXT: [[VUZP3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> +// CHECK-NOFP16-NEXT: store <8 x i16> [[VUZP3_I]], ptr [[TMP10]], align 4, !alias.scope [[META12]] +// CHECK-NOFP16-NEXT: ret void +// +// CHECK-FP16-LABEL: define dso_local void @test_vuzpq_f16( +// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-FP16-NEXT: [[VUZP_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-FP16-NEXT: store <8 x half> [[VUZP_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META12]] +// CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1 +// CHECK-FP16-NEXT: [[VUZP1_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-FP16-NEXT: store <8 x half> [[VUZP1_I]], ptr [[TMP2]], align 4, !alias.scope [[META12]] +// CHECK-FP16-NEXT: ret void +// +float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { + return vuzpq_f16(a, b); +} + +// CHECK-NOFP16-LABEL: define dso_local void @test_vtrn_f16( +// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <2 x i32> +// CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP2]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x half> [[TMP4]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x half> [[TMP5]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <8 x i8> [[TMP6]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <8 x i8> [[TMP7]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> +// CHECK-NOFP16-NEXT: store <4 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]] +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <4 x i16>, ptr [[AGG_RESULT]], i32 1 +// CHECK-NOFP16-NEXT: [[VTRN3_I:%.*]] = shufflevector <4 x i16> [[TMP8]], <4 x i16> [[TMP9]], <4 x i32> +// CHECK-NOFP16-NEXT: store <4 x i16> [[VTRN3_I]], ptr [[TMP10]], align 4, !alias.scope [[META15]] +// CHECK-NOFP16-NEXT: ret void +// +// CHECK-FP16-LABEL: define dso_local void @test_vtrn_f16( +// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X4X2_T:%.*]]) align 8 [[AGG_RESULT:%.*]], <4 x half> noundef [[A:%.*]], <4 x half> 
noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-FP16-NEXT: [[VTRN_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-FP16-NEXT: store <4 x half> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META15]] +// CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <4 x half>, ptr [[AGG_RESULT]], i32 1 +// CHECK-FP16-NEXT: [[VTRN1_I:%.*]] = shufflevector <4 x half> [[A]], <4 x half> [[B]], <4 x i32> +// CHECK-FP16-NEXT: store <4 x half> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope [[META15]] +// CHECK-FP16-NEXT: ret void +// +float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { + return vtrn_f16(a, b); +} + +// CHECK-NOFP16-LABEL: define dso_local void @test_vtrnq_f16( +// CHECK-NOFP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <4 x i32> +// CHECK-NOFP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP2]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x half> [[TMP4]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP5]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP8:%.*]] = bitcast <16 x i8> [[TMP6]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[TMP9:%.*]] = bitcast <16 x i8> [[TMP7]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> +// CHECK-NOFP16-NEXT: store <8 x i16> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]] +// CHECK-NOFP16-NEXT: [[TMP10:%.*]] = getelementptr inbounds <8 x i16>, ptr [[AGG_RESULT]], i32 1 +// CHECK-NOFP16-NEXT: [[VTRN3_I:%.*]] = shufflevector <8 x i16> [[TMP8]], <8 x i16> [[TMP9]], <8 x i32> +// CHECK-NOFP16-NEXT: store <8 x i16> [[VTRN3_I]], ptr [[TMP10]], align 4, !alias.scope [[META18]] +// CHECK-NOFP16-NEXT: ret void +// +// CHECK-FP16-LABEL: define dso_local void @test_vtrnq_f16( +// CHECK-FP16-SAME: ptr dead_on_unwind noalias writable sret([[STRUCT_FLOAT16X8X2_T:%.*]]) align 16 [[AGG_RESULT:%.*]], <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-FP16-NEXT: [[VTRN_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-FP16-NEXT: store <8 x half> [[VTRN_I]], ptr [[AGG_RESULT]], align 4, !alias.scope [[META18]] +// CHECK-FP16-NEXT: [[TMP2:%.*]] = getelementptr inbounds <8 x half>, ptr [[AGG_RESULT]], i32 1 +// CHECK-FP16-NEXT: [[VTRN1_I:%.*]] = shufflevector <8 x 
half> [[A]], <8 x half> [[B]], <8 x i32> +// CHECK-FP16-NEXT: store <8 x half> [[VTRN1_I]], ptr [[TMP2]], align 4, !alias.scope [[META18]] +// CHECK-FP16-NEXT: ret void +// +float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { + return vtrnq_f16(a, b); +} + +// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vmov_n_f16( +// CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VECINIT3]] to <2 x i32> +// CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP0]] +// +// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vmov_n_f16( +// CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-FP16-NEXT: ret <4 x half> [[VECINIT3]] +// +float16x4_t test_vmov_n_f16(float16_t a) { + return vmov_n_f16(a); +} + +// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vmovq_n_f16( +// CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 +// CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NOFP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 +// CHECK-NOFP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 +// CHECK-NOFP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 +// CHECK-NOFP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VECINIT7]] to <4 x i32> +// CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP0]] +// +// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vmovq_n_f16( +// CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 +// CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-FP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 +// CHECK-FP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 +// CHECK-FP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 +// CHECK-FP16-NEXT: [[VECINIT7:%.*]] = 
insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 +// CHECK-FP16-NEXT: ret <8 x half> [[VECINIT7]] +// +float16x8_t test_vmovq_n_f16(float16_t a) { + return vmovq_n_f16(a); +} + +// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vdup_n_f16( +// CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[VECINIT3]] to <2 x i32> +// CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP0]] +// +// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vdup_n_f16( +// CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <4 x half> poison, half [[A]], i32 0 +// CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <4 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <4 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <4 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-FP16-NEXT: ret <4 x half> [[VECINIT3]] +// +float16x4_t test_vdup_n_f16(float16_t a) { + return vdup_n_f16(a); +} + +// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vdupq_n_f16( +// CHECK-NOFP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 +// CHECK-NOFP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-NOFP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-NOFP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-NOFP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 +// CHECK-NOFP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 +// CHECK-NOFP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 +// CHECK-NOFP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[VECINIT7]] to <4 x i32> +// CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP0]] +// +// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vdupq_n_f16( +// CHECK-FP16-SAME: half noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[VECINIT:%.*]] = insertelement <8 x half> poison, half [[A]], i32 0 +// CHECK-FP16-NEXT: [[VECINIT1:%.*]] = insertelement <8 x half> [[VECINIT]], half [[A]], i32 1 +// CHECK-FP16-NEXT: [[VECINIT2:%.*]] = insertelement <8 x half> [[VECINIT1]], half [[A]], i32 2 +// CHECK-FP16-NEXT: [[VECINIT3:%.*]] = insertelement <8 x half> [[VECINIT2]], half [[A]], i32 3 +// CHECK-FP16-NEXT: [[VECINIT4:%.*]] = insertelement <8 x half> [[VECINIT3]], half [[A]], i32 4 +// CHECK-FP16-NEXT: [[VECINIT5:%.*]] = insertelement <8 x half> [[VECINIT4]], half [[A]], i32 5 +// CHECK-FP16-NEXT: [[VECINIT6:%.*]] = insertelement <8 x half> [[VECINIT5]], half [[A]], i32 6 +// CHECK-FP16-NEXT: [[VECINIT7:%.*]] = insertelement <8 x half> [[VECINIT6]], half [[A]], i32 7 +// CHECK-FP16-NEXT: ret <8 x 
half> [[VECINIT7]] +// +float16x8_t test_vdupq_n_f16(float16_t a) { + return vdupq_n_f16(a); +} + +// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vdup_lane_f16( +// CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[LANE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x half> [[TMP3]] to <2 x i32> +// CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP4]] +// +// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vdup_lane_f16( +// CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK-FP16-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> +// CHECK-FP16-NEXT: ret <4 x half> [[LANE]] +// +float16x4_t test_vdup_lane_f16(float16x4_t a) { + return vdup_lane_f16(a, 3); +} + +// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vdupq_lane_f16( +// CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[LANE:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> [[TMP2]], <8 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x i16> [[LANE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <8 x half> [[TMP3]] to <4 x i32> +// CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP4]] +// +// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vdupq_lane_f16( +// CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK-FP16-NEXT: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> +// CHECK-FP16-NEXT: ret <8 x half> [[LANE]] +// +float16x8_t test_vdupq_lane_f16(float16x4_t a) { + return vdupq_lane_f16(a, 3); +} + +// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vext_f16( +// CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]], <2 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[B_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x half> [[TMP0]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[TMP1]] to <8 x i8> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[TMP2]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <8 x i8> [[TMP3]] to <4 x i16> +// CHECK-NOFP16-NEXT: [[VEXT:%.*]] = shufflevector <4 x i16> [[TMP4]], <4 x i16> [[TMP5]], <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <4 x i16> [[VEXT]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <4 x 
half> [[TMP6]] to <2 x i32> +// CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP7]] +// +// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vext_f16( +// CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]], <4 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <4 x half> [[A]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[B]] to <8 x i8> +// CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> +// CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> +// CHECK-FP16-NEXT: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> +// CHECK-FP16-NEXT: ret <4 x half> [[VEXT]] +// +float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) { + return vext_f16(a, b, 2); +} + +// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vextq_f16( +// CHECK-NOFP16-SAME: <4 x i32> noundef [[A_COERCE:%.*]], <4 x i32> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x i32> [[B_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <8 x half> [[TMP0]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[TMP1]] to <16 x i8> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <16 x i8> [[TMP2]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <16 x i8> [[TMP3]] to <8 x i16> +// CHECK-NOFP16-NEXT: [[VEXT:%.*]] = shufflevector <8 x i16> [[TMP4]], <8 x i16> [[TMP5]], <8 x i32> +// CHECK-NOFP16-NEXT: [[TMP6:%.*]] = bitcast <8 x i16> [[VEXT]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP7:%.*]] = bitcast <8 x half> [[TMP6]] to <4 x i32> +// CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP7]] +// +// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vextq_f16( +// CHECK-FP16-SAME: <8 x half> noundef [[A:%.*]], <8 x half> noundef [[B:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[TMP0:%.*]] = bitcast <8 x half> [[A]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[B]] to <16 x i8> +// CHECK-FP16-NEXT: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> +// CHECK-FP16-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> +// CHECK-FP16-NEXT: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> +// CHECK-FP16-NEXT: ret <8 x half> [[VEXT]] +// +float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) { + return vextq_f16(a, b, 5); +} + +// CHECK-NOFP16-LABEL: define dso_local <2 x i32> @test_vrev64_f16( +// CHECK-NOFP16-SAME: <2 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[A_COERCE]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <4 x half> [[TMP0]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[TMP1]] to <4 x half> +// CHECK-NOFP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP2]], <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <4 x half> [[SHUFFLE_I]] to <2 x i32> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <2 x i32> [[TMP3]] to <4 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <4 x half> [[TMP4]] to <2 x i32> +// CHECK-NOFP16-NEXT: ret <2 x i32> [[TMP5]] +// +// CHECK-FP16-LABEL: define dso_local <4 x half> @test_vrev64_f16( +// CHECK-FP16-SAME: <4 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[SHUFFLE_I:%.*]] = 
shufflevector <4 x half> [[A]], <4 x half> [[A]], <4 x i32> +// CHECK-FP16-NEXT: ret <4 x half> [[SHUFFLE_I]] +// +float16x4_t test_vrev64_f16(float16x4_t a) { + return vrev64_f16(a); +} + +// CHECK-NOFP16-LABEL: define dso_local <4 x i32> @test_vrev64q_f16( +// CHECK-NOFP16-SAME: <4 x i32> noundef [[A_COERCE:%.*]]) #[[ATTR0]] { +// CHECK-NOFP16-NEXT: entry: +// CHECK-NOFP16-NEXT: [[TMP0:%.*]] = bitcast <4 x i32> [[A_COERCE]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP1:%.*]] = bitcast <8 x half> [[TMP0]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP2:%.*]] = bitcast <4 x i32> [[TMP1]] to <8 x half> +// CHECK-NOFP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP2]], <8 x i32> +// CHECK-NOFP16-NEXT: [[TMP3:%.*]] = bitcast <8 x half> [[SHUFFLE_I]] to <4 x i32> +// CHECK-NOFP16-NEXT: [[TMP4:%.*]] = bitcast <4 x i32> [[TMP3]] to <8 x half> +// CHECK-NOFP16-NEXT: [[TMP5:%.*]] = bitcast <8 x half> [[TMP4]] to <4 x i32> +// CHECK-NOFP16-NEXT: ret <4 x i32> [[TMP5]] +// +// CHECK-FP16-LABEL: define dso_local <8 x half> @test_vrev64q_f16( +// CHECK-FP16-SAME: <8 x half> noundef [[A:%.*]]) #[[ATTR0]] { +// CHECK-FP16-NEXT: entry: +// CHECK-FP16-NEXT: [[SHUFFLE_I:%.*]] = shufflevector <8 x half> [[A]], <8 x half> [[A]], <8 x i32> +// CHECK-FP16-NEXT: ret <8 x half> [[SHUFFLE_I]] +// +float16x8_t test_vrev64q_f16(float16x8_t a) { + return vrev64q_f16(a); +} +//. +// CHECK-NOFP16: [[META3]] = !{[[META4:![0-9]+]]} +// CHECK-NOFP16: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vzip_f16: %agg.result"} +// CHECK-NOFP16: [[META5]] = distinct !{[[META5]], !"vzip_f16"} +// CHECK-NOFP16: [[META6]] = !{[[META7:![0-9]+]]} +// CHECK-NOFP16: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vzipq_f16: %agg.result"} +// CHECK-NOFP16: [[META8]] = distinct !{[[META8]], !"vzipq_f16"} +// CHECK-NOFP16: [[META9]] = !{[[META10:![0-9]+]]} +// CHECK-NOFP16: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vuzp_f16: %agg.result"} +// CHECK-NOFP16: [[META11]] = distinct !{[[META11]], !"vuzp_f16"} +// CHECK-NOFP16: [[META12]] = !{[[META13:![0-9]+]]} +// CHECK-NOFP16: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vuzpq_f16: %agg.result"} +// CHECK-NOFP16: [[META14]] = distinct !{[[META14]], !"vuzpq_f16"} +// CHECK-NOFP16: [[META15]] = !{[[META16:![0-9]+]]} +// CHECK-NOFP16: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_f16: %agg.result"} +// CHECK-NOFP16: [[META17]] = distinct !{[[META17]], !"vtrn_f16"} +// CHECK-NOFP16: [[META18]] = !{[[META19:![0-9]+]]} +// CHECK-NOFP16: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrnq_f16: %agg.result"} +// CHECK-NOFP16: [[META20]] = distinct !{[[META20]], !"vtrnq_f16"} +//. 
+// CHECK-FP16: [[META3]] = !{[[META4:![0-9]+]]} +// CHECK-FP16: [[META4]] = distinct !{[[META4]], [[META5:![0-9]+]], !"vzip_f16: %agg.result"} +// CHECK-FP16: [[META5]] = distinct !{[[META5]], !"vzip_f16"} +// CHECK-FP16: [[META6]] = !{[[META7:![0-9]+]]} +// CHECK-FP16: [[META7]] = distinct !{[[META7]], [[META8:![0-9]+]], !"vzipq_f16: %agg.result"} +// CHECK-FP16: [[META8]] = distinct !{[[META8]], !"vzipq_f16"} +// CHECK-FP16: [[META9]] = !{[[META10:![0-9]+]]} +// CHECK-FP16: [[META10]] = distinct !{[[META10]], [[META11:![0-9]+]], !"vuzp_f16: %agg.result"} +// CHECK-FP16: [[META11]] = distinct !{[[META11]], !"vuzp_f16"} +// CHECK-FP16: [[META12]] = !{[[META13:![0-9]+]]} +// CHECK-FP16: [[META13]] = distinct !{[[META13]], [[META14:![0-9]+]], !"vuzpq_f16: %agg.result"} +// CHECK-FP16: [[META14]] = distinct !{[[META14]], !"vuzpq_f16"} +// CHECK-FP16: [[META15]] = !{[[META16:![0-9]+]]} +// CHECK-FP16: [[META16]] = distinct !{[[META16]], [[META17:![0-9]+]], !"vtrn_f16: %agg.result"} +// CHECK-FP16: [[META17]] = distinct !{[[META17]], !"vtrn_f16"} +// CHECK-FP16: [[META18]] = !{[[META19:![0-9]+]]} +// CHECK-FP16: [[META19]] = distinct !{[[META19]], [[META20:![0-9]+]], !"vtrnq_f16: %agg.result"} +// CHECK-FP16: [[META20]] = distinct !{[[META20]], !"vtrnq_f16"} +//. diff --git a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c index 477da3a..c62d1c9 100644 --- a/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c +++ b/clang/test/CodeGen/arm-v8.2a-neon-intrinsics.c @@ -817,181 +817,3 @@ float16x4_t test_vmul_n_f16(float16x4_t a, float16_t b) { float16x8_t test_vmulq_n_f16(float16x8_t a, float16_t b) { return vmulq_n_f16(a, b); } - -// CHECK-LABEL: test_vbsl_f16 -// CHECK: [[TMP0:%.*]] = bitcast <4 x i16> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <4 x half> %c to <8 x i8> -// CHECK: [[VBSL:%.*]] = call <8 x i8> @llvm.arm.neon.vbsl.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]], <8 x i8> [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[VBSL]] to <4 x half> -// CHECK: ret <4 x half> [[TMP3]] -float16x4_t test_vbsl_f16(uint16x4_t a, float16x4_t b, float16x4_t c) { - return vbsl_f16(a, b, c); -} - -// CHECK-LABEL: test_vbslq_f16 -// CHECK: [[TMP0:%.*]] = bitcast <8 x i16> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x half> %c to <16 x i8> -// CHECK: [[VBSL:%.*]] = call <16 x i8> @llvm.arm.neon.vbsl.v16i8(<16 x i8> [[TMP0]], <16 x i8> [[TMP1]], <16 x i8> [[TMP2]]) -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[VBSL]] to <8 x half> -// CHECK: ret <8 x half> [[TMP3]] -float16x8_t test_vbslq_f16(uint16x8_t a, float16x8_t b, float16x8_t c) { - return vbslq_f16(a, b, c); -} - -// CHECK-LABEL: test_vzip_f16 -// CHECK: [[VZIP0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> -// CHECK: store <4 x half> [[VZIP0]], ptr [[addr1:%.*]] -// CHECK: [[VZIP1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> -// CHECK: store <4 x half> [[VZIP1]], ptr [[addr2:%.*]] -float16x4x2_t test_vzip_f16(float16x4_t a, float16x4_t b) { - return vzip_f16(a, b); -} - -// CHECK-LABEL: test_vzipq_f16 -// CHECK: [[VZIP0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> -// CHECK: store <8 x half> [[VZIP0]], ptr [[addr1:%.*]] -// CHECK: [[VZIP1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> -// CHECK: store <8 x half> [[VZIP1]], ptr [[addr2:%.*]] -float16x8x2_t test_vzipq_f16(float16x8_t a, float16x8_t b) { - 
return vzipq_f16(a, b); -} - -// CHECK-LABEL: test_vuzp_f16 -// CHECK: [[VUZP0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> -// CHECK: store <4 x half> [[VUZP0]], ptr [[addr1:%.*]] -// CHECK: [[VUZP1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> -// CHECK: store <4 x half> [[VUZP1]], ptr [[addr1:%.*]] -float16x4x2_t test_vuzp_f16(float16x4_t a, float16x4_t b) { - return vuzp_f16(a, b); -} - -// CHECK-LABEL: test_vuzpq_f16 -// CHECK: [[VUZP0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> -// CHECK: store <8 x half> [[VUZP0]], ptr [[addr1:%.*]] -// CHECK: [[VUZP1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> -// CHECK: store <8 x half> [[VUZP1]], ptr [[addr2:%.*]] -float16x8x2_t test_vuzpq_f16(float16x8_t a, float16x8_t b) { - return vuzpq_f16(a, b); -} - -// CHECK-LABEL: test_vtrn_f16 -// CHECK: [[VTRN0:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> -// CHECK: store <4 x half> [[VTRN0]], ptr [[addr1:%.*]] -// CHECK: [[VTRN1:%.*]] = shufflevector <4 x half> %a, <4 x half> %b, <4 x i32> -// CHECK: store <4 x half> [[VTRN1]], ptr [[addr2:%.*]] -float16x4x2_t test_vtrn_f16(float16x4_t a, float16x4_t b) { - return vtrn_f16(a, b); -} - -// CHECK-LABEL: test_vtrnq_f16 -// CHECK: [[VTRN0:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> -// CHECK: store <8 x half> [[VTRN0]], ptr [[addr1:%.*]] -// CHECK: [[VTRN1:%.*]] = shufflevector <8 x half> %a, <8 x half> %b, <8 x i32> -// CHECK: store <8 x half> [[VTRN1]], ptr [[addr2:%.*]] -float16x8x2_t test_vtrnq_f16(float16x8_t a, float16x8_t b) { - return vtrnq_f16(a, b); -} - -// CHECK-LABEL: test_vmov_n_f16 -// CHECK: [[TMP0:%.*]] = insertelement <4 x half> poison, half [[ARG:%.*]], i32 0 -// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[ARG]], i32 1 -// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[ARG]], i32 2 -// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[ARG]], i32 3 -// CHECK: ret <4 x half> [[TMP3]] -float16x4_t test_vmov_n_f16(float16_t a) { - return vmov_n_f16(a); -} - -// CHECK-LABEL: test_vmovq_n_f16 -// CHECK: [[TMP0:%.*]] = insertelement <8 x half> poison, half [[ARG:%.*]], i32 0 -// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[ARG]], i32 1 -// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[ARG]], i32 2 -// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[ARG]], i32 3 -// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[ARG]], i32 4 -// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[ARG]], i32 5 -// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[ARG]], i32 6 -// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[ARG]], i32 7 -// CHECK: ret <8 x half> [[TMP7]] -float16x8_t test_vmovq_n_f16(float16_t a) { - return vmovq_n_f16(a); -} - -// CHECK-LABEL: test_vdup_n_f16 -// CHECK: [[TMP0:%.*]] = insertelement <4 x half> poison, half [[ARG:%.*]], i32 0 -// CHECK: [[TMP1:%.*]] = insertelement <4 x half> [[TMP0]], half [[ARG]], i32 1 -// CHECK: [[TMP2:%.*]] = insertelement <4 x half> [[TMP1]], half [[ARG]], i32 2 -// CHECK: [[TMP3:%.*]] = insertelement <4 x half> [[TMP2]], half [[ARG]], i32 3 -// CHECK: ret <4 x half> [[TMP3]] -float16x4_t test_vdup_n_f16(float16_t a) { - return vdup_n_f16(a); -} - -// CHECK-LABEL: test_vdupq_n_f16 -// CHECK: [[TMP0:%.*]] = insertelement <8 x half> poison, half [[ARG:%.*]], i32 0 -// CHECK: [[TMP1:%.*]] = insertelement <8 x half> [[TMP0]], half [[ARG]], i32 1 
-// CHECK: [[TMP2:%.*]] = insertelement <8 x half> [[TMP1]], half [[ARG]], i32 2 -// CHECK: [[TMP3:%.*]] = insertelement <8 x half> [[TMP2]], half [[ARG]], i32 3 -// CHECK: [[TMP4:%.*]] = insertelement <8 x half> [[TMP3]], half [[ARG]], i32 4 -// CHECK: [[TMP5:%.*]] = insertelement <8 x half> [[TMP4]], half [[ARG]], i32 5 -// CHECK: [[TMP6:%.*]] = insertelement <8 x half> [[TMP5]], half [[ARG]], i32 6 -// CHECK: [[TMP7:%.*]] = insertelement <8 x half> [[TMP6]], half [[ARG]], i32 7 -// CHECK: ret <8 x half> [[TMP7]] -float16x8_t test_vdupq_n_f16(float16_t a) { - return vdupq_n_f16(a); -} - -// CHECK-LABEL: test_vdup_lane_f16 -// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <4 x i32> -// CHECK: ret <4 x half> [[LANE]] -float16x4_t test_vdup_lane_f16(float16x4_t a) { - return vdup_lane_f16(a, 3); -} - -// CHECK-LABEL: test_vdupq_lane_f16 -// CHECK: [[TMP0:%.*]] = bitcast <4 x half> [[A:%.*]] to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK: [[LANE:%.*]] = shufflevector <4 x half> [[TMP1]], <4 x half> [[TMP1]], <8 x i32> -// CHECK: ret <8 x half> [[LANE]] -float16x8_t test_vdupq_lane_f16(float16x4_t a) { - return vdupq_lane_f16(a, 3); -} - -// CHECK-LABEL: @test_vext_f16( -// CHECK: [[TMP0:%.*]] = bitcast <4 x half> %a to <8 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <4 x half> %b to <8 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP0]] to <4 x half> -// CHECK: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP1]] to <4 x half> -// CHECK: [[VEXT:%.*]] = shufflevector <4 x half> [[TMP2]], <4 x half> [[TMP3]], <4 x i32> -// CHECK: ret <4 x half> [[VEXT]] -float16x4_t test_vext_f16(float16x4_t a, float16x4_t b) { - return vext_f16(a, b, 2); -} - -// CHECK-LABEL: @test_vextq_f16( -// CHECK: [[TMP0:%.*]] = bitcast <8 x half> %a to <16 x i8> -// CHECK: [[TMP1:%.*]] = bitcast <8 x half> %b to <16 x i8> -// CHECK: [[TMP2:%.*]] = bitcast <16 x i8> [[TMP0]] to <8 x half> -// CHECK: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP1]] to <8 x half> -// CHECK: [[VEXT:%.*]] = shufflevector <8 x half> [[TMP2]], <8 x half> [[TMP3]], <8 x i32> -// CHECK: ret <8 x half> [[VEXT]] -float16x8_t test_vextq_f16(float16x8_t a, float16x8_t b) { - return vextq_f16(a, b, 5); -} - -// CHECK-LABEL: @test_vrev64_f16( -// CHECK: [[SHFL:%.*]] = shufflevector <4 x half> %a, <4 x half> %a, <4 x i32> -// CHECK: ret <4 x half> [[SHFL]] -float16x4_t test_vrev64_f16(float16x4_t a) { - return vrev64_f16(a); -} - -// CHECK-LABEL: @test_vrev64q_f16( -// CHECK: [[SHFL:%.*]] = shufflevector <8 x half> %a, <8 x half> %a, <8 x i32> -// CHECK: ret <8 x half> [[SHFL]] -float16x8_t test_vrev64q_f16(float16x8_t a) { - return vrev64q_f16(a); -} -- cgit v1.1 From 5bbce06ac642bedcb93158ed04253cf6deedf5e6 Mon Sep 17 00:00:00 2001 From: Lei Wang Date: Wed, 3 Apr 2024 11:18:29 -0700 Subject: [PseudoProbe] Mix block and call probe ID in lexical order (#75092) Before all the call probe ids are after block ids, in this change, it mixed the call probe and block probe by reordering them in lexical(line-number) order. For example: ``` main(): BB1 if(...) BB2 foo(..); else BB3 bar(...); BB4 ``` Before the profile is ``` main 1: .. 2: .. 3: ... 4: ... 5: foo ... 6: bar ... ``` Now the new order is ``` main 1: .. 2: .. 3: foo ... 4: ... 5: bar ... 6: ... ``` This can potentially make it more tolerant of profile mismatch, either from stale profile or frontend change. e.g. 
previously, if we added one block, even if it was the last one, all the call probes would be shifted and mismatched. Moreover, this makes better use of call-anchor based stale profile matching: blocks are matched against the closest anchor, so more anchors are available for matching, which reduces the mismatch scope. --- clang/test/CodeGen/pseudo-probe-emit.c | 8 ++++---- llvm/include/llvm/ProfileData/SampleProf.h | 4 ++-- .../llvm/Transforms/IPO/SampleProfileProbe.h | 6 ++---- llvm/lib/Transforms/IPO/SampleProfileProbe.cpp | 22 +++++++--------------- .../SampleProfile/Inputs/pseudo-probe-profile.prof | 8 ++++---- .../SampleProfile/Inputs/pseudo-probe-update.prof | 8 ++++---- .../SampleProfile/pseudo-probe-dangle.ll | 12 ++++++------ .../SampleProfile/pseudo-probe-discriminator.ll | 6 +++--- .../SampleProfile/pseudo-probe-invoke.ll | 12 ++++++++---- .../pseudo-probe-profile-metadata-2.ll | 15 +++++++-------- .../SampleProfile/pseudo-probe-profile.ll | 22 +++++++++++----------- .../SampleProfile/pseudo-probe-update.ll | 11 +++++------ .../SampleProfile/pseudo-probe-verify.ll | 16 ++++++++-------- 13 files changed, 71 insertions(+), 79 deletions(-) diff --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c index c7a3f7e..360f831e 100644 --- a/clang/test/CodeGen/pseudo-probe-emit.c +++ b/clang/test/CodeGen/pseudo-probe-emit.c @@ -10,9 +10,9 @@ void foo(int x) { // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1) if (x == 0) // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1) - bar(); + bar(); // probe id : 3 else - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1) - go(); - // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1) + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1) + go(); // probe id : 5 + // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 6, i32 0, i64 -1) } diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h index 8ac84d4..51d590b 100644 --- a/llvm/include/llvm/ProfileData/SampleProf.h +++ b/llvm/include/llvm/ProfileData/SampleProf.h @@ -466,7 +466,7 @@ struct SampleContextFrame { LineLocation Location; SampleContextFrame() : Location(0, 0) {} - + SampleContextFrame(FunctionId Func, LineLocation Location) : Func(Func), Location(Location) {} @@ -527,7 +527,7 @@ public: : Func(Name), State(UnknownContext), Attributes(ContextNone) { assert(!Name.empty() && "Name is empty"); } - + SampleContext(FunctionId Func) : Func(Func), State(UnknownContext), Attributes(ContextNone) {} diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h index 03aa93c..7f2cc0e 100644 --- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h +++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h @@ -85,14 +85,12 @@ private: void findInvokeNormalDests(DenseSet &InvokeNormalDests); void computeBlocksToIgnore(DenseSet &BlocksToIgnore, DenseSet &BlocksAndCallsToIgnore); - void computeProbeIdForCallsites( const DenseSet &BlocksAndCallsToIgnore); const Instruction * getOriginalTerminator(const BasicBlock *Head, const DenseSet &BlocksToIgnore); void computeCFGHash(const DenseSet &BlocksToIgnore); - void computeProbeIdForBlocks(const DenseSet &BlocksToIgnore); - void computeProbeIdForCallsites(); + void computeProbeId(const DenseSet &BlocksToIgnore, + const DenseSet &BlocksAndCallsToIgnore); Function *F; diff --git
a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp index 4d0fa24..9a191b0 100644 --- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp @@ -178,8 +178,7 @@ SampleProfileProber::SampleProfileProber(Function &Func, DenseSet BlocksAndCallsToIgnore; computeBlocksToIgnore(BlocksToIgnore, BlocksAndCallsToIgnore); - computeProbeIdForBlocks(BlocksToIgnore); - computeProbeIdForCallsites(BlocksAndCallsToIgnore); + computeProbeId(BlocksToIgnore, BlocksAndCallsToIgnore); computeCFGHash(BlocksToIgnore); } @@ -300,27 +299,20 @@ void SampleProfileProber::computeCFGHash( << ", Hash = " << FunctionHash << "\n"); } -void SampleProfileProber::computeProbeIdForBlocks( - const DenseSet &BlocksToIgnore) { - for (auto &BB : *F) { - if (BlocksToIgnore.contains(&BB)) - continue; - BlockProbeIds[&BB] = ++LastProbeId; - } -} - -void SampleProfileProber::computeProbeIdForCallsites( +void SampleProfileProber::computeProbeId( + const DenseSet &BlocksToIgnore, const DenseSet &BlocksAndCallsToIgnore) { LLVMContext &Ctx = F->getContext(); Module *M = F->getParent(); for (auto &BB : *F) { + if (!BlocksToIgnore.contains(&BB)) + BlockProbeIds[&BB] = ++LastProbeId; + if (BlocksAndCallsToIgnore.contains(&BB)) continue; for (auto &I : BB) { - if (!isa(I)) - continue; - if (isa(&I)) + if (!isa(I) || isa(&I)) continue; // The current implementation uses the lower 16 bits of the discriminator diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof index ba4c611..d384794 100644 --- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof @@ -1,8 +1,8 @@ foo:3200:13 1: 13 2: 7 - 3: 6 - 4: 13 - 5: 7 _Z3barv:2 _Z3foov:5 - 6: 6 _Z3barv:4 _Z3foov:2 + 4: 6 + 6: 13 + 3: 7 _Z3barv:2 _Z3foov:5 + 5: 6 _Z3barv:4 _Z3foov:2 !CFGChecksum: 563022570642068 diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof index 62f9bd5..213bf0b 100644 --- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof +++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof @@ -1,8 +1,8 @@ foo:3200:13 1: 13 2: 7 - 3: 6 - 4: 13 - 5: 7 - 6: 6 + 4: 6 + 6: 13 + 7: 7 + 9: 6 !CFGChecksum: 844530426352218 diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll index 4647a34f..f0b6fdf 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll @@ -23,21 +23,21 @@ Merge: ; JT-LABEL-NO: T ; JT-LABEL-NO: F ; JT-LABEL: Merge +; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4 ; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3 -; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2 -; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) +; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) +; ASM-NOT: .pseudoprobe 6699318081062747564 4 ; ASM-NOT: .pseudoprobe 6699318081062747564 3 -; ASM-NOT: .pseudoprobe 6699318081062747564 2 -; ASM: .pseudoprobe 6699318081062747564 4 0 0 +; ASM: .pseudoprobe 6699318081062747564 5 0 0 ret i32 %call } ;; Check block T and F are gone, and their probes (probe 2 and 3) are gone too. 
; MIR-tail: bb.0 ; MIR-tail: PSEUDO_PROBE [[#GUID:]], 1, 0, 0 -; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 2 ; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 3 -; MIR-tail: PSEUDO_PROBE [[#GUID:]], 4, 0, 0 +; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 4 +; MIR-tail: PSEUDO_PROBE [[#GUID:]], 5, 0, 0 define i32 @test(i32 %a, i32 %b, i32 %c) { diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll index 62f0737..97b0ed6 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll @@ -62,10 +62,10 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "fra ; DEBUG: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]]) ; DEBUG: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4) - + ; PROBE: ![[CALL1]] = !DILocation(line: 4, column: 3, scope: ![[CALL1BLOCK:[0-9]+]]) -; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646575) +; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646559) ; PROBE: ![[CALL2]] = !DILocation(line: 4, column: 9, scope: ![[CALL2BLOCK:[0-9]+]]) -; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646583) +; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646567) ; PROBE: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]]) ; PROBE: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4) diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll index 822ab40..03bb64b 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll @@ -18,10 +18,12 @@ entry: if.then: ; preds = %entry ; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 2 +; callsite probe 3 invoke void @_Z3foov() to label %invoke.cont unwind label %terminate.lpad, !dbg !24 invoke.cont: ; preds = %if.then +; callsite probe 4 ; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844, invoke void @_Z3bazv() to label %invoke.cont1 unwind label %terminate.lpad, !dbg !26 @@ -31,7 +33,8 @@ invoke.cont1: ; preds = %invoke.cont br label %if.end, !dbg !27 if.else: ; preds = %entry -; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 3 +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 5 +; callsite probe 6 invoke void @_Z3foov() to label %invoke.cont2 unwind label %terminate.lpad, !dbg !28 @@ -40,7 +43,8 @@ invoke.cont2: ; preds = %if.else br label %if.end if.end: ; preds = %invoke.cont2, %invoke.cont1 -; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 4 +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 7 +; callsite probe 8 invoke void @_Z3foov() to label %invoke.cont3 unwind label %terminate.lpad, !dbg !29 @@ -51,14 +55,14 @@ invoke.cont3: ; preds = %if.end br i1 %tobool4, label %if.then5, label %if.end6, !dbg !32 if.then5: ; preds = %invoke.cont3 -; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 5 +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 9 %2 = load volatile i32, ptr @x, align 4, !dbg !33, !tbaa !19 %inc = add nsw i32 %2, 1, !dbg !33 store volatile i32 %inc, ptr @x, align 4, !dbg !33, !tbaa !19 br label %if.end6, !dbg !35 if.end6: ; preds = %if.then5, %invoke.cont3 -; CHECK: call void @llvm.pseudoprobe(i64 
-1069303473483922844, i64 6 +; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 10 ret void, !dbg !36 terminate.lpad: ; preds = %if.end, %if.else, %invoke.cont, %if.then diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll index 148f3ed..379dcfc 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll @@ -29,7 +29,7 @@ if.else: br label %return return: - call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1) + call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1) %1 = load i32, ptr %retval, align 4 ret i32 %1 } @@ -55,13 +55,12 @@ attributes #0 = {"use-sample-profile"} !9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !5, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug) !10 = !{!"function_entry_count", i64 14} !11 = !{!"branch_weights", i32 100, i32 0} -;; A discriminator of 186646575 which is 0x6f80057 in hexdecimal, stands for an indirect call probe -;; with an index of 5 and probe factor of 1.0. -!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646575) +;; A discriminator of 186646559 which is 0xB20001F in hexdecimal, stands for an indirect call probe +;; with an index of 3 and probe factor of 1.0. +!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646559) !13 = distinct !DILocation(line: 10, column: 11, scope: !12) -;; A discriminator of 134217775 which is 0x6f80057 in hexdecimal, stands for an indirect call probe -;; with an index of 5 and probe factor of 0. -!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217775) +;; A discriminator of 134217759 which is 0x800001F in hexdecimal, stands for an indirect call probe +;; with an index of 3 and probe factor of 0. +!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217759) !15 = distinct !DILocation(line: 10, column: 11, scope: !14) !16 = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} - diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll index 474b666..867a49d 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll @@ -22,12 +22,12 @@ if.then: if.else: ; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]] call void %f(i32 2) - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) store i32 2, ptr %retval, align 4 br label %return return: - ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) + ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1) %1 = load i32, ptr %retval, align 4 ret i32 %1 } @@ -36,14 +36,14 @@ attributes #0 = {"use-sample-profile"} ; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7} ; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]]) +;; A discriminator of 119537695 which is 0x720001f in hexdecimal, stands for an indirect call probe +;; with an index of 3. 
+; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537695) +; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} ;; A discriminator of 119537711 which is 0x720002f in hexdecimal, stands for an indirect call probe ;; with an index of 5. -; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711) -; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2} -;; A discriminator of 119537719 which is 0x7200037 in hexdecimal, stands for an indirect call probe -;; with an index of 6. ; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]]) -; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719) +; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711) ; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2} !llvm.module.flags = !{!9, !10} @@ -83,7 +83,7 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - String: 'Applied ' ;YAML-NEXT: - NumSamples: '7' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' -;YAML-NEXT: - ProbeId: '5' +;YAML-NEXT: - ProbeId: '3' ;YAML-NEXT: - String: ', Factor=' ;YAML-NEXT: - Factor: '1.000000e+00' ;YAML-NEXT: - String: ', OriginalSamples=' @@ -113,7 +113,7 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - String: 'Applied ' ;YAML-NEXT: - NumSamples: '6' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' -;YAML-NEXT: - ProbeId: '6' +;YAML-NEXT: - ProbeId: '5' ;YAML-NEXT: - String: ', Factor=' ;YAML-NEXT: - Factor: '1.000000e+00' ;YAML-NEXT: - String: ', OriginalSamples=' @@ -128,7 +128,7 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - String: 'Applied ' ;YAML-NEXT: - NumSamples: '6' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' -;YAML-NEXT: - ProbeId: '3' +;YAML-NEXT: - ProbeId: '4' ;YAML-NEXT: - String: ', Factor=' ;YAML-NEXT: - Factor: '1.000000e+00' ;YAML-NEXT: - String: ', OriginalSamples=' @@ -143,7 +143,7 @@ attributes #0 = {"use-sample-profile"} ;YAML-NEXT: - String: 'Applied ' ;YAML-NEXT: - NumSamples: '13' ;YAML-NEXT: - String: ' samples from profile (ProbeId=' -;YAML-NEXT: - ProbeId: '4' +;YAML-NEXT: - ProbeId: '6' ;YAML-NEXT: - String: ', Factor=' ;YAML-NEXT: - Factor: '1.000000e+00' ;YAML-NEXT: - String: ', OriginalSamples=' diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll index 992afed..217b619 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll @@ -14,15 +14,15 @@ T1: %v1 = call i32 @f1() ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) ;; The distribution factor -8513881372706734080 stands for 53.85%, whic is from 7/6+7. -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -8513881372706734080) %cond3 = icmp eq i32 %v1, 412 br label %Merge F1: ; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]] %v2 = call i32 @f2() -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) ;; The distribution factor 8513881922462547968 stands for 46.25%, which is from 6/6+7. 
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 8513881922462547968) br label %Merge Merge: @@ -30,11 +30,11 @@ Merge: %B = phi i32 [%v1, %T1], [%v2, %F1] br i1 %A, label %T2, label %F2 T2: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 7, i32 0, i64 -1) call void @f3() ret i32 %B F2: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 9, i32 0, i64 -1) ret i32 %B } @@ -42,4 +42,3 @@ F2: ; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6} attributes #0 = {"use-sample-profile"} - diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll index f70e518..b622cfb 100644 --- a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll +++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll @@ -4,7 +4,7 @@ ; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass *** ; VERIFY: Function foo: -; VERIFY-DAG: Probe 6 previous factor 1.00 current factor 5.00 +; VERIFY-DAG: Probe 5 previous factor 1.00 current factor 5.00 ; VERIFY-DAG: Probe 4 previous factor 1.00 current factor 5.00 declare void @foo2() nounwind @@ -27,15 +27,15 @@ bb7.preheader: bb10: ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1) -; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] +; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1) %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ] %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ] @@ -50,14 +50,14 @@ bb10: br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13 bb24: -; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1) +; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1) ret void } ;; A discriminator of 186646583 which is 0xb200037 in hexdecimal, stands for a direct call probe ;; with an index of 6 and a scale of -1%. ; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]]) -; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583) +; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646575) !llvm.dbg.cu = !{!0} !llvm.module.flags = !{!9, !10} -- cgit v1.1 From d57884011e8c57b118b831614b692ba4bc8b5aca Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 3 Apr 2024 14:28:36 -0400 Subject: [SLP]Add support for commutative intrinsics. Implemented long-standing TODO to support commutative intrinsics. 
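For illustration, a minimal, hypothetical IR sketch (not taken from this patch or its tests) of the kind of pattern the change enables: the second lane calls @llvm.smin.i32 with its operands swapped, and SLP may now reorder them when forming a single vector @llvm.smin.v2i32 because the intrinsic reports itself as commutative.

define void @smin_pair(ptr %a, ptr %b, ptr %c) {
entry:
  %a1.ptr = getelementptr inbounds i32, ptr %a, i64 1
  %b1.ptr = getelementptr inbounds i32, ptr %b, i64 1
  %c1.ptr = getelementptr inbounds i32, ptr %c, i64 1
  %a0 = load i32, ptr %a, align 4
  %a1 = load i32, ptr %a1.ptr, align 4
  %b0 = load i32, ptr %b, align 4
  %b1 = load i32, ptr %b1.ptr, align 4
  ; Lane 0 computes smin(a0, b0); lane 1 computes smin(b1, a1). Treating both
  ; lanes as one vector call is only legal if the operands of one lane may be
  ; swapped, i.e. if the intrinsic is commutative.
  %m0 = call i32 @llvm.smin.i32(i32 %a0, i32 %b0)
  %m1 = call i32 @llvm.smin.i32(i32 %b1, i32 %a1)
  store i32 %m0, ptr %c, align 4
  store i32 %m1, ptr %c1.ptr, align 4
  ret void
}

declare i32 @llvm.smin.i32(i32, i32)

Only the first two call arguments participate in this reordering; as the diff below shows, operand reordering for calls is capped at two, and any remaining intrinsic arguments keep their original positions.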
Reviewers: RKSimon Reviewed By: RKSimon Pull Request: https://github.com/llvm/llvm-project/pull/86316 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 44 ++++++++++++++++++---- .../AArch64/vec3-reorder-reshuffle.ll | 23 ++++++----- .../SLPVectorizer/X86/horizontal-minmax.ll | 2 +- .../SLPVectorizer/X86/scatter-vectorize-reorder.ll | 6 +-- .../SLPVectorizer/X86/vec3-reorder-reshuffle.ll | 4 +- .../SLPVectorizer/slp-umax-rdx-matcher-crash.ll | 2 +- 6 files changed, 54 insertions(+), 27 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9f8bc552..9b87e6e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -306,10 +306,7 @@ static bool isCommutative(Instruction *I) { return Cmp->isCommutative(); if (auto *BO = dyn_cast(I)) return BO->isCommutative(); - // TODO: This should check for generic Instruction::isCommutative(), but - // we need to confirm that the caller code correctly handles Intrinsics - // for example (does not have 2 operands). - return false; + return I->isCommutative(); } /// \returns inserting index of InsertElement or InsertValue instruction, @@ -1975,6 +1972,9 @@ public: "Expected same number of lanes"); assert(isa(VL[0]) && "Expected instruction"); unsigned NumOperands = cast(VL[0])->getNumOperands(); + constexpr unsigned IntrinsicNumOperands = 2; + if (auto *CI = dyn_cast(VL[0])) + NumOperands = IntrinsicNumOperands; OpsVec.resize(NumOperands); unsigned NumLanes = VL.size(); for (unsigned OpIdx = 0; OpIdx != NumOperands; ++OpIdx) { @@ -3420,10 +3420,11 @@ private: // immediates do not affect scheduler behavior this is considered // okay. auto *In = BundleMember->Inst; - assert(In && - (isa(In) || - In->getNumOperands() == TE->getNumOperands()) && - "Missed TreeEntry operands?"); + assert( + In && + (isa(In) || + In->getNumOperands() == TE->getNumOperands()) && + "Missed TreeEntry operands?"); (void)In; // fake use to avoid build failure when assertions disabled for (unsigned OpIdx = 0, NumOperands = TE->getNumOperands(); @@ -6798,6 +6799,33 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); + // Sort operands of the instructions so that each side is more likely to + // have the same opcode. 
+ if (isCommutative(VL0)) { + ValueList Left, Right; + reorderInputsAccordingToOpcode(VL, Left, Right, *this); + TE->setOperand(0, Left); + TE->setOperand(1, Right); + SmallVector Operands; + for (unsigned I : seq(2, CI->arg_size())) { + Operands.emplace_back(); + if (isVectorIntrinsicWithScalarOpAtArg(ID, I)) + continue; + for (Value *V : VL) { + auto *CI2 = cast(V); + Operands.back().push_back(CI2->getArgOperand(I)); + } + TE->setOperand(I, Operands.back()); + } + buildTree_rec(Left, Depth + 1, {TE, 0}); + buildTree_rec(Right, Depth + 1, {TE, 1}); + for (unsigned I : seq(2, CI->arg_size())) { + if (Operands[I - 2].empty()) + continue; + buildTree_rec(Operands[I - 2], Depth + 1, {TE, I}); + } + return; + } TE->setOperandsInOrder(); for (unsigned I : seq(0, CI->arg_size())) { // For scalar operands no need to create an entry since no need to diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll index 89ea15d..e492596 100644 --- a/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vec3-reorder-reshuffle.ll @@ -142,17 +142,16 @@ define void @gather_2(ptr %mat1, float %0, float %1) { ; CHECK-LABEL: define void @gather_2( ; CHECK-SAME: ptr [[MAT1:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP2:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0]], float 0.000000e+00, float 0.000000e+00) -; CHECK-NEXT: [[TMP3:%.*]] = call float @llvm.fmuladd.f32(float [[TMP1]], float [[TMP0]], float 0.000000e+00) +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> , float [[TMP1]], i32 1 +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP6]], <2 x float> zeroinitializer) ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP1]], float 0.000000e+00) -; CHECK-NEXT: [[TMP5:%.*]] = fmul float [[TMP2]], 0.000000e+00 -; CHECK-NEXT: [[TMP6:%.*]] = fmul float [[TMP3]], 0.000000e+00 ; CHECK-NEXT: [[TMP7:%.*]] = fmul float [[TMP4]], 0.000000e+00 ; CHECK-NEXT: [[ARRAYIDX163:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1 -; CHECK-NEXT: [[ARRAYIDX2_I_I_I278:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1, i64 1 ; CHECK-NEXT: [[ARRAYIDX5_I_I_I280:%.*]] = getelementptr [4 x [4 x float]], ptr [[MAT1]], i64 0, i64 1, i64 2 -; CHECK-NEXT: store float [[TMP5]], ptr [[ARRAYIDX163]], align 4 -; CHECK-NEXT: store float [[TMP6]], ptr [[ARRAYIDX2_I_I_I278]], align 4 +; CHECK-NEXT: [[TMP8:%.*]] = fmul <2 x float> [[TMP5]], zeroinitializer +; CHECK-NEXT: store <2 x float> [[TMP8]], ptr [[ARRAYIDX163]], align 4 ; CHECK-NEXT: store float [[TMP7]], ptr [[ARRAYIDX5_I_I_I280]], align 4 ; CHECK-NEXT: ret void ; @@ -358,12 +357,12 @@ define void @reuse_shuffle_indices_cost_crash_2(ptr %bezt, float %0) { ; CHECK-NEXT: [[FNEG:%.*]] = fmul float [[TMP0]], 0.000000e+00 ; CHECK-NEXT: [[TMP1:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float [[FNEG]], float 0.000000e+00) ; CHECK-NEXT: store float [[TMP1]], ptr [[BEZT]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = tail call float @llvm.fmuladd.f32(float [[TMP0]], float [[FNEG]], float 0.000000e+00) ; CHECK-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr float, ptr 
[[BEZT]], i64 1 -; CHECK-NEXT: store float [[TMP2]], ptr [[ARRAYIDX5_I]], align 4 -; CHECK-NEXT: [[TMP3:%.*]] = tail call float @llvm.fmuladd.f32(float [[FNEG]], float 0.000000e+00, float 0.000000e+00) -; CHECK-NEXT: [[ARRAYIDX8_I831:%.*]] = getelementptr float, ptr [[BEZT]], i64 2 -; CHECK-NEXT: store float [[TMP3]], ptr [[ARRAYIDX8_I831]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> , float [[TMP0]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> poison, float [[FNEG]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP5:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP2]], <2 x float> [[TMP4]], <2 x float> zeroinitializer) +; CHECK-NEXT: store <2 x float> [[TMP5]], ptr [[ARRAYIDX5_I]], align 4 ; CHECK-NEXT: ret void ; entry: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll index 66e3fbf..4cc3c12 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/horizontal-minmax.ll @@ -1295,7 +1295,7 @@ define i8 @umin_intrinsic_rdx_v16i8(ptr %p0) { define void @PR49730() { ; CHECK-LABEL: @PR49730( -; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> ) +; CHECK-NEXT: [[TMP1:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> , <4 x i32> ) ; CHECK-NEXT: [[TMP2:%.*]] = sub nsw <4 x i32> undef, [[TMP1]] ; CHECK-NEXT: [[T12:%.*]] = sub nsw i32 undef, undef ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP2]]) diff --git a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll index fb2b653..82085ad 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/scatter-vectorize-reorder.ll @@ -12,10 +12,10 @@ define void @test() { ; CHECK-NEXT: [[TMP1:%.*]] = fsub <2 x float> zeroinitializer, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = load float, ptr [[ARRAYIDX10_I_I86]], align 4 ; CHECK-NEXT: [[TMP3:%.*]] = load float, ptr undef, align 4 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> , <2 x i32> -; CHECK-NEXT: [[TMP5:%.*]] = insertelement <2 x float> poison, float [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> , float [[TMP2]], i32 0 +; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <2 x float> [[TMP0]], <2 x float> poison, <2 x i32> ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <2 x float> [[TMP5]], float [[TMP3]], i32 1 -; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP6]], <2 x float> , <2 x i32> +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <2 x float> [[TMP4]], <2 x float> , <2 x i32> ; CHECK-NEXT: [[TMP8:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP4]], <2 x float> [[TMP6]], <2 x float> [[TMP7]]) ; CHECK-NEXT: br i1 false, label [[BB2:%.*]], label [[BB3:%.*]] ; CHECK: bb2: diff --git a/llvm/test/Transforms/SLPVectorizer/X86/vec3-reorder-reshuffle.ll b/llvm/test/Transforms/SLPVectorizer/X86/vec3-reorder-reshuffle.ll index 46cca9b..1faeea7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/vec3-reorder-reshuffle.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/vec3-reorder-reshuffle.ll @@ -142,8 +142,8 @@ define void @gather_2(ptr %mat1, float %0, float %1) { ; CHECK-SAME: ptr [[MAT1:%.*]], float [[TMP0:%.*]], float [[TMP1:%.*]]) { ; 
CHECK-NEXT: entry: ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i32 0 -; CHECK-NEXT: [[TMP3:%.*]] = insertelement <2 x float> [[TMP2]], float [[TMP1]], i32 1 -; CHECK-NEXT: [[TMP4:%.*]] = shufflevector <2 x float> [[TMP3]], <2 x float> , <2 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <2 x float> [[TMP2]], <2 x float> poison, <2 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x float> , float [[TMP1]], i32 1 ; CHECK-NEXT: [[TMP5:%.*]] = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> [[TMP3]], <2 x float> [[TMP4]], <2 x float> zeroinitializer) ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fmuladd.f32(float [[TMP0]], float [[TMP1]], float 0.000000e+00) ; CHECK-NEXT: [[TMP7:%.*]] = fmul float [[TMP6]], 0.000000e+00 diff --git a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll index 66229c2..8b131cc 100644 --- a/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll +++ b/llvm/test/Transforms/SLPVectorizer/slp-umax-rdx-matcher-crash.ll @@ -43,7 +43,7 @@ declare i32 @llvm.umin.i32(i32, i32) define void @test2() { ; CHECK-LABEL: @test2( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> undef, <4 x i32> ) +; CHECK-NEXT: [[TMP0:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> , <4 x i32> ) ; CHECK-NEXT: [[TMP1:%.*]] = sub nsw <4 x i32> undef, [[TMP0]] ; CHECK-NEXT: [[TMP2:%.*]] = call i32 @llvm.vector.reduce.umin.v4i32(<4 x i32> [[TMP1]]) ; CHECK-NEXT: [[TMP3:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 77) -- cgit v1.1 From b15d27e24902444129bfec4095d68bf80f3af700 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 3 Apr 2024 19:29:01 +0100 Subject: [VectorCombine][X86] Add additional tests for #87510 Add zext nneg tests and check we don't fold casts with different src types --- .../VectorCombine/X86/shuffle-of-casts.ll | 42 ++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index 3a7c331..b922528 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -17,6 +17,33 @@ define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { ret <16 x i32> %r } +define <16 x i32> @concat_zext_nneg_v8i8_v16i32(<8 x i8> %a0, <8 x i8> %a1) { +; CHECK-LABEL: @concat_zext_nneg_v8i8_v16i32( +; CHECK-NEXT: [[X0:%.*]] = zext nneg <8 x i8> [[A0:%.*]] to <8 x i32> +; CHECK-NEXT: [[X1:%.*]] = zext nneg <8 x i8> [[A1:%.*]] to <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> +; CHECK-NEXT: ret <16 x i32> [[R]] +; + %x0 = zext nneg <8 x i8> %a0 to <8 x i32> + %x1 = zext nneg <8 x i8> %a1 to <8 x i32> + %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> + ret <16 x i32> %r +} + +; TODO - sext + zext nneg -> sext +define <8 x i32> @concat_sext_zext_nneg_v4i8_v8i32(<4 x i8> %a0, <4 x i8> %a1) { +; CHECK-LABEL: @concat_sext_zext_nneg_v4i8_v8i32( +; CHECK-NEXT: [[X0:%.*]] = sext <4 x i8> [[A0:%.*]] to <4 x i32> +; CHECK-NEXT: [[X1:%.*]] = zext nneg <4 x i8> [[A1:%.*]] to <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[R]] +; + %x0 = sext <4 x i8> %a0 to <4 x i32> + %x1 = zext nneg <4 x i8> %a1 to <4 x i32> + %r = shufflevector <4 x i32> %x0, <4 x i32> 
%x1, <8 x i32> + ret <8 x i32> %r +} + define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @concat_sext_v8i16_v16i32( ; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> @@ -170,6 +197,21 @@ define <8 x float> @concat_bitcast_v4i32_v8f32(<4 x i32> %a0, <4 x i32> %a1) { ret <8 x float> %r } +; negative - src type mismatch + +define <8 x i32> @concat_sext_v4i8_v4i16_v8i32(<4 x i8> %a0, <4 x i16> %a1) { +; CHECK-LABEL: @concat_sext_v4i8_v4i16_v8i32( +; CHECK-NEXT: [[X0:%.*]] = sext <4 x i8> [[A0:%.*]] to <4 x i32> +; CHECK-NEXT: [[X1:%.*]] = sext <4 x i16> [[A1:%.*]] to <4 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X1]], <8 x i32> +; CHECK-NEXT: ret <8 x i32> [[R]] +; + %x0 = sext <4 x i8> %a0 to <4 x i32> + %x1 = sext <4 x i16> %a1 to <4 x i32> + %r = shufflevector <4 x i32> %x0, <4 x i32> %x1, <8 x i32> + ret <8 x i32> %r +} + ; negative - castop mismatch define <16 x i32> @concat_sext_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -- cgit v1.1 From 52ae02db4044b5d6e55b48133ac641b0c998ef49 Mon Sep 17 00:00:00 2001 From: David Green Date: Wed, 3 Apr 2024 19:31:25 +0100 Subject: [AArch64] Add a test for non-temporal masked loads / stores. NFC --- .../CodeGen/AArch64/sve-nontemporal-masked-ldst.ll | 75 ++++++++++++++++++++++ 1 file changed, 75 insertions(+) create mode 100644 llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll diff --git a/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll new file mode 100644 index 0000000..bcfc7b3 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/sve-nontemporal-masked-ldst.ll @@ -0,0 +1,75 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s | FileCheck %s + +define <4 x i32> @masked_load_v4i32(ptr %a, <4 x i1> %mask) nounwind { +; CHECK-LABEL: masked_load_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll v0.4s, v0.4h, #0 +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: shl v0.4s, v0.4s, #31 +; CHECK-NEXT: cmlt v0.4s, v0.4s, #0 +; CHECK-NEXT: cmpne p0.s, p0/z, z0.s, #0 +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: // kill: def $q0 killed $q0 killed $z0 +; CHECK-NEXT: ret + %load = call <4 x i32> @llvm.masked.load.v4i32(ptr %a, i32 1, <4 x i1> %mask, <4 x i32> undef), !nontemporal !0 + ret <4 x i32> %load +} + +define void @masked_store_v4i32(<4 x i32> %x, ptr %a, <4 x i1> %mask) nounwind { +; CHECK-LABEL: masked_store_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ushll v1.4s, v1.4h, #0 +; CHECK-NEXT: ptrue p0.s, vl4 +; CHECK-NEXT: // kill: def $q0 killed $q0 def $z0 +; CHECK-NEXT: shl v1.4s, v1.4s, #31 +; CHECK-NEXT: cmlt v1.4s, v1.4s, #0 +; CHECK-NEXT: cmpne p0.s, p0/z, z1.s, #0 +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %a, i32 1, <4 x i1> %mask), !nontemporal !0 + ret void +} + +define <4 x i32> @load_v4i32(ptr %a) nounwind { +; CHECK-LABEL: load_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ldr q0, [x0] +; CHECK-NEXT: ret + %load = call <4 x i32> @llvm.masked.load.v4i32(ptr %a, i32 1, <4 x i1> , <4 x i32> undef), !nontemporal !0 + ret <4 x i32> %load +} + +define void @store_v4i32(<4 x i32> %x, ptr %a) nounwind { +; CHECK-LABEL: store_v4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.v4i32.p0(<4 x i32> %x, ptr %a, i32 1, <4 x i1> ), !nontemporal !0 + ret void +} + +define 
@masked_load_nxv4i32(ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_load_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0] +; CHECK-NEXT: ret + %load = call @llvm.masked.load.nxv4i32(ptr %a, i32 1, %mask, undef), !nontemporal !0 + ret %load +} + +define void @masked_store_nxv4i32( %x, ptr %a, %mask) nounwind { +; CHECK-LABEL: masked_store_nxv4i32: +; CHECK: // %bb.0: +; CHECK-NEXT: st1w { z0.s }, p0, [x0] +; CHECK-NEXT: ret + call void @llvm.masked.store.nxv4i32.p0( %x, ptr %a, i32 1, %mask), !nontemporal !0 + ret void +} + +declare @llvm.masked.load.nxv4i32(ptr, i32, , ) +declare void @llvm.masked.store.nxv4i32.p0(, ptr, i32, ) +declare <4 x i32> @llvm.masked.load.v4i32(ptr, i32, <4 x i1>, <4 x i32>) +declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>) + +!0 = !{i32 1} -- cgit v1.1 From 7c68a958e2213a5190d91ce6dddddb72ce732f1e Mon Sep 17 00:00:00 2001 From: Changpeng Fang Date: Wed, 3 Apr 2024 11:35:09 -0700 Subject: AMDGPU: Use PseudoInstr to name SIMCInstr for DSDIR and SOPs, NFC (#87537) We should consistently use PseudoInstr instead of Mnemonic to name SIMCInstr, even though they may be the same in most cases --- llvm/lib/Target/AMDGPU/DSDIRInstructions.td | 2 +- llvm/lib/Target/AMDGPU/SOPInstructions.td | 78 ++++++++++++++--------------- 2 files changed, 40 insertions(+), 40 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/DSDIRInstructions.td b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td index f4f02d2..0541f0f 100644 --- a/llvm/lib/Target/AMDGPU/DSDIRInstructions.td +++ b/llvm/lib/Target/AMDGPU/DSDIRInstructions.td @@ -112,7 +112,7 @@ class DSDIR_Real : lds.Mnemonic # asm, ins, lds.is_direct>, - SIMCInstr { + SIMCInstr { let isPseudo = 0; let isCodeGenOnly = 0; diff --git a/llvm/lib/Target/AMDGPU/SOPInstructions.td b/llvm/lib/Target/AMDGPU/SOPInstructions.td index d34ee34..0b7d45e 100644 --- a/llvm/lib/Target/AMDGPU/SOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/SOPInstructions.td @@ -1972,7 +1972,7 @@ class Select_gfx6_gfx7 : SIMCInstr { multiclass SOP1_Real_gfx11 op, string name = !tolower(NAME)> { defvar ps = !cast(NAME); def _gfx11 : SOP1_Real, - Select_gfx11; + Select_gfx11; if !ne(ps.Mnemonic, name) then def : MnemonicAlias, Requires<[isGFX11Only]>; } @@ -1980,14 +1980,14 @@ multiclass SOP1_Real_gfx11 op, string name = !tolower(NAME)> { multiclass SOP1_Real_gfx12 op, string name = !tolower(NAME)> { defvar ps = !cast(NAME); def _gfx12 : SOP1_Real, - Select_gfx12; + Select_gfx12; if !ne(ps.Mnemonic, name) then def : MnemonicAlias, Requires<[isGFX12Plus]>; } multiclass SOP1_M0_Real_gfx12 op> { def _gfx12 : SOP1_Real(NAME)>, - Select_gfx12(NAME).Mnemonic> { + Select_gfx12(NAME).PseudoInstr> { let Inst{7-0} = M0_gfx11plus.HWEncoding{7-0}; // Set Src0 encoding to M0 } } @@ -1995,7 +1995,7 @@ multiclass SOP1_M0_Real_gfx12 op> { multiclass SOP1_IMM_Real_gfx12 op> { defvar ps = !cast(NAME); def _gfx12 : SOP1_Real, - Select_gfx12; + Select_gfx12; } multiclass SOP1_Real_gfx11_gfx12 op, string name = !tolower(NAME)> : @@ -2106,7 +2106,7 @@ defm S_RNDNE_F16 : SOP1_Real_gfx11_gfx12<0x06e>; multiclass SOP1_Real_gfx10 op> { defvar ps = !cast(NAME); def _gfx10 : SOP1_Real, - Select_gfx10; + Select_gfx10; } multiclass SOP1_Real_gfx10_gfx11_gfx12 op> : @@ -2139,7 +2139,7 @@ defm S_MOVRELSD_2_B32 : SOP1_Real_gfx10<0x049>; multiclass SOP1_Real_gfx6_gfx7 op> { defvar ps = !cast(NAME); def _gfx6_gfx7 : SOP1_Real, - Select_gfx6_gfx7; + Select_gfx6_gfx7; } multiclass SOP1_Real_gfx6_gfx7_gfx10 op> : @@ -2205,7 +2205,7 @@ defm S_ABS_I32 : 
SOP1_Real_gfx6_gfx7_gfx10<0x034>; multiclass SOP2_Real_gfx12 op, string name = !tolower(NAME)> { defvar ps = !cast(NAME); def _gfx12 : SOP2_Real32, - Select_gfx12; + Select_gfx12; if !ne(ps.Mnemonic, name) then def : MnemonicAlias, Requires<[isGFX12Plus]>; } @@ -2222,7 +2222,7 @@ defm S_MAXIMUM_F16 : SOP2_Real_gfx12<0x052>; multiclass SOP2_Real_gfx11 op, string name = !tolower(NAME)> { defvar ps = !cast(NAME); def _gfx11 : SOP2_Real32, - Select_gfx11; + Select_gfx11; if !ne(ps.Mnemonic, name) then def : MnemonicAlias, Requires<[isGFX11Only]>; } @@ -2283,12 +2283,12 @@ defm S_MUL_U64 : SOP2_Real_gfx12<0x055>; multiclass SOP2_Real_FMAK_gfx12 op> { def _gfx12 : SOP2_Real64(NAME)>, - Select_gfx12(NAME).Mnemonic>; + Select_gfx12(NAME).PseudoInstr>; } multiclass SOP2_Real_FMAK_gfx11 op> { def _gfx11 : SOP2_Real64(NAME)>, - Select_gfx11(NAME).Mnemonic>; + Select_gfx11(NAME).PseudoInstr>; } multiclass SOP2_Real_FMAK_gfx11_gfx12 op> : @@ -2325,7 +2325,7 @@ defm S_MAX_F16 : SOP2_Real_gfx11_Renamed_gfx12<0x04c, "s_max_num_f16">; multiclass SOP2_Real_gfx10 op> { defvar ps = !cast(NAME); def _gfx10 : SOP2_Real32, - Select_gfx10; + Select_gfx10; } multiclass SOP2_Real_gfx10_gfx11_gfx12 op> : @@ -2348,7 +2348,7 @@ defm S_MUL_HI_I32 : SOP2_Real_gfx10<0x036>; multiclass SOP2_Real_gfx6_gfx7 op> { defvar ps = !cast(NAME); def _gfx6_gfx7 : SOP2_Real32, - Select_gfx6_gfx7; + Select_gfx6_gfx7; } multiclass SOP2_Real_gfx6_gfx7_gfx10 op> : @@ -2410,24 +2410,24 @@ defm S_ABSDIFF_I32 : SOP2_Real_gfx6_gfx7_gfx10<0x02c>; multiclass SOPK_Real32_gfx12 op, string name = !tolower(NAME)> { defvar ps = !cast(NAME); def _gfx12 : SOPK_Real32, - Select_gfx12; + Select_gfx12; if !ne(ps.Mnemonic, name) then def : MnemonicAlias, Requires<[isGFX12Plus]>; } multiclass SOPK_Real32_gfx11 op> { def _gfx11 : SOPK_Real32(NAME)>, - Select_gfx11(NAME).Mnemonic>; + Select_gfx11(NAME).PseudoInstr>; } multiclass SOPK_Real64_gfx12 op> { def _gfx12 : SOPK_Real64(NAME)>, - Select_gfx12(NAME).Mnemonic>; + Select_gfx12(NAME).PseudoInstr>; } multiclass SOPK_Real64_gfx11 op> { def _gfx11 : SOPK_Real64(NAME)>, - Select_gfx11(NAME).Mnemonic>; + Select_gfx11(NAME).PseudoInstr>; } multiclass SOPK_Real32_gfx11_gfx12 op> : @@ -2454,13 +2454,13 @@ defm S_WAITCNT_LGKMCNT : SOPK_Real32_gfx11<0x01b>; multiclass SOPK_Real32_gfx10 op> { defvar ps = !cast(NAME); def _gfx10 : SOPK_Real32, - Select_gfx10; + Select_gfx10; } multiclass SOPK_Real64_gfx10 op> { defvar ps = !cast(NAME); def _gfx10 : SOPK_Real64, - Select_gfx10; + Select_gfx10; } multiclass SOPK_Real32_gfx10_gfx11 op> : @@ -2485,13 +2485,13 @@ defm S_SUBVECTOR_LOOP_END : SOPK_Real32_gfx10<0x01c>; multiclass SOPK_Real32_gfx6_gfx7 op> { defvar ps = !cast(NAME); def _gfx6_gfx7 : SOPK_Real32, - Select_gfx6_gfx7; + Select_gfx6_gfx7; } multiclass SOPK_Real64_gfx6_gfx7 op> { defvar ps = !cast(NAME); def _gfx6_gfx7 : SOPK_Real64, - Select_gfx6_gfx7; + Select_gfx6_gfx7; } multiclass SOPK_Real32_gfx6_gfx7_gfx10 op> : @@ -2539,7 +2539,7 @@ defm S_SETREG_IMM32_B32 : SOPK_Real64_gfx6_gfx7_gfx10<0x015>; multiclass SOPP_Real_32_gfx12 op, string name = !tolower(NAME)> { defvar ps = !cast(NAME); def _gfx12 : SOPP_Real_32, - Select_gfx12; + Select_gfx12; if !ne(ps.Mnemonic, name) then def : MnemonicAlias, Requires<[isGFX12Plus]>; } @@ -2564,7 +2564,7 @@ defm S_WAIT_STORECNT_DSCNT : SOPP_Real_32_gfx12<0x049>; multiclass SOPP_Real_32_gfx11 op, string name = !tolower(NAME)> { defvar ps = !cast(NAME); def _gfx11 : SOPP_Real_32, - Select_gfx11, + Select_gfx11, SOPPRelaxTable<0, ps.KeyName, "_gfx11">; if !ne(ps.Mnemonic, 
name) then def : MnemonicAlias, Requires<[isGFX11Only]>; @@ -2572,13 +2572,13 @@ multiclass SOPP_Real_32_gfx11 op, string name = !tolower(NAME)> { multiclass SOPP_Real_64_gfx12 op> { def _gfx12 : SOPP_Real_64(NAME), !cast(NAME).Mnemonic>, - Select_gfx12(NAME).Mnemonic>, + Select_gfx12(NAME).PseudoInstr>, SOPPRelaxTable<1, !cast(NAME).KeyName, "_gfx12">; } multiclass SOPP_Real_64_gfx11 op> { def _gfx11 : SOPP_Real_64(NAME), !cast(NAME).Mnemonic>, - Select_gfx11(NAME).Mnemonic>, + Select_gfx11(NAME).PseudoInstr>, SOPPRelaxTable<1, !cast(NAME).KeyName, "_gfx11">; } @@ -2654,21 +2654,21 @@ defm S_SINGLEUSE_VDST : SOPP_Real_32_gfx11_gfx12<0x013>; multiclass SOPP_Real_32_gfx6_gfx7 op> { defvar ps = !cast(NAME); def _gfx6_gfx7 : SOPP_Real_32(NAME).Mnemonic>, - Select_gfx6_gfx7, + Select_gfx6_gfx7, SOPPRelaxTable<0, ps.KeyName, "_gfx6_gfx7">; } multiclass SOPP_Real_32_gfx8_gfx9 op> { defvar ps = !cast(NAME); def _vi : SOPP_Real_32, - Select_vi, + Select_vi, SOPPRelaxTable<0, ps.KeyName, "_vi">; } multiclass SOPP_Real_32_gfx10 op> { defvar ps = !cast(NAME); def _gfx10 : SOPP_Real_32, - Select_gfx10, + Select_gfx10, SOPPRelaxTable<0, ps.KeyName, "_gfx10">; } @@ -2691,21 +2691,21 @@ multiclass SOPP_Real_32_gfx10_gfx11_gfx12 op> : multiclass SOPP_Real_64_gfx6_gfx7 op> { defvar ps = !cast(NAME); def _gfx6_gfx7 : SOPP_Real_64, - Select_gfx6_gfx7, + Select_gfx6_gfx7, SOPPRelaxTable<1, ps.KeyName, "_gfx6_gfx7">; } multiclass SOPP_Real_64_gfx8_gfx9 op> { defvar ps = !cast(NAME); def _vi : SOPP_Real_64, - Select_vi, + Select_vi, SOPPRelaxTable<1, ps.KeyName, "_vi">; } multiclass SOPP_Real_64_gfx10 op> { defvar ps = !cast(NAME); def _gfx10 : SOPP_Real_64, - Select_gfx10, + Select_gfx10, SOPPRelaxTable<1, ps.KeyName, "_gfx10">; } @@ -2771,12 +2771,12 @@ defm S_CBRANCH_CDBGSYS_AND_USER : SOPP_Real_With_Relaxation_gfx6_gfx7_gfx8_gfx9_ multiclass SOPC_Real_gfx12 op> { def _gfx12 : SOPC_Real(NAME)>, - Select_gfx12(NAME).Mnemonic>; + Select_gfx12(NAME).PseudoInstr>; } multiclass SOPC_Real_gfx11 op> { def _gfx11 : SOPC_Real(NAME)>, - Select_gfx11(NAME).Mnemonic>; + Select_gfx11(NAME).PseudoInstr>; } multiclass SOPC_Real_gfx11_gfx12 op> : @@ -2826,19 +2826,19 @@ defm S_CMP_NLT_F16 : SOPC_Real_gfx11_gfx12<0x5e>; multiclass SOPC_Real_gfx6_gfx7 op> { defvar ps = !cast(NAME); def _gfx6_gfx7 : SOPC_Real, - Select_gfx6_gfx7; + Select_gfx6_gfx7; } multiclass SOPC_Real_gfx8_gfx9 op> { defvar ps = !cast(NAME); def _vi : SOPC_Real, - Select_vi; + Select_vi; } multiclass SOPC_Real_gfx10 op> { defvar ps = !cast(NAME); def _gfx10 : SOPC_Real, - Select_gfx10; + Select_gfx10; } multiclass SOPC_Real_gfx8_gfx9_gfx10 op> : @@ -2878,15 +2878,15 @@ defm S_CMP_LG_U64 : SOPC_Real_gfx8_gfx9_gfx10<0x13>; class SOP1_Real_vi op, SOP1_Pseudo ps> : SOP1_Real, - Select_vi; + Select_vi; class SOP2_Real_vi op, SOP2_Pseudo ps> : SOP2_Real32, - Select_vi; + Select_vi; class SOPK_Real_vi op, SOPK_Pseudo ps> : SOPK_Real32, - Select_vi; + Select_vi; def S_MOV_B32_vi : SOP1_Real_vi <0x00, S_MOV_B32>; def S_MOV_B64_vi : SOP1_Real_vi <0x01, S_MOV_B64>; @@ -3007,7 +3007,7 @@ def S_GETREG_B32_vi : SOPK_Real_vi <0x11, S_GETREG_B32>; def S_SETREG_B32_vi : SOPK_Real_vi <0x12, S_SETREG_B32>; //def S_GETREG_REGRD_B32_vi : SOPK_Real_vi <0x13, S_GETREG_REGRD_B32>; // see pseudo for comments def S_SETREG_IMM32_B32_vi : SOPK_Real64<0x14, S_SETREG_IMM32_B32>, - Select_vi; + Select_vi; def S_CALL_B64_vi : SOPK_Real_vi <0x15, S_CALL_B64>; -- cgit v1.1 From e29228efae67cadfd18c532f944d19b4e16ff229 Mon Sep 17 00:00:00 2001 From: Joe Nash Date: Wed, 3 Apr 2024 14:51:27 
-0400 Subject: [AMDGPU][MC] Allow VOP3C dpp src1 to be imm or SGPR (#87418) Allows src1 of VOP3 encoded VOPC to be an SGPR or inline immediate on GFX1150Plus The w32 and w64 _e64_dpp assembler only real instructions were unused, and erroneously constructed in a way that bugged parsing of the new instructions. They are removed. This patch is a follow up to PR https://github.com/llvm/llvm-project/pull/87382 --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 4 +- llvm/lib/Target/AMDGPU/VOPCInstructions.td | 58 +- llvm/lib/Target/AMDGPU/VOPInstructions.td | 1 - llvm/test/MC/AMDGPU/gfx1150_asm_features.s | 13 +- llvm/test/MC/AMDGPU/gfx12_asm_features.s | 39 +- llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s | 864 +++++++++++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s | 864 +++++++++++++++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s | 324 ++++++++ llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s | 324 ++++++++ llvm/test/MC/AMDGPU/gfx12_err.s | 16 - .../Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp16.txt | 223 ++++++ .../Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp8.txt | 232 ++++++ .../AMDGPU/gfx12_dasm_vop3cx_dpp16.txt | 168 ++++ .../Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt | 174 +++++ 14 files changed, 3218 insertions(+), 86 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 5d44396..4b74376 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -496,9 +496,7 @@ bool isVOPC64DPP(unsigned Opc) { return isVOPC64DPPOpcodeHelper(Opc) || isVOPC64DPP8OpcodeHelper(Opc); } -bool isVOPCAsmOnly(unsigned Opc) { - return isVOPCAsmOnlyOpcodeHelper(Opc) || isVOP3CAsmOnlyOpcodeHelper(Opc); -} +bool isVOPCAsmOnly(unsigned Opc) { return isVOPCAsmOnlyOpcodeHelper(Opc); } bool getMAIIsDGEMM(unsigned Opc) { const MAIInstInfo *Info = getMAIInstInfoHelper(Opc); diff --git a/llvm/lib/Target/AMDGPU/VOPCInstructions.td b/llvm/lib/Target/AMDGPU/VOPCInstructions.td index 16dd353..0b3a3d5 100644 --- a/llvm/lib/Target/AMDGPU/VOPCInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPCInstructions.td @@ -75,8 +75,6 @@ class VOPC_Profile sched, ValueType vt0, ValueType vt1 = vt let HasDst32 = 0; // VOPC disallows dst_sel and dst_unused as they have no effect on destination let EmitDstSel = 0; - // FIXME: work around AsmParser bug - let Src1ModVOP3DPP = getSrcModDPP.ret; let Outs64 = (outs VOPDstS64orS32:$sdst); let OutsVOP3DPP = Outs64; let OutsVOP3DPP8 = Outs64; @@ -114,8 +112,6 @@ class VOPC_NoSdst_Profile sched, ValueType vt0, "$src0, $src1"); let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let EmitDst = 0; - // FIXME: work around AsmParser bug - let Src1ModVOP3DPP = getSrcModDPP.ret; } multiclass VOPC_NoSdst_Profile_t16 sched, ValueType vt0, ValueType vt1 = vt0> { @@ -776,7 +772,7 @@ class VOPC_Class_Profile sched, ValueType src0VT, ValueType // DPP8 forbids modifiers and can inherit from VOPC_Profile let Ins64 = (ins Src0Mod:$src0_modifiers, Src0RC64:$src0, Src1RC64:$src1); - dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VRegSrc_32:$src1); + dag InsPartVOP3DPP = (ins FPVRegInputMods:$src0_modifiers, VGPRSrc_32:$src0, VCSrc_b32:$src1); let InsVOP3Base = !con(InsPartVOP3DPP, !if(HasOpSel, (ins op_sel0:$op_sel), (ins))); let AsmVOP3Base = "$sdst, $src0_modifiers, $src1"; @@ -789,8 +785,6 @@ class VOPC_Class_Profile sched, ValueType src0VT, ValueType let HasSrc1Mods = 0; let HasClamp = 0; let HasOMod = 0; - // FIXME: work around 
AsmParser bug - let Src1ModVOP3DPP = getSrcModDPP.ret; } multiclass VOPC_Class_Profile_t16 sched> { @@ -818,8 +812,6 @@ class VOPC_Class_NoSdst_Profile sched, ValueType src0VT, Va let AsmVOP3Base = "$src0_modifiers, $src1"; let AsmSDWA9 = "$src0_modifiers, $src1_modifiers $src0_sel $src1_sel"; let EmitDst = 0; - // FIXME: work around AsmParser bug - let Src1ModVOP3DPP = getSrcModDPP.ret; } multiclass VOPC_Class_NoSdst_Profile_t16 sched> { @@ -1385,31 +1377,9 @@ multiclass VOPC_Real_Base op> { } if ps64.Pfl.HasExtVOP3DPP then { defvar psDPP = !cast(NAME #"_e64" #"_dpp"); - defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16; def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP>, SIMCInstr; - def _e64_dpp_w32#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP> { - let AsmString = psDPP.OpName # " vcc_lo, " # AsmDPP; - let isAsmParserOnly = 1; - let WaveSizePredicate = isWave32; - } - def _e64_dpp_w64#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP> { - let AsmString = psDPP.OpName # " vcc, " # AsmDPP; - let isAsmParserOnly = 1; - let WaveSizePredicate = isWave64; - } - defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8; def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64>; - def _e64_dpp8_w32#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64> { - let AsmString = ps32.OpName # " vcc_lo, " # AsmDPP8; - let isAsmParserOnly = 1; - let WaveSizePredicate = isWave32; - } - def _e64_dpp8_w64#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64> { - let AsmString = ps32.OpName # " vcc, " # AsmDPP8; - let isAsmParserOnly = 1; - let WaveSizePredicate = isWave64; - } } } // AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace } @@ -1480,35 +1450,9 @@ multiclass VOPC_Real_with_name op, string OpName, if ps64.Pfl.HasExtVOP3DPP then { defvar psDPP = !cast(OpName #"_e64" #"_dpp"); - defvar AsmDPP = ps64.Pfl.AsmVOP3DPP16; def _e64_dpp#Gen.Suffix : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name>, SIMCInstr; - def _e64_dpp_w32#Gen.Suffix - : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> { - let AsmString = asm_name # " vcc_lo, " # AsmDPP; - let isAsmParserOnly = 1; - let WaveSizePredicate = isWave32; - } - def _e64_dpp_w64#Gen.Suffix - : VOPC64_DPP16_Dst<{0, op}, psDPP, asm_name> { - let AsmString = asm_name # " vcc, " # AsmDPP; - let isAsmParserOnly = 1; - let WaveSizePredicate = isWave64; - } - defvar AsmDPP8 = ps64.Pfl.AsmVOP3DPP8; def _e64_dpp8#Gen.Suffix : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name>; - def _e64_dpp8_w32#Gen.Suffix - : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> { - let AsmString = asm_name # " vcc_lo, " # AsmDPP8; - let isAsmParserOnly = 1; - let WaveSizePredicate = isWave32; - } - def _e64_dpp8_w64#Gen.Suffix - : VOPC64_DPP8_Dst<{0, op}, ps64, asm_name> { - let AsmString = asm_name # " vcc, " # AsmDPP8; - let isAsmParserOnly = 1; - let WaveSizePredicate = isWave64; - } } } // End AssemblerPredicate = Gen.AssemblerPredicate, DecoderNamespace = Gen.DecoderNamespace } diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td index a6272e9..60e91c7 100644 --- a/llvm/lib/Target/AMDGPU/VOPInstructions.td +++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td @@ -1680,7 +1680,6 @@ class AsmOnlyInfoTable : GenericTable { } def VOPCAsmOnlyInfoTable : AsmOnlyInfoTable <"VOPC", "VOPC_DPPe_Common">; -def VOP3CAsmOnlyInfoTable : AsmOnlyInfoTable <"VOP3C", "VOP3_DPPe_Common_Base">; def VOPTrue16Table : GenericTable { let FilterClass = "VOP_Pseudo"; diff --git a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s index 336dd8b..58b7847 100644 --- 
a/llvm/test/MC/AMDGPU/gfx1150_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx1150_asm_features.s @@ -30,6 +30,17 @@ v_add_f32_e64_dpp v5, v1, s2 row_mirror v_min3_f16 v5, v1, s2, 2.0 op_sel:[1,1,0,1] quad_perm:[1,1,1,1] row_mask:0xf bank_mask:0xf // GFX1150: encoding: [0x05,0x58,0x49,0xd6,0xfa,0x04,0xd0,0x03,0x01,0x55,0x00,0xff] -// This is a regression test for potential changes in the future. v_cmp_le_f32 vcc_lo, v1, v2 row_mirror // GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] + +v_cmp_le_f32 vcc_lo, v1, s2 row_mirror +// GFX1150: encoding: [0x6a,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cmp_le_f32 vcc_lo, v1, s2 quad_perm:[1,1,1,1] +// GFX1150: encoding: [0x6a,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x55,0x00,0xff] + +v_cmpx_neq_f16 v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// GFX1150: encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f16 v1, 2.0 quad_perm:[1,1,1,1] +// GFX1150: encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x55,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_features.s b/llvm/test/MC/AMDGPU/gfx12_asm_features.s index f32b7da..7393de2 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_features.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_features.s @@ -6,26 +6,49 @@ // v_add3_u32_e64_dpp v5, v1, s2, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x04,0x0c,0x04,0x01,0x1b,0x00,0xff] v_add3_u32_e64_dpp v5, v1, 42, v3 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf -// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x54,0x0d,0x04,0x01,0x1b,0x00,0xff] +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xfa,0x54,0x0d,0x04,0x01,0x1b,0x00,0xff] v_add3_u32_e64_dpp v5, v1, s2, v0 dpp8:[7,6,5,4,3,2,1,0] -// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x00,0x04,0x01,0x77,0x39,0x05] +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x00,0x04,0x01,0x77,0x39,0x05] v_add3_u32_e64_dpp v5, v1, 42, v0 dpp8:[7,6,5,4,3,2,1,0] -// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05] +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x54,0x01,0x04,0x01,0x77,0x39,0x05] v_add3_u32_e64_dpp v5, v1, s2, s3 dpp8:[7,6,5,4,3,2,1,0] -// GFX1150: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05] +// GFX12: encoding: [0x05,0x00,0x55,0xd6,0xe9,0x04,0x0c,0x00,0x01,0x77,0x39,0x05] v_cmp_ne_i32_e64_dpp vcc_lo, v1, s2 dpp8:[7,6,5,4,3,2,1,0] -// GFX1150: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// GFX12: encoding: [0x6a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] -// This is a regression test for potential changes in the future. 
v_cmp_le_f32 vcc_lo, v1, v2 row_mirror -// GFX1150: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] +// GFX12: encoding: [0xfa,0x04,0x26,0x7c,0x01,0x40,0x01,0xff] + +v_cmp_eq_f32_e64_dpp s5, v1, s99 row_mirror +// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x40,0x01,0xff] + +v_cmp_eq_f32_e64_dpp s5, v1, s99 row_half_mirror +// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x41,0x01,0xff] + +v_cmp_eq_f32_e64_dpp s5, v1, s99 row_shl:15 +// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x0f,0x01,0xff] + +v_cmp_eq_f32_e64_dpp s5, v1, s99 row_shr:1 +// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x11,0x01,0xff] + +v_cmp_eq_f32_e64_dpp s5, v1, s99 row_ror:1 +// GFX12: encoding: [0x05,0x00,0x12,0xd4,0xfa,0xc6,0x00,0x00,0x01,0x21,0x01,0xff] + +v_cmp_eq_f32_e64_dpp vcc_hi, |v1|, -s99 row_share:15 row_mask:0x0 bank_mask:0x1 +// GFX12: encoding: [0x6b,0x01,0x12,0xd4,0xfa,0xc6,0x00,0x40,0x01,0x5f,0x01,0x01] + +v_cmp_eq_f32_e64_dpp ttmp15, -v1, |s99| row_xmask:0 row_mask:0x1 bank_mask:0x3 bound_ctrl:1 fi:0 +// GFX12: encoding: [0x7b,0x02,0x12,0xd4,0xfa,0xc6,0x00,0x20,0x01,0x60,0x09,0x13] + +v_cmpx_gt_f32_e64_dpp v255, 4.0 dpp8:[0,0,0,0,0,0,0,0] fi:0 +// GFX12: encoding: [0x7e,0x00,0x94,0xd4,0xe9,0xec,0x01,0x00,0xff,0x00,0x00,0x00] // // Elements of CPol operand can be given in any order diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s index b50b18e..037fa39 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp16.s @@ -7,6 +7,14 @@ v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_class_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x7d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_class_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -59,6 +67,14 @@ v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_class_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x7d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_class_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -114,6 +130,14 @@ v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_class_f32_e64_dpp s5, v1, s2 
quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x7e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_class_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -166,6 +190,14 @@ v_cmp_class_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_class_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x7e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_class_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -221,6 +253,14 @@ v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x02,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -273,6 +313,14 @@ v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x02,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -328,6 +376,14 @@ v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp 
s5, v1, 2.0 quad_perm:[3,2,1,0]
+// W32: [0x05,0x00,0x12,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
v_cmp_eq_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
// W32: [0x05,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
@@ -380,6 +436,14 @@ v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
// W64: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction

+v_cmp_eq_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0]
+// W64: [0x0a,0x00,0x12,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
// W64: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
@@ -435,6 +499,14 @@ v_cmp_eq_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
// W32: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction

+v_cmp_eq_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: [0x05,0x00,0x32,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
v_cmp_eq_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
// W32: [0x05,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
@@ -487,6 +559,14 @@ v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
// W64: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction

+v_cmp_eq_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: [0x0a,0x00,0x32,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
// W64: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
@@ -542,6 +622,14 @@ v_cmp_eq_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
// W32: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction

+v_cmp_eq_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: [0x05,0x00,0x42,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
v_cmp_eq_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
// W32: [0x05,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
@@ -594,6 +682,14 @@ v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
// W64: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction

+v_cmp_eq_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: [0x0a,0x00,0x42,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
// W64: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
@@ -649,6 +745,14 @@ v_cmp_eq_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
// W32: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction

+v_cmp_eq_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: [0x05,0x00,0x3a,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
v_cmp_eq_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
// W32: [0x05,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
@@ -701,6 +805,14 @@ v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0]
// W64: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction

+v_cmp_eq_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0]
+// W64: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0]
+// W64: [0x0a,0x00,0x3a,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3]
// W64: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
@@ -756,6 +868,14 @@ v_cmp_eq_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0]
// W32: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction

+v_cmp_eq_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0]
+// W32: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
+v_cmp_eq_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0]
+// W32: [0x05,0x00,0x4a,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff]
+// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
+
v_cmp_eq_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3]
// W32: [0x05,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff]
// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction
@@ -808,6 +928,14 @@ v_cmp_eq_u32_e64_dpp
s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x4a,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -863,6 +991,14 @@ v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x06,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -915,6 +1051,14 @@ v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x06,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -970,6 +1114,14 @@ v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x16,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1022,6 +1174,14 @@ v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_f32_e64_dpp 
s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x16,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1077,6 +1237,14 @@ v_cmp_ge_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x36,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1129,6 +1297,14 @@ v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x36,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1184,6 +1360,14 @@ v_cmp_ge_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x46,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1236,6 +1420,14 @@ v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_ge_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x46,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1291,6 +1483,14 @@ v_cmp_ge_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x3e,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1343,6 +1543,14 @@ v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x3e,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1398,6 +1606,14 @@ v_cmp_ge_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x4e,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1450,6 +1666,14 @@ v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x4e,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction 
+ v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1505,6 +1729,14 @@ v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x04,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1557,6 +1789,14 @@ v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x04,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1612,6 +1852,14 @@ v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x14,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1664,6 +1912,14 @@ v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x14,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction @@ -1719,6 +1975,14 @@ v_cmp_gt_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x34,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1771,6 +2035,14 @@ v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x34,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1826,6 +2098,14 @@ v_cmp_gt_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x44,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1878,6 +2158,14 @@ v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x44,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1933,6 +2221,14 @@ v_cmp_gt_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction +v_cmp_gt_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x3c,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1985,6 +2281,14 @@ v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x3c,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2040,6 +2344,14 @@ v_cmp_gt_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x4c,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2092,6 +2404,14 @@ v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x4c,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2147,6 +2467,14 @@ v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x03,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2199,6 +2527,14 @@ v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x03,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2254,6 +2590,14 @@ v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x13,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2306,6 +2650,14 @@ v_cmp_le_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x13,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2361,6 +2713,14 @@ v_cmp_le_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x33,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction + v_cmp_le_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2413,6 +2773,14 @@ v_cmp_le_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x33,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2468,6 +2836,14 @@ v_cmp_le_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x43,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2520,6 +2896,14 @@ v_cmp_le_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x43,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2575,6 +2959,14 @@ v_cmp_le_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x3b,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: 
invalid operand for instruction @@ -2627,6 +3019,14 @@ v_cmp_le_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x3b,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2682,6 +3082,14 @@ v_cmp_le_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x4b,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2734,6 +3142,14 @@ v_cmp_le_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x4b,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2789,6 +3205,14 @@ v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x05,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2841,6 +3265,14 @@ v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x05,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2896,6 +3328,14 @@ v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lg_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x15,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lg_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2948,6 +3388,14 @@ v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lg_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x15,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3003,6 +3451,14 @@ v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x01,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3055,6 +3511,14 @@ v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x01,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3110,6 +3574,14 @@ v_cmp_lt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x11,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3162,6 +3634,14 @@ v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x11,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3217,6 +3697,14 @@ v_cmp_lt_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x31,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3269,6 +3757,14 @@ v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: 
[0x0a,0x00,0x31,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3324,6 +3820,14 @@ v_cmp_lt_i32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x41,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3376,6 +3880,14 @@ v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x41,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3431,6 +3943,14 @@ v_cmp_lt_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x39,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3483,6 +4003,14 @@ v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x39,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: 
[0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3538,6 +4066,14 @@ v_cmp_lt_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_u32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x49,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3590,6 +4126,14 @@ v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x49,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3645,6 +4189,14 @@ v_cmp_ne_i16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_i16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x35,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_i16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3697,6 +4249,14 @@ v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_i16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x35,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3752,6 +4312,14 @@ v_cmp_ne_i32_e64_dpp s5, v1, v2 
quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_i32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x45,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_i32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3804,6 +4372,14 @@ v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_i32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x45,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3859,6 +4435,14 @@ v_cmp_ne_u16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_u16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x3d,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_u16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3911,6 +4495,14 @@ v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_u16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x3d,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -3966,6 +4558,14 @@ v_cmp_ne_u32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_u32_e64_dpp s5, v1, s2 
quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, 10 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x4d,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_u32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4018,6 +4618,14 @@ v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_u32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, 10 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x4d,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4073,6 +4681,14 @@ v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_neq_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x0d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_neq_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4125,6 +4741,14 @@ v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x0d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4180,6 +4804,14 @@ v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_neq_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp 
s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x1d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_neq_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4232,6 +4864,14 @@ v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_neq_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x1d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4287,6 +4927,14 @@ v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nge_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x09,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nge_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4339,6 +4987,14 @@ v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x09,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4394,6 +5050,14 @@ v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nge_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x19,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
v_cmp_nge_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4446,6 +5110,14 @@ v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nge_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x19,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4501,6 +5173,14 @@ v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ngt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x0b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ngt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4553,6 +5233,14 @@ v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x0b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4608,6 +5296,14 @@ v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ngt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x1b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ngt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction @@ -4660,6 +5356,14 @@ v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ngt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x1b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4715,6 +5419,14 @@ v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nle_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x0c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nle_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4767,6 +5479,14 @@ v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x0c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4822,6 +5542,14 @@ v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nle_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x1c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nle_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4874,6 +5602,14 @@ v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nle_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x1c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4929,6 +5665,14 @@ v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlg_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x0a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlg_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -4981,6 +5725,14 @@ v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x0a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5036,6 +5788,14 @@ v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlg_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x1a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlg_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5088,6 +5848,14 @@ v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlg_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: 
[0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x1a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5143,6 +5911,14 @@ v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlt_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x0e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlt_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5195,6 +5971,14 @@ v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x0e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5250,6 +6034,14 @@ v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlt_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x1e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlt_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5302,6 +6094,14 @@ v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlt_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, 2.0 
quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x1e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5357,6 +6157,14 @@ v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_o_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x07,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_o_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5409,6 +6217,14 @@ v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_o_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x07,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_o_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5464,6 +6280,14 @@ v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_o_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x17,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_o_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5516,6 +6340,14 @@ v_cmp_o_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_o_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x17,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_o_f32_e64_dpp s[10:11], v1, v2 
quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5571,6 +6403,14 @@ v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x08,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_u_f16_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5623,6 +6463,14 @@ v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x08,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_u_f16_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5678,6 +6526,14 @@ v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[3,2,1,0] // W32: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f32_e64_dpp s5, v1, s2 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, 2.0 quad_perm:[3,2,1,0] +// W32: [0x05,0x00,0x18,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_u_f32_e64_dpp s5, v1, v2 quad_perm:[0,1,2,3] // W32: [0x05,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -5730,6 +6586,14 @@ v_cmp_u_f32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] // W64: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f32_e64_dpp s[10:11], v1, s2 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, 2.0 quad_perm:[3,2,1,0] +// W64: [0x0a,0x00,0x18,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_u_f32_e64_dpp s[10:11], v1, v2 quad_perm:[0,1,2,3] // W64: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction diff --git 
a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s index b9dc614..c5ba45e 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3c_dpp8.s @@ -7,6 +7,14 @@ v_cmp_class_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_class_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x7d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_class_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -27,6 +35,14 @@ v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_class_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -46,6 +62,14 @@ v_cmp_class_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_class_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x7e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x7e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_class_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -66,6 +90,14 @@ v_cmp_class_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_class_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_class_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x7e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_class_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: 
[0x68,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -85,6 +117,14 @@ v_cmp_eq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x02,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x02,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -105,6 +145,14 @@ v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x02,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -124,6 +172,14 @@ v_cmp_eq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x12,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x12,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -144,6 +200,14 @@ v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x12,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x12,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -163,6 +227,14 
@@ v_cmp_eq_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x32,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x32,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -183,6 +255,14 @@ v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x32,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x32,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -202,6 +282,14 @@ v_cmp_eq_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x42,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x42,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -222,6 +310,14 @@ v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x42,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x42,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -241,6 +337,14 @@ v_cmp_eq_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x3a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x3a,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -261,6 +365,14 @@ v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x3a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x3a,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -280,6 +392,14 @@ v_cmp_eq_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x4a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x4a,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -300,6 +420,14 @@ v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_eq_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x4a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_eq_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x4a,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_eq_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -319,6 +447,14 @@ v_cmp_ge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: 
[0x05,0x00,0x06,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x06,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -339,6 +475,14 @@ v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x06,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -358,6 +502,14 @@ v_cmp_ge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x16,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x16,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -378,6 +530,14 @@ v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x16,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x16,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -397,6 +557,14 @@ v_cmp_ge_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x36,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_ge_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x36,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -417,6 +585,14 @@ v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x36,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x36,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -436,6 +612,14 @@ v_cmp_ge_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x46,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x46,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -456,6 +640,14 @@ v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x46,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x46,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -475,6 +667,14 @@ v_cmp_ge_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x3e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x3e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -495,6 +695,14 @@ v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x3e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x3e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -514,6 +722,14 @@ v_cmp_ge_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x4e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x4e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -534,6 +750,14 @@ v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ge_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x4e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ge_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x4e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ge_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -553,6 +777,14 @@ v_cmp_gt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x04,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x04,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: 
[0x69,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -573,6 +805,14 @@ v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x04,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -592,6 +832,14 @@ v_cmp_gt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x14,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x14,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -612,6 +860,14 @@ v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x14,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x14,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -631,6 +887,14 @@ v_cmp_gt_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x34,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x34,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -651,6 +915,14 
@@ v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x34,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x34,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -670,6 +942,14 @@ v_cmp_gt_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x44,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x44,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -690,6 +970,14 @@ v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x44,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x44,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -709,6 +997,14 @@ v_cmp_gt_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x3c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x3c,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -729,6 +1025,14 @@ v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x3c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x3c,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -748,6 +1052,14 @@ v_cmp_gt_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x4c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x4c,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -768,6 +1080,14 @@ v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_gt_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x4c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_gt_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x4c,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_gt_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -787,6 +1107,14 @@ v_cmp_le_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x03,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x03,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -807,6 +1135,14 @@ v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: 
[0x0a,0x00,0x03,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x03,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -826,6 +1162,14 @@ v_cmp_le_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x13,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x13,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -846,6 +1190,14 @@ v_cmp_le_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x13,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x13,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -865,6 +1217,14 @@ v_cmp_le_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x33,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x33,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -885,6 +1245,14 @@ v_cmp_le_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x33,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_le_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x33,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -904,6 +1272,14 @@ v_cmp_le_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x43,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x43,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -924,6 +1300,14 @@ v_cmp_le_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x43,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x43,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -943,6 +1327,14 @@ v_cmp_le_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x3b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x3b,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -963,6 +1355,14 @@ v_cmp_le_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x3b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x3b,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -982,6 +1382,14 @@ v_cmp_le_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x4b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x4b,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1002,6 +1410,14 @@ v_cmp_le_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_le_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x4b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_le_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x4b,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_le_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1021,6 +1437,14 @@ v_cmp_lg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x05,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x05,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1041,6 +1465,14 @@ v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x05,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: 
[0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1060,6 +1492,14 @@ v_cmp_lg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lg_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x15,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x15,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lg_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1080,6 +1520,14 @@ v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lg_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x15,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lg_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x15,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lg_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1099,6 +1547,14 @@ v_cmp_lt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x01,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1119,6 +1575,14 @@ v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x01,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x01,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ 
-1138,6 +1602,14 @@ v_cmp_lt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x11,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x11,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1158,6 +1630,14 @@ v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x11,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x11,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1177,6 +1657,14 @@ v_cmp_lt_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x31,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x31,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1197,6 +1685,14 @@ v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x31,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x31,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1216,6 +1712,14 @@ v_cmp_lt_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // 
W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x41,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x41,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1236,6 +1740,14 @@ v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x41,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x41,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1255,6 +1767,14 @@ v_cmp_lt_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x39,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x39,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1275,6 +1795,14 @@ v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x39,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x39,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1294,6 +1822,14 @@ v_cmp_lt_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: 
[0x05,0x00,0x49,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x49,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1314,6 +1850,14 @@ v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_lt_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x49,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_lt_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x49,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_lt_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1333,6 +1877,14 @@ v_cmp_ne_i16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_i16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x35,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x35,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_i16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1353,6 +1905,14 @@ v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_i16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x35,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x35,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1372,6 +1932,14 @@ v_cmp_ne_i32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_i32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + 
+v_cmp_ne_i32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x45,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_i32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1392,6 +1960,14 @@ v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_i32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x45,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_i32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x45,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1411,6 +1987,14 @@ v_cmp_ne_u16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_u16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x3d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x3d,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_u16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1431,6 +2015,14 @@ v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_u16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x3d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u16_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x3d,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1450,6 +2042,14 @@ v_cmp_ne_u32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_u32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x4d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s5, v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x4d,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: 
:[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_u32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1470,6 +2070,14 @@ v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ne_u32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x4d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ne_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x4d,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ne_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1489,6 +2097,14 @@ v_cmp_neq_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_neq_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x0d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x0d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_neq_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1509,6 +2125,14 @@ v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_neq_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x0d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1528,6 +2152,14 @@ v_cmp_neq_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_neq_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x1d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x1d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_neq_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: 
[0x69,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1548,6 +2180,14 @@ v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_neq_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x1d,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_neq_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x1d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_neq_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1567,6 +2207,14 @@ v_cmp_nge_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nge_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x09,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x09,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nge_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1587,6 +2235,14 @@ v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nge_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x09,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1606,6 +2262,14 @@ v_cmp_nge_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nge_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x19,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x19,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nge_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction @@ -1626,6 +2290,14 @@ v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nge_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x19,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nge_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x19,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nge_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1645,6 +2317,14 @@ v_cmp_ngt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ngt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x0b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x0b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ngt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1665,6 +2345,14 @@ v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ngt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x0b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1684,6 +2372,14 @@ v_cmp_ngt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ngt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x1b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x1b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ngt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1704,6 +2400,14 @@ v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: 
[0x0a,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_ngt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x1b,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_ngt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x1b,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_ngt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1723,6 +2427,14 @@ v_cmp_nle_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nle_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x0c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x0c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nle_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1743,6 +2455,14 @@ v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nle_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x0c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1762,6 +2482,14 @@ v_cmp_nle_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nle_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x1c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x1c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nle_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1782,6 +2510,14 @@ v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction +v_cmp_nle_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x1c,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nle_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x1c,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nle_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1801,6 +2537,14 @@ v_cmp_nlg_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlg_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x0a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x0a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlg_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1821,6 +2565,14 @@ v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlg_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x0a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1840,6 +2592,14 @@ v_cmp_nlg_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlg_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x1a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x1a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlg_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1860,6 +2620,14 @@ v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlg_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: 
[0x0a,0x00,0x1a,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlg_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x1a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlg_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1879,6 +2647,14 @@ v_cmp_nlt_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlt_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x0e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x0e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlt_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1899,6 +2675,14 @@ v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlt_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x0e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1918,6 +2702,14 @@ v_cmp_nlt_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlt_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x1e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_nlt_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x1e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlt_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1938,6 +2730,14 @@ v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_nlt_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x1e,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for 
instruction + +v_cmp_nlt_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x1e,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_nlt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1957,6 +2757,14 @@ v_cmp_o_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_o_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x07,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x07,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_o_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1977,6 +2785,14 @@ v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_o_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x07,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -1996,6 +2812,14 @@ v_cmp_o_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_o_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x17,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x17,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_o_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2016,6 +2840,14 @@ v_cmp_o_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_o_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x17,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_o_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x17,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// 
W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_o_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2035,6 +2867,14 @@ v_cmp_u_f16_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x08,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x08,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_u_f16_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2055,6 +2895,14 @@ v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f16_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x08,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2074,6 +2922,14 @@ v_cmp_u_f32_e64_dpp s5, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x05,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f32_e64_dpp s5, v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x18,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s5, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W32: [0x05,0x00,0x18,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_u_f32_e64_dpp s105, v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W32: [0x69,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W64-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction @@ -2094,6 +2950,14 @@ v_cmp_u_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: [0x0a,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction +v_cmp_u_f32_e64_dpp s[10:11], v1, s2 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x18,0xd4,0xe9,0x04,0x00,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + +v_cmp_u_f32_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] +// W64: [0x0a,0x00,0x18,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +// W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction + v_cmp_u_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] // W64: 
[0x68,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] // W32-ERR: :[[@LINE-2]]:{{[0-9]+}}: error: invalid operand for instruction diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s index 03958ba..eae2d5b2 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp16.s @@ -4,6 +4,12 @@ v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_class_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xfd,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_class_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -46,6 +52,12 @@ v_cmpx_class_f16_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 b v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_class_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_class_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xfe,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_class_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -88,6 +100,12 @@ v_cmpx_class_f32_e64_dpp -|v255|, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 b v_cmpx_eq_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_eq_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x82,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_eq_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -130,6 +148,12 @@ v_cmpx_eq_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask v_cmpx_eq_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_eq_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x92,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_eq_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -172,6 +196,12 @@ v_cmpx_eq_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_eq_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb2,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_eq_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -214,6 +244,12 @@ v_cmpx_eq_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_eq_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_eq_i32_e64_dpp v1, s2 
quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc2,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_eq_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -256,6 +292,12 @@ v_cmpx_eq_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_eq_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xba,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_eq_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -298,6 +340,12 @@ v_cmpx_eq_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_eq_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_eq_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_eq_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xca,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_eq_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -340,6 +388,12 @@ v_cmpx_eq_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_ge_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x86,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_ge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -382,6 +436,12 @@ v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_ge_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x96,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_ge_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -424,6 +484,12 @@ v_cmpx_ge_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_ge_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb6,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_ge_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -466,6 +532,12 @@ v_cmpx_ge_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_ge_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 
+v_cmpx_ge_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc6,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_ge_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -508,6 +580,12 @@ v_cmpx_ge_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_ge_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_ge_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xbe,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_ge_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -550,6 +628,12 @@ v_cmpx_ge_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_ge_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_ge_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ge_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xce,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_ge_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -592,6 +676,12 @@ v_cmpx_ge_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_gt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x84,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_gt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -634,6 +724,12 @@ v_cmpx_gt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask v_cmpx_gt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_gt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x94,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_gt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -676,6 +772,12 @@ v_cmpx_gt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask v_cmpx_gt_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_gt_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb4,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_gt_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -718,6 +820,12 @@ v_cmpx_gt_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_gt_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: 
[0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_gt_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc4,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_gt_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -760,6 +868,12 @@ v_cmpx_gt_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_gt_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_gt_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xbc,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_gt_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -802,6 +916,12 @@ v_cmpx_gt_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_gt_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_gt_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_gt_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xcc,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_gt_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -844,6 +964,12 @@ v_cmpx_gt_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_le_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_le_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x83,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_le_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -886,6 +1012,12 @@ v_cmpx_le_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask v_cmpx_le_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_le_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x93,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_le_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -928,6 +1060,12 @@ v_cmpx_le_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask v_cmpx_le_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_le_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb3,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_le_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -970,6 +1108,12 @@ v_cmpx_le_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_le_i32_e64_dpp v1, v2 
quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_le_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc3,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_le_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1012,6 +1156,12 @@ v_cmpx_le_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_le_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_le_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xbb,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_le_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1054,6 +1204,12 @@ v_cmpx_le_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_le_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_le_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_le_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xcb,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_le_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1096,6 +1252,12 @@ v_cmpx_le_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_lg_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lg_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lg_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x85,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_lg_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1138,6 +1300,12 @@ v_cmpx_lg_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask v_cmpx_lg_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lg_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lg_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x95,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_lg_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1180,6 +1348,12 @@ v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x81,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_lt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1222,6 +1396,12 @@ v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask 
v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x91,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_lt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1264,6 +1444,12 @@ v_cmpx_lt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask v_cmpx_lt_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lt_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb1,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_lt_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1306,6 +1492,12 @@ v_cmpx_lt_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_lt_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lt_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc1,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_lt_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1348,6 +1540,12 @@ v_cmpx_lt_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_lt_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lt_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb9,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_lt_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1390,6 +1588,12 @@ v_cmpx_lt_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_lt_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_lt_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_lt_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc9,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_lt_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1432,6 +1636,12 @@ v_cmpx_lt_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_ne_i16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_ne_i16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_i16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xb5,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_ne_i16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1474,6 +1684,12 @@ v_cmpx_ne_i16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 
bank_mask:0x0 bound_c v_cmpx_ne_i32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_ne_i32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_i32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xc5,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_ne_i32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1516,6 +1732,12 @@ v_cmpx_ne_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_ne_u16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_ne_u16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_u16_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xbd,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_ne_u16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1558,6 +1780,12 @@ v_cmpx_ne_u16_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_ne_u32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_ne_u32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ne_u32_e64_dpp v1, 10 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0xcd,0xd4,0xfa,0x14,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_ne_u32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1600,6 +1828,12 @@ v_cmpx_ne_u32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_c v_cmpx_neq_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_neq_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_neq_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x8d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_neq_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1642,6 +1876,12 @@ v_cmpx_neq_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mas v_cmpx_neq_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_neq_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_neq_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x9d,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_neq_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1684,6 +1924,12 @@ v_cmpx_neq_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mas v_cmpx_nge_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_nge_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nge_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x89,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_nge_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1726,6 +1972,12 @@ v_cmpx_nge_f16_e64_dpp 
-|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mas v_cmpx_nge_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_nge_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nge_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x99,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_nge_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1768,6 +2020,12 @@ v_cmpx_nge_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mas v_cmpx_ngt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_ngt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ngt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x8b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_ngt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1810,6 +2068,12 @@ v_cmpx_ngt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mas v_cmpx_ngt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_ngt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_ngt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x9b,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_ngt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1852,6 +2116,12 @@ v_cmpx_ngt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mas v_cmpx_nle_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_nle_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nle_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x8c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_nle_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1894,6 +2164,12 @@ v_cmpx_nle_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mas v_cmpx_nle_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_nle_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nle_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x9c,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_nle_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1936,6 +2212,12 @@ v_cmpx_nle_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mas v_cmpx_nlg_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_nlg_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlg_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x8a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_nlg_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: 
[0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -1978,6 +2260,12 @@ v_cmpx_nlg_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mas v_cmpx_nlg_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_nlg_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlg_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x9a,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_nlg_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -2020,6 +2308,12 @@ v_cmpx_nlg_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mas v_cmpx_nlt_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_nlt_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlt_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x8e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_nlt_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -2062,6 +2356,12 @@ v_cmpx_nlt_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mas v_cmpx_nlt_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_nlt_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_nlt_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x9e,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_nlt_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -2104,6 +2404,12 @@ v_cmpx_nlt_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mas v_cmpx_o_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_o_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_o_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x87,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_o_f16_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -2146,6 +2452,12 @@ v_cmpx_o_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask: v_cmpx_o_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_o_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_o_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x97,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_o_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -2188,6 +2500,12 @@ v_cmpx_o_f32_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask: v_cmpx_u_f16_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_u_f16_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f16_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x88,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_u_f16_e64_dpp v1, v2 
quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] @@ -2230,6 +2548,12 @@ v_cmpx_u_f16_e64_dpp -|v255|, -|v255| clamp row_xmask:15 row_mask:0x3 bank_mask: v_cmpx_u_f32_e64_dpp v1, v2 quad_perm:[3,2,1,0] // GFX12: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] +v_cmpx_u_f32_e64_dpp v1, s2 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x00,0x00,0x01,0x1b,0x00,0xff] + +v_cmpx_u_f32_e64_dpp v1, 2.0 quad_perm:[3,2,1,0] +// GFX12: [0x7e,0x00,0x98,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x1b,0x00,0xff] + v_cmpx_u_f32_e64_dpp v1, v2 quad_perm:[0,1,2,3] // GFX12: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0xe4,0x00,0xff] diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s index efc6168..d63ca0c 100644 --- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s +++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3cx_dpp8.s @@ -7,6 +7,12 @@ v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xfd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_class_f16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xfd,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f16_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xfd,0xd4,0xea,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_class_f16_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x01,0xfd,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] @@ -16,6 +22,12 @@ v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xfe,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_class_f32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xfe,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_class_f32_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xfe,0xd4,0xea,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_class_f32_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x01,0xfe,0xd4,0xe9,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] @@ -28,6 +40,12 @@ v_cmpx_eq_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_eq_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x82,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_eq_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x82,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x82,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_eq_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x82,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -40,6 +58,12 @@ v_cmpx_eq_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_eq_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x92,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_eq_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x92,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_eq_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x92,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_eq_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x92,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -49,6 +73,12 @@ v_cmpx_eq_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_eq_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: 
[0x7e,0x00,0xb2,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_eq_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb2,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb2,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_eq_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xb2,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -58,6 +88,12 @@ v_cmpx_eq_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_eq_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xc2,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_eq_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc2,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc2,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_eq_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xc2,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -67,6 +103,12 @@ v_cmpx_eq_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_eq_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xba,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_eq_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xba,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xba,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_eq_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xba,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -76,6 +118,12 @@ v_cmpx_eq_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_eq_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xca,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_eq_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xca,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_eq_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xca,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_eq_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xca,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -88,6 +136,12 @@ v_cmpx_ge_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_ge_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x86,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_ge_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x86,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x86,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_ge_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x86,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -100,6 +154,12 @@ v_cmpx_ge_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_ge_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x96,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_ge_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x96,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ge_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x96,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_ge_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x96,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -109,6 +169,12 @@ v_cmpx_ge_i16_e64_dpp 
v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_ge_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xb6,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_ge_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb6,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb6,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_ge_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xb6,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -118,6 +184,12 @@ v_cmpx_ge_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_ge_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xc6,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_ge_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc6,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc6,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_ge_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xc6,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -127,6 +199,12 @@ v_cmpx_ge_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_ge_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xbe,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_ge_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xbe,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xbe,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_ge_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xbe,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -136,6 +214,12 @@ v_cmpx_ge_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_ge_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xce,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_ge_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xce,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ge_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xce,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_ge_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xce,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -148,6 +232,12 @@ v_cmpx_gt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_gt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x84,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_gt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x84,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x84,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_gt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x84,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -160,6 +250,12 @@ v_cmpx_gt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_gt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x94,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_gt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x94,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_gt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x94,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_gt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: 
[0x7e,0x83,0x94,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -169,6 +265,12 @@ v_cmpx_gt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_gt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xb4,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_gt_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb4,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb4,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_gt_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xb4,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -178,6 +280,12 @@ v_cmpx_gt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_gt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xc4,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_gt_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc4,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc4,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_gt_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xc4,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -187,6 +295,12 @@ v_cmpx_gt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_gt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xbc,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_gt_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xbc,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xbc,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_gt_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xbc,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -196,6 +310,12 @@ v_cmpx_gt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_gt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xcc,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_gt_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xcc,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_gt_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xcc,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_gt_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xcc,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -208,6 +328,12 @@ v_cmpx_le_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_le_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x83,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_le_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x83,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_le_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x83,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_le_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x83,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -220,6 +346,12 @@ v_cmpx_le_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_le_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x93,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_le_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x93,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_le_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: 
[0x7e,0x02,0x93,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_le_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x93,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -229,6 +361,12 @@ v_cmpx_le_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_le_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xb3,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_le_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb3,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb3,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_le_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xb3,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -238,6 +376,12 @@ v_cmpx_le_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_le_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xc3,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_le_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc3,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc3,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_le_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xc3,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -247,6 +391,12 @@ v_cmpx_le_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_le_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xbb,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_le_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xbb,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xbb,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_le_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xbb,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -256,6 +406,12 @@ v_cmpx_le_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_le_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xcb,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_le_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xcb,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_le_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xcb,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_le_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xcb,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -268,6 +424,12 @@ v_cmpx_lg_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_lg_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x85,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_lg_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x85,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x85,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_lg_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x85,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -280,6 +442,12 @@ v_cmpx_lg_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_lg_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x95,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_lg_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: 
[0x7e,0x02,0x95,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lg_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x95,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_lg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x95,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -292,6 +460,12 @@ v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_lt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_lt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x81,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x81,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_lt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x81,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -304,6 +478,12 @@ v_cmpx_lt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_lt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x91,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_lt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x91,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_lt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x91,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_lt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x91,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -313,6 +493,12 @@ v_cmpx_lt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_lt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xb1,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_lt_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb1,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb1,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_lt_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xb1,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -322,6 +508,12 @@ v_cmpx_lt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_lt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xc1,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_lt_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc1,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc1,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_lt_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xc1,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -331,6 +523,12 @@ v_cmpx_lt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_lt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xb9,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_lt_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb9,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb9,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_lt_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xb9,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -340,6 +538,12 @@ v_cmpx_lt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_lt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: 
[0x7e,0x00,0xc9,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_lt_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc9,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_lt_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc9,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_lt_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xc9,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -349,6 +553,12 @@ v_cmpx_ne_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_ne_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xb5,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_ne_i16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb5,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xb5,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_ne_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xb5,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -358,6 +568,12 @@ v_cmpx_ne_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_ne_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xc5,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_ne_i32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc5,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xc5,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_ne_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xc5,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -367,6 +583,12 @@ v_cmpx_ne_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_ne_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xbd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_ne_u16_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xbd,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u16_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xbd,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_ne_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xbd,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -376,6 +598,12 @@ v_cmpx_ne_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_ne_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x00,0xcd,0xd4,0xea,0x04,0x02,0x00,0x01,0x77,0x39,0x05] +v_cmpx_ne_u32_e64_dpp v1, s2 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xcd,0xd4,0xea,0x04,0x00,0x00,0x01,0x77,0x39,0x05] + +v_cmpx_ne_u32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x00,0xcd,0xd4,0xea,0x14,0x01,0x00,0x01,0x77,0x39,0x05] + v_cmpx_ne_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x00,0xcd,0xd4,0xe9,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] @@ -388,6 +616,12 @@ v_cmpx_neq_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_neq_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x8d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_neq_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x8d,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x8d,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_neq_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x8d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -400,6 +634,12 @@ v_cmpx_neq_f32_e64_dpp |v1|, -v2 
dpp8:[7,6,5,4,3,2,1,0] v_cmpx_neq_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x9d,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_neq_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x9d,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_neq_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x9d,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_neq_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x9d,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -412,6 +652,12 @@ v_cmpx_nge_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_nge_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x89,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_nge_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x89,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x89,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_nge_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x89,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -424,6 +670,12 @@ v_cmpx_nge_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_nge_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x99,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_nge_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x99,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nge_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x99,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_nge_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x99,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -436,6 +688,12 @@ v_cmpx_ngt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_ngt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x8b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_ngt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x8b,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x8b,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_ngt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x8b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -448,6 +706,12 @@ v_cmpx_ngt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_ngt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x9b,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_ngt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x9b,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_ngt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x9b,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_ngt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x9b,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -460,6 +724,12 @@ v_cmpx_nle_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_nle_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x8c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_nle_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x8c,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: 
[0x7e,0x02,0x8c,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_nle_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x8c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -472,6 +742,12 @@ v_cmpx_nle_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_nle_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x9c,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_nle_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x9c,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nle_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x9c,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_nle_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x9c,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -484,6 +760,12 @@ v_cmpx_nlg_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_nlg_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x8a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_nlg_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x8a,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x8a,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_nlg_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x8a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -496,6 +778,12 @@ v_cmpx_nlg_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_nlg_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x9a,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_nlg_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x9a,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlg_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x9a,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_nlg_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x9a,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -508,6 +796,12 @@ v_cmpx_nlt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_nlt_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x8e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_nlt_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x8e,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x8e,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_nlt_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x8e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -520,6 +814,12 @@ v_cmpx_nlt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_nlt_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x9e,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_nlt_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x9e,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_nlt_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x9e,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_nlt_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x9e,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -532,6 +832,12 @@ v_cmpx_o_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_o_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: 
[0x7e,0x02,0x87,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_o_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x87,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_o_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x87,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_o_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x87,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -544,6 +850,12 @@ v_cmpx_o_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_o_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x97,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_o_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x97,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_o_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x97,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_o_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x97,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -556,6 +868,12 @@ v_cmpx_u_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_u_f16_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x88,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_u_f16_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x88,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_u_f16_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x88,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_u_f16_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x88,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] @@ -568,5 +886,11 @@ v_cmpx_u_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] v_cmpx_u_f32_e64_dpp -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] fi:1 // GFX12: [0x7e,0x02,0x98,0xd4,0xea,0x04,0x02,0x20,0x01,0x77,0x39,0x05] +v_cmpx_u_f32_e64_dpp -v1, |s2| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x98,0xd4,0xea,0x04,0x00,0x20,0x01,0x77,0x39,0x05] + +v_cmpx_u_f32_e64_dpp -v1, |2.0| dpp8:[7,6,5,4,3,2,1,0] fi:1 +// GFX12: [0x7e,0x02,0x98,0xd4,0xea,0xe8,0x01,0x20,0x01,0x77,0x39,0x05] + v_cmpx_u_f32_e64_dpp -|v255|, -|v255| clamp dpp8:[0,0,0,0,0,0,0,0] fi:0 // GFX12: [0x7e,0x83,0x98,0xd4,0xe9,0xfe,0x03,0x60,0xff,0x00,0x00,0x00] diff --git a/llvm/test/MC/AMDGPU/gfx12_err.s b/llvm/test/MC/AMDGPU/gfx12_err.s index 245ca5f..8b2565c 100644 --- a/llvm/test/MC/AMDGPU/gfx12_err.s +++ b/llvm/test/MC/AMDGPU/gfx12_err.s @@ -127,19 +127,3 @@ s_prefetch_inst s[14:15], 0xffffff, m0, 7 // GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: expected a 24-bit signed offset // GFX12-ERR: s_prefetch_inst s[14:15], 0xffffff, m0, 7 // GFX12-ERR: ^ - -v_cmp_le_f32 vcc_lo, v1, s2 row_mirror -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: -// FIXME add test when VOPC e64_dpp src1 asm is fixed - -v_cmp_le_f32 vcc_lo, v1, s2 quad_perm:[1,1,1,1] -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: -// FIXME add test when VOPC e64_dpp src1 asm is fixed - -v_cmpx_gt_u16 v1, s2 op_sel:[1,1] quad_perm:[1,1,1,1] -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: -// FIXME add test when VOPC e64_dpp src1 asm is fixed - -v_cmpx_class_f16_u16 v1, 2.0 quad_perm:[1,1,1,1] -// GFX12-ERR: :[[@LINE-1]]:{{[0-9]+}}: error: -// FIXME add test when VOPC e64_dpp src1 asm is fixed diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp16.txt index e6ea6da..13e34ca 100644 --- 
a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp16.txt @@ -21,6 +21,10 @@ # W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_class_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x7d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_class_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x7d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -76,6 +80,10 @@ # W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_class_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_class_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x7e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_class_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x7e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -131,6 +139,10 @@ # W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_eq_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x02,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_eq_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x02,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -186,6 +198,10 @@ # W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_eq_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x12,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_eq_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x12,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -241,6 +257,10 @@ # W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_eq_i16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x32,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_eq_i16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x32,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -296,6 +316,10 @@ # W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_eq_i32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x42,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_eq_i32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x42,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -351,6 +375,10 @@ # W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_eq_u16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x3a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_eq_u16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x3a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -406,6 +434,10 @@ # W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_eq_u32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x4a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_eq_u32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x4a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -461,6 +493,10 @@ # W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_ge_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x06,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_ge_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x06,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -516,6 +552,10 @@ # W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_ge_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x16,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_ge_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x16,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -571,6 +611,10 @@ # W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_ge_i16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x36,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_ge_i16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x36,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -606,6 +650,9 @@ # GFX12: v_cmp_ge_i16_e64_dpp null, v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0x7c,0x00,0x36,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 +# GFX12: v_cmp_ge_i16_e64_dpp null, v255, 10 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7c,0x00,0x36,0xd4,0xfa,0x14,0x01,0x00,0xff,0x6f,0x0d,0x30] +0x7c,0x00,0x36,0xd4,0xfa,0x14,0x01,0x00,0xff,0x6f,0x0d,0x30 + # W32: v_cmp_ge_i32_e64_dpp s10, v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] # W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 quad_perm:[3,2,1,0] row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff] 0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x1b,0x00,0xff @@ -626,6 +673,10 @@ # W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_ge_i32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x46,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_ge_i32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x46,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -681,6 +732,10 @@ # W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_ge_u16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x3e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_ge_u16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x3e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -736,6 +791,10 @@ # W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_ge_u32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, 
s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x4e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_ge_u32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x4e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -791,6 +850,10 @@ # W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_gt_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x04,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_gt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x04,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -846,6 +909,10 @@ # W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_gt_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x14,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_gt_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x14,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -901,6 +968,10 @@ # W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_gt_i16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x34,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_gt_i16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x34,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -956,6 +1027,10 @@ # W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 
row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_gt_i32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x44,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_gt_i32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x44,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1011,6 +1086,10 @@ # W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_gt_u16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x3c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_gt_u16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x3c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1066,6 +1145,10 @@ # W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_gt_u32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x4c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_gt_u32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x4c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1121,6 +1204,10 @@ # W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_le_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x03,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_le_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf 
bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x03,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1176,6 +1263,10 @@ # W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_le_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_le_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x13,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_le_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x13,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1231,6 +1322,10 @@ # W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_le_i16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_le_i16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x33,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_le_i16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x33,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1286,6 +1381,10 @@ # W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_le_i32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_le_i32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x43,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_le_i32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x43,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1341,6 +1440,10 @@ # W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_le_u16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_le_u16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x3b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_le_u16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x3b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1396,6 +1499,10 @@ # W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_le_u32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_le_u32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x4b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_le_u32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x4b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1451,6 +1558,10 @@ # W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_lg_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x05,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_lg_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x05,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1506,6 +1617,10 @@ # W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_lg_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x15,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_lg_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x15,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1561,6 +1676,10 @@ # W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_lt_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x01,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_lt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x01,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1616,6 +1735,10 @@ # W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_lt_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x11,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_lt_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x11,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1671,6 +1794,10 @@ # W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_lt_i16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x31,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_lt_i16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x31,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1726,6 +1853,10 @@ # W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_lt_i32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x0a,0x00,0x41,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x41,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_lt_i32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x41,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1781,6 +1912,10 @@ # W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_lt_u16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x39,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_lt_u16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x39,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1836,6 +1971,10 @@ # W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_lt_u32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x49,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_lt_u32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x49,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1891,6 +2030,10 @@ # W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_ne_i16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x35,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_ne_i16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x35,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -1946,6 +2089,10 @@ # W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: 
[0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_ne_i32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x45,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_ne_i32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x45,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2001,6 +2148,10 @@ # W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_ne_u16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x3d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_ne_u16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x3d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2056,6 +2207,10 @@ # W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_ne_u32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x4d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_ne_u32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x4d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2111,6 +2266,10 @@ # W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_neq_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x0d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_neq_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x0d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2166,6 +2325,10 @@ # W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_neq_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x1d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_neq_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x1d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2221,6 +2384,10 @@ # W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_nge_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x09,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_nge_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x09,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2276,6 +2443,10 @@ # W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_nge_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x19,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_nge_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x19,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2331,6 +2502,10 @@ # W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_ngt_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf 
; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x0b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_ngt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x0b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2386,6 +2561,10 @@ # W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_ngt_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x1b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_ngt_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x1b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2441,6 +2620,10 @@ # W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_nle_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x0c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_nle_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x0c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2496,6 +2679,10 @@ # W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_nle_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x1c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_nle_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x1c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2551,6 +2738,10 @@ # W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_nlg_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x0a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_nlg_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x0a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2606,6 +2797,10 @@ # W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_nlg_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x1a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_nlg_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x1a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2661,6 +2856,10 @@ # W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_nlt_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x0e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_nlt_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x0e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2716,6 +2915,10 @@ # W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_nlt_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf 
; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x1e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_nlt_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x1e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2771,6 +2974,10 @@ # W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_o_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x07,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_o_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x07,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2826,6 +3033,10 @@ # W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_o_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_o_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x17,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_o_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2850,6 +3061,10 @@ # W64: v_cmp_o_f32_e64_dpp s[104:105], v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x68,0x00,0x17,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff +# W32: v_cmp_o_f32_e64_dpp s104, v1, 2.0 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x17,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x50,0x01,0xff] +# W64: v_cmp_o_f32_e64_dpp s[104:105], v1, 2.0 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x68,0x00,0x17,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x50,0x01,0xff] +0x68,0x00,0x17,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x50,0x01,0xff + # W32: v_cmp_o_f32_e64_dpp vcc_lo, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x17,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] # W64: v_cmp_o_f32_e64_dpp vcc, |v1|, -v2 row_share:15 row_mask:0x0 bank_mask:0x1 ; encoding: [0x6a,0x01,0x17,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01] 0x6a,0x01,0x17,0xd4,0xfa,0x04,0x02,0x40,0x01,0x5f,0x01,0x01 @@ -2881,6 +3096,10 @@ # W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf 
bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_u_f16_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x08,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_u_f16_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x08,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff @@ -2936,6 +3155,10 @@ # W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff] 0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x01,0x01,0xff +# W32: v_cmp_u_f32_e64_dpp s10, v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +# W64: v_cmp_u_f32_e64_dpp s[10:11], v1, s3 row_shl:1 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff] +0x0a,0x00,0x18,0xd4,0xfa,0x06,0x00,0x00,0x01,0x01,0x01,0xff + # W32: v_cmp_u_f32_e64_dpp s10, v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] # W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 row_shl:15 row_mask:0xf bank_mask:0xf ; encoding: [0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff] 0x0a,0x00,0x18,0xd4,0xfa,0x04,0x02,0x00,0x01,0x0f,0x01,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp8.txt index 98f8fd9..f36857b 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3c_dpp8.txt @@ -5,6 +5,14 @@ # W64: v_cmp_class_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_class_f16_e64_dpp s10, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x7d,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05 + +# W32: v_cmp_class_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_class_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x7d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_class_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_class_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x7d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -24,6 +32,10 @@ # W64: v_cmp_class_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_class_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_class_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x7e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x7e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_class_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_class_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x7e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -43,6 +55,10 @@ # W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_eq_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_eq_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x02,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x02,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_eq_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_eq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x02,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -62,6 +78,10 @@ # W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_eq_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x12,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_eq_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x12,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x12,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_eq_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_eq_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x12,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -81,6 +101,10 @@ # W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_eq_i16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x32,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_eq_i16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x32,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x32,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_eq_i16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_eq_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x32,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -100,6 
+124,10 @@ # W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_eq_i32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x42,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_eq_i32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x42,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x42,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_eq_i32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_eq_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x42,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -119,6 +147,10 @@ # W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_eq_u16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_eq_u16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x3a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_eq_u16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_eq_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x3a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -138,6 +170,10 @@ # W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_eq_u32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_eq_u32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x4a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_eq_u32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_eq_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x4a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -157,6 +193,10 @@ # W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_ge_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ge_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x06,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x06,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ge_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_ge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x06,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -176,6 +216,10 @@ # W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_ge_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x16,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ge_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x16,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x16,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ge_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_ge_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x16,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -184,6 +228,10 @@ # W64: v_cmp_ge_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x16,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x6a,0x01,0x16,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 +# W32: v_cmp_ge_f32_e64_dpp vcc_lo, |v1|, -2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x16,0xd4,0xe9,0xea,0x01,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ge_f32_e64_dpp vcc, |v1|, -2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x16,0xd4,0xe9,0xea,0x01,0x00,0x01,0x77,0x39,0x05] +0x6a,0x01,0x16,0xd4,0xe9,0xea,0x01,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ge_f32_e64_dpp ttmp14, -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x16,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] # W64: v_cmp_ge_f32_e64_dpp ttmp[14:15], -v1, |v2| dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7a,0x02,0x16,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05] 0x7a,0x02,0x16,0xd4,0xe9,0x04,0x02,0x20,0x01,0x77,0x39,0x05 @@ -195,6 +243,10 @@ # W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_ge_i16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x36,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ge_i16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x36,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x36,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ge_i16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_ge_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x36,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -214,6 +266,10 @@ # W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_ge_i32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x46,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ge_i32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x46,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x46,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ge_i32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x68,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_ge_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x46,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -233,6 +289,10 @@ # W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_ge_u16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ge_u16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x3e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ge_u16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_ge_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x3e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -252,6 +312,14 @@ # W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_ge_u32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x4e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + +# W32: v_cmp_ge_u32_e64_dpp s10, v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ge_u32_e64_dpp s[10:11], v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x4e,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ge_u32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_ge_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x4e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -271,6 +339,10 @@ # W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_gt_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_gt_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x04,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x04,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_gt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_gt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x04,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -290,6 +362,10 @@ # W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 
0x0a,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_gt_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x14,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_gt_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x14,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x14,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_gt_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_gt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x14,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -309,6 +385,10 @@ # W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_gt_i16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x34,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_gt_i16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x34,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x34,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_gt_i16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_gt_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x34,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -328,6 +408,10 @@ # W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_gt_i32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x44,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_gt_i32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x44,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x44,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_gt_i32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_gt_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x44,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -347,6 +431,10 @@ # W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_gt_u16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_gt_u16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x3c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_gt_u16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_gt_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x3c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -366,6 +454,10 @@ # W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, v2 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_gt_u32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_gt_u32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x4c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_gt_u32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_gt_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x4c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -385,6 +477,10 @@ # W64: v_cmp_le_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_le_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_le_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x03,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x03,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_le_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_le_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x03,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -404,6 +500,10 @@ # W64: v_cmp_le_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_le_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x13,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_le_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x13,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x13,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_le_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_le_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x13,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -423,6 +523,10 @@ # W64: v_cmp_le_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_le_i16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x33,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_le_i16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x33,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x33,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_le_i16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_le_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 
0x68,0x00,0x33,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -442,6 +546,10 @@ # W64: v_cmp_le_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_le_i32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x43,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_le_i32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x43,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x43,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_le_i32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_le_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x43,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -461,6 +569,10 @@ # W64: v_cmp_le_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_le_u16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_le_u16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x3b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_le_u16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_le_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x3b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -480,6 +592,10 @@ # W64: v_cmp_le_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_le_u32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_le_u32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x4b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_le_u32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_le_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x4b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -499,6 +615,10 @@ # W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_lg_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_lg_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x05,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x05,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_lg_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_lg_f16_e64_dpp s[104:105], v1, v2 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x05,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -518,6 +638,10 @@ # W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_lg_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x15,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_lg_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x15,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x15,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_lg_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_lg_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x15,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -537,6 +661,10 @@ # W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_lt_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x01,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_lt_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x01,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x01,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_lt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_lt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x01,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -556,6 +684,10 @@ # W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_lt_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x11,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_lt_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x11,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x11,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_lt_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_lt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x11,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -575,6 +707,10 @@ # W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_lt_i16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x31,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_lt_i16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x31,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x31,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_lt_i16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x68,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_lt_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x31,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -594,6 +730,10 @@ # W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_lt_i32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x41,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_lt_i32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x41,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x41,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_lt_i32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_lt_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x41,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -613,6 +753,10 @@ # W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_lt_u16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x39,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_lt_u16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x39,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x39,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_lt_u16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_lt_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x39,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -632,6 +776,10 @@ # W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_lt_u32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x49,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_lt_u32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x49,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x49,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_lt_u32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_lt_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x49,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -651,6 +799,10 @@ # W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_ne_i16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x35,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ne_i16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x35,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] 
+0x0a,0x00,0x35,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ne_i16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_ne_i16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x35,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -670,6 +822,10 @@ # W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_ne_i32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x45,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ne_i32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x45,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x45,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ne_i32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_ne_i32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x45,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -689,6 +845,10 @@ # W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_ne_u16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ne_u16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x3d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x3d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ne_u16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_ne_u16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x3d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -708,6 +868,10 @@ # W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_ne_u32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ne_u32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x4d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x4d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ne_u32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_ne_u32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x4d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -727,6 +891,10 @@ # W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_neq_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_neq_f16_e64_dpp s[10:11], v1, s3 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x0d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_neq_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_neq_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x0d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -746,6 +914,10 @@ # W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_neq_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_neq_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x1d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_neq_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_neq_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x1d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -765,6 +937,10 @@ # W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_nge_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_nge_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x09,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x09,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_nge_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_nge_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x09,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -784,6 +960,10 @@ # W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_nge_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x19,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_nge_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x19,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x19,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_nge_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_nge_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x19,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -803,6 +983,10 @@ # W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_ngt_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x0a,0x00,0x0b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ngt_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x0b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ngt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_ngt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x0b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -822,6 +1006,10 @@ # W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_ngt_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_ngt_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x1b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_ngt_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_ngt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x1b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -841,6 +1029,10 @@ # W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_nle_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_nle_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x0c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_nle_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_nle_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x0c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -860,6 +1052,10 @@ # W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_nle_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_nle_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x1c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_nle_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_nle_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x1c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -879,6 +1075,10 @@ # W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 
0x0a,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_nlg_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_nlg_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x0a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_nlg_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_nlg_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x0a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -898,10 +1098,18 @@ # W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_nlg_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_nlg_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x1a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_nlg_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_nlg_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x1a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_nlg_f32_e64_dpp s104, v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_nlg_f32_e64_dpp s[104:105], v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +0x68,0x00,0x1a,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_nlg_f32_e64_dpp vcc_lo, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x1a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] # W64: v_cmp_nlg_f32_e64_dpp vcc, |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x6a,0x01,0x1a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x6a,0x01,0x1a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -917,6 +1125,10 @@ # W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_nlt_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_nlt_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x0e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x0e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_nlt_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_nlt_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x0e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -936,6 +1148,10 @@ # W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_nlt_f32_e64_dpp s10, v1, s3 
dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_nlt_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x1e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x1e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_nlt_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_nlt_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x1e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -955,6 +1171,10 @@ # W64: v_cmp_o_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_o_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_o_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x07,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x07,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_o_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_o_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x07,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -974,6 +1194,10 @@ # W64: v_cmp_o_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_o_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x17,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_o_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x17,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x17,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_o_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_o_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x17,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -993,6 +1217,10 @@ # W64: v_cmp_u_f16_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x0a,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_u_f16_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_u_f16_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x08,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x08,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_u_f16_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_u_f16_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x08,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 @@ -1012,6 +1240,10 @@ # W64: v_cmp_u_f32_e64_dpp s[10:11], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 
0x0a,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# W32: v_cmp_u_f32_e64_dpp s10, v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x18,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +# W64: v_cmp_u_f32_e64_dpp s[10:11], v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x0a,0x00,0x18,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x0a,0x00,0x18,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # W32: v_cmp_u_f32_e64_dpp s104, v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] # W64: v_cmp_u_f32_e64_dpp s[104:105], v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x68,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x68,0x00,0x18,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp16.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp16.txt index eb7675f..0f933f0 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp16.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp16.txt @@ -31,6 +31,9 @@ # GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_class_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xfd,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_class_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xfd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -73,6 +76,9 @@ # GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_class_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xfe,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_class_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xfe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -115,6 +121,9 @@ # GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_eq_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x82,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x82,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -157,6 +166,9 @@ # GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_eq_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x92,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x92,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x92,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -199,6 +211,9 @@ # GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_eq_i16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xb2,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xb2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -241,6 +256,9 @@ # GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_eq_i32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xc2,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -250,6 +268,9 @@ # GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13] 0x7e,0x00,0xc2,0xd4,0xfa,0x04,0x02,0x00,0x01,0x60,0x01,0x13 +# GFX12: v_cmpx_eq_i32_e64_dpp v1, 10 row_xmask:0 row_mask:0x1 bank_mask:0x3 ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0x14,0x01,0x00,0x01,0x60,0x01,0x13] +0x7e,0x00,0xc2,0xd4,0xfa,0x14,0x01,0x00,0x01,0x60,0x01,0x13 + # GFX12: v_cmpx_eq_i32_e64_dpp v255, v255 row_xmask:15 row_mask:0x3 bank_mask:0x0 bound_ctrl:1 fi:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30] 0x7e,0x00,0xc2,0xd4,0xfa,0xfe,0x03,0x00,0xff,0x6f,0x0d,0x30 @@ -283,6 +304,9 @@ # GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_eq_u16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xba,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xba,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -325,6 +349,9 @@ # GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_eq_u32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xca,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xca,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -367,6 +394,9 @@ # GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_ge_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x86,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x86,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -409,6 +439,9 @@ # GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_ge_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x96,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x96,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -451,6 +484,9 @@ # GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_ge_i16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xb6,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xb6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -493,6 +529,9 @@ # GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_ge_i32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xc6,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xc6,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -535,6 +574,9 @@ # GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_ge_u16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xbe,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xbe,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -577,6 +619,9 @@ # GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_ge_u32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] 
+0x7e,0x00,0xce,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xce,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -619,6 +664,9 @@ # GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_gt_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x84,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x84,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -661,6 +709,9 @@ # GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_gt_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x94,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x94,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -703,6 +754,9 @@ # GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_gt_i16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xb4,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xb4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -745,6 +799,9 @@ # GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_gt_i32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xc4,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xc4,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -787,6 +844,9 @@ # GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_gt_u16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xbc,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xbc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ 
-829,6 +889,9 @@ # GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_gt_u32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xcc,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xcc,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -871,6 +934,9 @@ # GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_le_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x83,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_le_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x83,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -913,6 +979,9 @@ # GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_le_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x93,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_le_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x93,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -955,6 +1024,9 @@ # GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_le_i16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xb3,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_le_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xb3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -997,6 +1069,9 @@ # GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_le_i32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xc3,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_le_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xc3,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1039,6 +1114,9 @@ # GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_le_u16_e64_dpp v1, s3 
row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xbb,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_le_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xbb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1081,6 +1159,9 @@ # GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_le_u32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xcb,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_le_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xcb,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1123,6 +1204,9 @@ # GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_lg_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x85,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x85,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1165,6 +1249,9 @@ # GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_lg_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x95,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x95,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1207,6 +1294,9 @@ # GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_lt_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x81,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x81,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1249,6 +1339,9 @@ # GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_lt_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x91,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x91,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x91,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1291,6 +1384,9 @@ # GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_lt_i16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xb1,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xb1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1333,6 +1429,9 @@ # GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_lt_i32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xc1,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xc1,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1375,6 +1474,9 @@ # GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_lt_u16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xb9,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xb9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1417,6 +1519,9 @@ # GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_lt_u32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xc9,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xc9,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1459,6 +1564,9 @@ # GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_ne_i16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xb5,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xb5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1501,6 +1609,9 @@ # GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_ne_i32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xc5,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xc5,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1543,6 +1654,9 @@ # GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_ne_u16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xbd,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_ne_u16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xbd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1585,6 +1699,9 @@ # GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_ne_u32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0xcd,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0xcd,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1627,6 +1744,9 @@ # GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_neq_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x8d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x8d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1669,6 +1789,9 @@ # GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_neq_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x9d,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x9d,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1711,6 +1834,9 @@ # GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_nge_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x89,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x89,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x89,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1753,6 +1879,9 @@ # GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_nge_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x99,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x99,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1795,6 +1924,9 @@ # GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_ngt_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x8b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x8b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1837,6 +1969,9 @@ # GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_ngt_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x9b,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x9b,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1879,6 +2014,9 @@ # GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_nle_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x8c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x8c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1921,6 +2059,9 @@ # GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_nle_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x9c,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x9c,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -1963,6 +2104,9 @@ # GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_nlg_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x8a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x8a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -2005,6 +2149,9 @@ # GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_nlg_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x9a,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x9a,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -2047,6 +2194,9 @@ # GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_nlt_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x8e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x8e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -2089,6 +2239,9 @@ # GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_nlt_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x9e,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x9e,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -2131,6 +2284,9 @@ # GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_o_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x87,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_o_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x87,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -2173,6 +2329,9 @@ # GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: 
[0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_o_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x97,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_o_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x97,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -2215,6 +2374,9 @@ # GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_u_f16_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x88,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_u_f16_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x88,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff @@ -2257,6 +2419,12 @@ # GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff] 0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x2f,0x01,0xff +# GFX12: v_cmpx_u_f32_e64_dpp v1, 2.0 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x98,0xd4,0xfa,0xe8,0x01,0x00,0x01,0x2f,0x01,0xff + +# GFX12: v_cmpx_u_f32_e64_dpp v1, s3 row_ror:15 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff] +0x7e,0x00,0x98,0xd4,0xfa,0x06,0x00,0x00,0x01,0x2f,0x01,0xff + # GFX12: v_cmpx_u_f32_e64_dpp v1, v2 row_share:0 row_mask:0xf bank_mask:0xf ; encoding: [0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff] 0x7e,0x00,0x98,0xd4,0xfa,0x04,0x02,0x00,0x01,0x50,0x01,0xff diff --git a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt index d5e112e..bf4f971 100644 --- a/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt +++ b/llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3cx_dpp8.txt @@ -4,18 +4,27 @@ # GFX12: v_cmpx_class_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xfd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_class_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfd,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xfd,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_class_f16_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x01,0xfd,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] 0x7e,0x01,0xfd,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_class_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfe,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xfe,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_class_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xfe,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xfe,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_class_f32_e64_dpp -|v255|, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x01,0xfe,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00] 
0x7e,0x01,0xfe,0xd4,0xea,0xfe,0x03,0x20,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_eq_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x82,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x82,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_eq_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x82,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x82,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_eq_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x82,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x82,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -28,6 +37,12 @@ # GFX12: v_cmpx_eq_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x92,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x92,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_eq_f32_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x92,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x92,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05 + +# GFX12: v_cmpx_eq_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x92,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x92,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_eq_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x92,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x92,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -40,30 +55,48 @@ # GFX12: v_cmpx_eq_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xb2,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_eq_i16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb2,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xb2,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_eq_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xb2,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xb2,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_eq_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc2,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xc2,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_eq_i32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc2,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xc2,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_eq_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xc2,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xc2,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_eq_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xba,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_eq_u16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xba,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xba,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_eq_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xba,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xba,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_eq_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xca,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xca,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_eq_u32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x7e,0x00,0xca,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xca,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_eq_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xca,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xca,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_ge_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x86,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_ge_f16_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x86,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05 + +# GFX12: v_cmpx_ge_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x86,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x86,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_ge_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x86,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x86,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -76,6 +109,9 @@ # GFX12: v_cmpx_ge_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x96,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x96,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_ge_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x96,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x96,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_ge_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x96,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x96,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -88,30 +124,45 @@ # GFX12: v_cmpx_ge_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xb6,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_ge_i16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb6,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xb6,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_ge_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xb6,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xb6,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_ge_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc6,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xc6,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_ge_i32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc6,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xc6,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_ge_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xc6,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xc6,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_ge_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xbe,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_ge_u16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbe,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xbe,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_ge_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xbe,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xbe,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # 
GFX12: v_cmpx_ge_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xce,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xce,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_ge_u32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xce,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xce,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_ge_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xce,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xce,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_gt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x84,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x84,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_gt_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x84,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x84,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_gt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x84,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x84,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -124,6 +175,9 @@ # GFX12: v_cmpx_gt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x94,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x94,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_gt_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x94,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x94,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_gt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x94,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x94,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -136,30 +190,45 @@ # GFX12: v_cmpx_gt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xb4,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_gt_i16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb4,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xb4,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_gt_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xb4,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xb4,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_gt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc4,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xc4,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_gt_i32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc4,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xc4,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_gt_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xc4,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xc4,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_gt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xbc,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_gt_u16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbc,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xbc,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_gt_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: 
[0x7e,0x00,0xbc,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xbc,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_gt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xcc,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xcc,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_gt_u32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xcc,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xcc,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_gt_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xcc,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xcc,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_le_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x83,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x83,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_le_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x83,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x83,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_le_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x83,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x83,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -172,6 +241,9 @@ # GFX12: v_cmpx_le_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x93,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x93,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_le_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x93,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x93,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_le_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x93,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x93,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -184,30 +256,45 @@ # GFX12: v_cmpx_le_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xb3,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_le_i16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb3,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xb3,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_le_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xb3,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xb3,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_le_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc3,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xc3,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_le_i32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc3,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xc3,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_le_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xc3,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xc3,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_le_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xbb,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_le_u16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbb,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xbb,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: 
v_cmpx_le_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xbb,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xbb,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_le_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xcb,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xcb,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_le_u32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xcb,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xcb,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_le_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xcb,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xcb,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_lg_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x85,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x85,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_lg_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x85,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x85,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_lg_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x85,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x85,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -220,6 +307,9 @@ # GFX12: v_cmpx_lg_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x95,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x95,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_lg_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x95,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x95,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_lg_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x95,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x95,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -232,6 +322,9 @@ # GFX12: v_cmpx_lt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x81,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_lt_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x81,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x81,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_lt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x81,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -244,6 +337,9 @@ # GFX12: v_cmpx_lt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x91,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x91,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_lt_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x91,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x91,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_lt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x91,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x91,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -256,54 +352,84 @@ # GFX12: v_cmpx_lt_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb1,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xb1,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_lt_i16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x7e,0x00,0xb1,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xb1,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_lt_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xb1,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xb1,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_lt_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc1,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xc1,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_lt_i32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc1,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xc1,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_lt_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xc1,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xc1,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_lt_u16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xb9,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_lt_u16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb9,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xb9,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_lt_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xb9,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xb9,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_lt_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc9,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xc9,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_lt_u32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc9,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xc9,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_lt_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xc9,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xc9,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_ne_i16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xb5,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_ne_i16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xb5,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xb5,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_ne_i16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xb5,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xb5,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_ne_i32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc5,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xc5,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_ne_i32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc5,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xc5,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + +# GFX12: v_cmpx_ne_i32_e64_dpp v1, 10 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xc5,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xc5,0xd4,0xe9,0x14,0x01,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_ne_i32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xc5,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xc5,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_ne_u16_e64_dpp v1, 
v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xbd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_ne_u16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xbd,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xbd,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_ne_u16_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xbd,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xbd,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_ne_u32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xcd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0xcd,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_ne_u32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0xcd,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0xcd,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_ne_u32_e64_dpp v255, v255 dpp8:[0,0,0,0,0,0,0,0] fi:1 ; encoding: [0x7e,0x00,0xcd,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00] 0x7e,0x00,0xcd,0xd4,0xea,0xfe,0x03,0x00,0xff,0x00,0x00,0x00 # GFX12: v_cmpx_neq_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x8d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_neq_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x8d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_neq_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x8d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -316,6 +442,9 @@ # GFX12: v_cmpx_neq_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x9d,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_neq_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x9d,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_neq_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x9d,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -328,6 +457,12 @@ # GFX12: v_cmpx_nge_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x89,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x89,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_nge_f16_e64_dpp v1, 2.0 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x89,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x89,0xd4,0xe9,0xe8,0x01,0x00,0x01,0x77,0x39,0x05 + +# GFX12: v_cmpx_nge_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x89,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x89,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_nge_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x89,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x89,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -340,6 +475,9 @@ # GFX12: v_cmpx_nge_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x99,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x99,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_nge_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x99,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] 
+0x7e,0x00,0x99,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_nge_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x99,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x99,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -352,6 +490,9 @@ # GFX12: v_cmpx_ngt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x8b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_ngt_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x8b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_ngt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x8b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -364,6 +505,9 @@ # GFX12: v_cmpx_ngt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x9b,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_ngt_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x9b,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_ngt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x9b,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -376,6 +520,9 @@ # GFX12: v_cmpx_nle_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x8c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_nle_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x8c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_nle_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x8c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -388,6 +535,9 @@ # GFX12: v_cmpx_nle_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x9c,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_nle_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x9c,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_nle_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x9c,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -400,6 +550,9 @@ # GFX12: v_cmpx_nlg_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x8a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_nlg_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x8a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_nlg_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x8a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -412,6 +565,9 @@ # GFX12: v_cmpx_nlg_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x9a,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 
+# GFX12: v_cmpx_nlg_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x9a,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_nlg_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x9a,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -424,6 +580,9 @@ # GFX12: v_cmpx_nlt_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x8e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_nlt_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x8e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x8e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_nlt_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x8e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x8e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -436,6 +595,9 @@ # GFX12: v_cmpx_nlt_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x9e,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_nlt_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x9e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x9e,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_nlt_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x9e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x9e,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -448,6 +610,9 @@ # GFX12: v_cmpx_o_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x87,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x87,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_o_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x87,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x87,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_o_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x87,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x87,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -460,6 +625,9 @@ # GFX12: v_cmpx_o_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x97,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x97,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_o_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x97,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x97,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_o_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x97,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x97,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -472,6 +640,9 @@ # GFX12: v_cmpx_u_f16_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x88,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x88,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_u_f16_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x88,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x88,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_u_f16_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x88,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x88,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 @@ -484,6 +655,9 @@ # GFX12: v_cmpx_u_f32_e64_dpp v1, v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: 
[0x7e,0x00,0x98,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05] 0x7e,0x00,0x98,0xd4,0xe9,0x04,0x02,0x00,0x01,0x77,0x39,0x05 +# GFX12: v_cmpx_u_f32_e64_dpp v1, s3 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x00,0x98,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05] +0x7e,0x00,0x98,0xd4,0xe9,0x06,0x00,0x00,0x01,0x77,0x39,0x05 + # GFX12: v_cmpx_u_f32_e64_dpp |v1|, -v2 dpp8:[7,6,5,4,3,2,1,0] ; encoding: [0x7e,0x01,0x98,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05] 0x7e,0x01,0x98,0xd4,0xe9,0x04,0x02,0x40,0x01,0x77,0x39,0x05 -- cgit v1.1 From a8425d2fa2e0d29b83d16eac008441ecb9516320 Mon Sep 17 00:00:00 2001 From: Kevin Frei Date: Wed, 3 Apr 2024 12:15:41 -0700 Subject: DebugInfoD issues, take 2 (#86812) The previous diff (and its subsequent fix) were reverted as the tests didn't work properly on the AArch64 & ARM LLDB buildbots. I made a couple more minor changes to tests (from @clayborg's feedback) and disabled them for non-Linux-x86(_64) builds, as I don't have the ability to do anything about an ARM64 Linux failure. If I had to guess, I'd say the toolchain on the buildbots isn't respecting the `-Wl,--build-id` flag. Maybe, one day, when I have a Linux AArch64 system I'll dig into it. From the reverted PR: I've migrated the tests in my https://github.com/llvm/llvm-project/pull/79181 from shell to API (at @JDevlieghere's suggestion) and addressed a couple issues that were exposed during testing. The tests first test the "normal" situation (no DebugInfoD involvement, just normal debug files sitting around), then the "no debug info" situation (to make sure the test is seeing failure properly), then they validate that when DebugInfoD returns the symbols, things work properly. This is duplicated for DWP/split-dwarf scenarios. --------- Co-authored-by: Kevin Frei --- .../Python/lldbsuite/test/make/Makefile.rules | 26 ++- .../Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp | 38 +++-- lldb/source/Plugins/SymbolLocator/CMakeLists.txt | 7 +- .../Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp | 29 +++- lldb/test/API/debuginfod/Normal/Makefile | 19 +++ lldb/test/API/debuginfod/Normal/TestDebuginfod.py | 179 ++++++++++++++++++++ lldb/test/API/debuginfod/Normal/main.c | 7 + lldb/test/API/debuginfod/SplitDWARF/Makefile | 23 +++ .../API/debuginfod/SplitDWARF/TestDebuginfodDWP.py | 188 +++++++++++++++++++++ lldb/test/API/debuginfod/SplitDWARF/main.c | 7 + 10 files changed, 506 insertions(+), 17 deletions(-) create mode 100644 lldb/test/API/debuginfod/Normal/Makefile create mode 100644 lldb/test/API/debuginfod/Normal/TestDebuginfod.py create mode 100644 lldb/test/API/debuginfod/Normal/main.c create mode 100644 lldb/test/API/debuginfod/SplitDWARF/Makefile create mode 100644 lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py create mode 100644 lldb/test/API/debuginfod/SplitDWARF/main.c diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules index bfd249c..ee8793f 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules @@ -51,7 +51,7 @@ LLDB_BASE_DIR := $(THIS_FILE_DIR)/../../../../../ # # GNUWin32 uname gives "windows32" or "server version windows32" while # some versions of MSYS uname return "MSYS_NT*", but most environments -# standardize on "Windows_NT", so we'll make it consistent here. +# standardize on "Windows_NT", so we'll make it consistent here.
# When running tests from Visual Studio, the environment variable isn't # inherited all the way down to the process spawned for make. #---------------------------------------------------------------------- @@ -210,6 +210,12 @@ else ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES" DSYM = $(EXE).debug endif + + ifeq "$(MAKE_DWP)" "YES" + MAKE_DWO := YES + DWP_NAME = $(EXE).dwp + DYLIB_DWP_NAME = $(DYLIB_NAME).dwp + endif endif LIMIT_DEBUG_INFO_FLAGS = @@ -357,6 +363,7 @@ ifneq "$(OS)" "Darwin" OBJCOPY ?= $(call replace_cc_with,objcopy) ARCHIVER ?= $(call replace_cc_with,ar) + DWP ?= $(call replace_cc_with,dwp) override AR = $(ARCHIVER) endif @@ -527,6 +534,10 @@ ifneq "$(CXX)" "" endif endif +ifeq "$(GEN_GNU_BUILD_ID)" "YES" + LDFLAGS += -Wl,--build-id +endif + #---------------------------------------------------------------------- # DYLIB_ONLY variable can be used to skip the building of a.out. # See the sections below regarding dSYM file as well as the building of @@ -565,10 +576,17 @@ else endif else ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES" +ifeq "$(SAVE_FULL_DEBUG_BINARY)" "YES" + cp "$(EXE)" "$(EXE).unstripped" +endif $(OBJCOPY) --only-keep-debug "$(EXE)" "$(DSYM)" $(OBJCOPY) --strip-debug --add-gnu-debuglink="$(DSYM)" "$(EXE)" "$(EXE)" endif +ifeq "$(MAKE_DWP)" "YES" + $(DWP) -o "$(DWP_NAME)" $(DWOS) endif +endif + #---------------------------------------------------------------------- # Make the dylib @@ -610,9 +628,15 @@ endif else $(LD) $(DYLIB_OBJECTS) $(LDFLAGS) -shared -o "$(DYLIB_FILENAME)" ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES" + ifeq "$(SAVE_FULL_DEBUG_BINARY)" "YES" + cp "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME).unstripped" + endif $(OBJCOPY) --only-keep-debug "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME).debug" $(OBJCOPY) --strip-debug --add-gnu-debuglink="$(DYLIB_FILENAME).debug" "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME)" endif +ifeq "$(MAKE_DWP)" "YES" + $(DWP) -o $(DYLIB_DWP_FILE) $(DYLIB_DWOS) +endif endif #---------------------------------------------------------------------- diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 49f13d2..dafdf24 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -4378,26 +4378,38 @@ const std::shared_ptr &SymbolFileDWARF::GetDwpSymbolFile() { FileSpecList search_paths = Target::GetDefaultDebugFileSearchPaths(); ModuleSpec module_spec; module_spec.GetFileSpec() = m_objfile_sp->GetFileSpec(); + FileSpec dwp_filespec; for (const auto &symfile : symfiles.files()) { module_spec.GetSymbolFileSpec() = FileSpec(symfile.GetPath() + ".dwp", symfile.GetPathStyle()); LLDB_LOG(log, "Searching for DWP using: \"{0}\"", module_spec.GetSymbolFileSpec()); - FileSpec dwp_filespec = + dwp_filespec = PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); if (FileSystem::Instance().Exists(dwp_filespec)) { - LLDB_LOG(log, "Found DWP file: \"{0}\"", dwp_filespec); - DataBufferSP dwp_file_data_sp; - lldb::offset_t dwp_file_data_offset = 0; - ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( - GetObjectFile()->GetModule(), &dwp_filespec, 0, - FileSystem::Instance().GetByteSize(dwp_filespec), dwp_file_data_sp, - dwp_file_data_offset); - if (dwp_obj_file) { - m_dwp_symfile = std::make_shared( - *this, dwp_obj_file, DIERef::k_file_index_mask); - break; - } + break; + } + } + if (!FileSystem::Instance().Exists(dwp_filespec)) { + LLDB_LOG(log, "No DWP file found locally"); + // Fill in the UUID for the module we're 
trying to match for, so we can + // find the correct DWP file, as the Debuginfod plugin uses *only* this + // data to correctly match the DWP file with the binary. + module_spec.GetUUID() = m_objfile_sp->GetUUID(); + dwp_filespec = + PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); + } + if (FileSystem::Instance().Exists(dwp_filespec)) { + LLDB_LOG(log, "Found DWP file: \"{0}\"", dwp_filespec); + DataBufferSP dwp_file_data_sp; + lldb::offset_t dwp_file_data_offset = 0; + ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( + GetObjectFile()->GetModule(), &dwp_filespec, 0, + FileSystem::Instance().GetByteSize(dwp_filespec), dwp_file_data_sp, + dwp_file_data_offset); + if (dwp_obj_file) { + m_dwp_symfile = std::make_shared( + *this, dwp_obj_file, DIERef::k_file_index_mask); } } if (!m_dwp_symfile) { diff --git a/lldb/source/Plugins/SymbolLocator/CMakeLists.txt b/lldb/source/Plugins/SymbolLocator/CMakeLists.txt index ca96962..3367022 100644 --- a/lldb/source/Plugins/SymbolLocator/CMakeLists.txt +++ b/lldb/source/Plugins/SymbolLocator/CMakeLists.txt @@ -1,5 +1,10 @@ +# Order matters here: the first symbol locator prevents further searching. +# For DWARF binaries that are both stripped and split, the Default plugin +# will return the stripped binary when asked for the ObjectFile, which then +# prevents an unstripped binary from being requested from the Debuginfod +# provider. +add_subdirectory(Debuginfod) add_subdirectory(Default) if (CMAKE_SYSTEM_NAME MATCHES "Darwin") add_subdirectory(DebugSymbols) endif() -add_subdirectory(Debuginfod) diff --git a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp index b5fe35d..f296e65 100644 --- a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp +++ b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp @@ -44,6 +44,24 @@ llvm::StringRef SymbolVendorELF::GetPluginDescriptionStatic() { "executables."; } +// If this is needed elsewhere, it can be exported/moved. +static bool IsDwpSymbolFile(const lldb::ModuleSP &module_sp, + const FileSpec &file_spec) { + DataBufferSP dwp_file_data_sp; + lldb::offset_t dwp_file_data_offset = 0; + // Try to create an ObjectFile from the file_spec. + ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( + module_sp, &file_spec, 0, FileSystem::Instance().GetByteSize(file_spec), + dwp_file_data_sp, dwp_file_data_offset); + // The presence of a debug_cu_index section is the key identifying feature of + // a DWP file. Make sure we don't fill in the section list on dwp_obj_file + // (by calling GetSectionList(false)) as this function could be called before + // we may have all the symbol files collected and available. + return dwp_obj_file && ObjectFileELF::classof(dwp_obj_file.get()) && + dwp_obj_file->GetSectionList(false)->FindSectionByType( + eSectionTypeDWARFDebugCuIndex, false); +} + // CreateInstance // // Platforms can register a callback to use when creating symbol vendors to @@ -87,8 +105,15 @@ SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp, FileSpecList search_paths = Target::GetDefaultDebugFileSearchPaths(); FileSpec dsym_fspec = PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); - if (!dsym_fspec) - return nullptr; + if (!dsym_fspec || IsDwpSymbolFile(module_sp, dsym_fspec)) { + // If we have a stripped binary or if we got a DWP file, we should prefer + // symbols in the executable acquired through a plugin. 
+ ModuleSpec unstripped_spec = + PluginManager::LocateExecutableObjectFile(module_spec); + if (!unstripped_spec) + return nullptr; + dsym_fspec = unstripped_spec.GetFileSpec(); + } DataBufferSP dsym_file_data_sp; lldb::offset_t dsym_file_data_offset = 0; diff --git a/lldb/test/API/debuginfod/Normal/Makefile b/lldb/test/API/debuginfod/Normal/Makefile new file mode 100644 index 0000000..54bd7ad --- /dev/null +++ b/lldb/test/API/debuginfod/Normal/Makefile @@ -0,0 +1,19 @@ +C_SOURCES := main.c + +# For normal (non DWP) Debuginfod tests, we need: + +# * The full binary: a.out.unstripped +# Produced by Makefile.rules with SAVE_FULL_DEBUG_BINARY set to YES and +# SPLIT_DEBUG_SYMBOLS set to YES + +# * The stripped binary (a.out) +# Produced by Makefile.rules with SPLIT_DEBUG_SYMBOLS set to YES + +# * The 'only-keep-debug' binary (a.out.debug) +# Produced below + +SPLIT_DEBUG_SYMBOLS := YES +SAVE_FULL_DEBUG_BINARY := YES +GEN_GNU_BUILD_ID := YES + +include Makefile.rules diff --git a/lldb/test/API/debuginfod/Normal/TestDebuginfod.py b/lldb/test/API/debuginfod/Normal/TestDebuginfod.py new file mode 100644 index 0000000..2e87228 --- /dev/null +++ b/lldb/test/API/debuginfod/Normal/TestDebuginfod.py @@ -0,0 +1,179 @@ +import os +import shutil +import tempfile + +import lldb +from lldbsuite.test.decorators import * +import lldbsuite.test.lldbutil as lldbutil +from lldbsuite.test.lldbtest import * + + +""" +Test support for the DebugInfoD network symbol acquisition protocol. +This one is for simple / no split-dwarf scenarios. + +For no-split-dwarf scenarios, there are 2 variations: +1 - A stripped binary with it's corresponding unstripped binary: +2 - A stripped binary with a corresponding --only-keep-debug symbols file +""" + + +# It looks like Linux-AArch64 doesn't support build-id's on the LLDB builtbots +@skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) +class DebugInfodTests(TestBase): + # No need to try every flavor of debug inf. + NO_DEBUG_INFO_TESTCASE = True + + def test_normal_no_symbols(self): + """ + Validate behavior with no symbols or symbol locator. + ('baseline negative' behavior) + """ + test_root = self.config_test(["a.out"]) + self.try_breakpoint(False) + + def test_normal_default(self): + """ + Validate behavior with symbols, but no symbol locator. + ('baseline positive' behavior) + """ + test_root = self.config_test(["a.out", "a.out.debug"]) + self.try_breakpoint(True) + + def test_debuginfod_symbols(self): + """ + Test behavior with the full binary available from Debuginfod as + 'debuginfo' from the plug-in. + """ + test_root = self.config_test(["a.out"], "a.out.unstripped") + self.try_breakpoint(True) + + def test_debuginfod_executable(self): + """ + Test behavior with the full binary available from Debuginfod as + 'executable' from the plug-in. + """ + test_root = self.config_test(["a.out"], None, "a.out.unstripped") + self.try_breakpoint(True) + + def test_debuginfod_okd_symbols(self): + """ + Test behavior with the 'only-keep-debug' symbols available from Debuginfod. + """ + test_root = self.config_test(["a.out"], "a.out.debug") + self.try_breakpoint(True) + + def try_breakpoint(self, should_have_loc): + """ + This function creates a target from self.aout, sets a function-name + breakpoint, and checks to see if we have a file/line location, + as a way to validate that the symbols have been loaded. + should_have_loc specifies if we're testing that symbols have or + haven't been loaded. 
+ """ + target = self.dbg.CreateTarget(self.aout) + self.assertTrue(target and target.IsValid(), "Target is valid") + + bp = target.BreakpointCreateByName("func") + self.assertTrue(bp and bp.IsValid(), "Breakpoint is valid") + self.assertEqual(bp.GetNumLocations(), 1) + + loc = bp.GetLocationAtIndex(0) + self.assertTrue(loc and loc.IsValid(), "Location is valid") + addr = loc.GetAddress() + self.assertTrue(addr and addr.IsValid(), "Loc address is valid") + line_entry = addr.GetLineEntry() + self.assertEqual( + should_have_loc, + line_entry != None and line_entry.IsValid(), + "Loc line entry is valid", + ) + if should_have_loc: + self.assertEqual(line_entry.GetLine(), 4) + self.assertEqual( + line_entry.GetFileSpec().GetFilename(), + self.main_source_file.GetFilename(), + ) + self.dbg.DeleteTarget(target) + shutil.rmtree(self.tmp_dir) + + def config_test(self, local_files, debuginfo=None, executable=None): + """ + Set up a test with local_files[] copied to a different location + so that we control which files are, or are not, found in the file system. + Also, create a stand-alone file-system 'hosted' debuginfod server with the + provided debuginfo and executable files (if they exist) + + Make the filesystem look like: + + /tmp//test/[local_files] + + /tmp//cache (for lldb to use as a temp cache) + + /tmp//buildid//executable -> + /tmp//buildid//debuginfo -> + Returns the /tmp/ path + """ + + self.build() + + uuid = self.getUUID("a.out") + if not uuid: + self.fail("Could not get UUID for a.out") + return + self.main_source_file = lldb.SBFileSpec("main.c") + self.tmp_dir = tempfile.mkdtemp() + test_dir = os.path.join(self.tmp_dir, "test") + os.makedirs(test_dir) + + self.aout = "" + # Copy the files used by the test: + for f in local_files: + shutil.copy(self.getBuildArtifact(f), test_dir) + # The first item is the binary to be used for the test + if self.aout == "": + self.aout = os.path.join(test_dir, f) + + use_debuginfod = debuginfo != None or executable != None + + # Populated the 'file://... 
mocked' Debuginfod server: + if use_debuginfod: + os.makedirs(os.path.join(self.tmp_dir, "cache")) + uuid_dir = os.path.join(self.tmp_dir, "buildid", uuid) + os.makedirs(uuid_dir) + if debuginfo: + shutil.copy( + self.getBuildArtifact(debuginfo), + os.path.join(uuid_dir, "debuginfo"), + ) + if executable: + shutil.copy( + self.getBuildArtifact(executable), + os.path.join(uuid_dir, "executable"), + ) + + # Configure LLDB for the test: + self.runCmd( + "settings set symbols.enable-external-lookup %s" + % str(use_debuginfod).lower() + ) + self.runCmd("settings clear plugin.symbol-locator.debuginfod.server-urls") + if use_debuginfod: + self.runCmd( + "settings set plugin.symbol-locator.debuginfod.cache-path %s/cache" + % self.tmp_dir + ) + self.runCmd( + "settings insert-before plugin.symbol-locator.debuginfod.server-urls 0 file://%s" + % self.tmp_dir + ) + + def getUUID(self, filename): + try: + target = self.dbg.CreateTarget(self.getBuildArtifact(filename)) + module = target.GetModuleAtIndex(0) + uuid = module.GetUUIDString().replace("-", "").lower() + self.dbg.DeleteTarget(target) + return uuid if len(uuid) == 40 else None + except: + return None diff --git a/lldb/test/API/debuginfod/Normal/main.c b/lldb/test/API/debuginfod/Normal/main.c new file mode 100644 index 0000000..4c71846 --- /dev/null +++ b/lldb/test/API/debuginfod/Normal/main.c @@ -0,0 +1,7 @@ +// This is a dump little pair of test files + +int func(int argc, const char *argv[]) { + return (argc + 1) * (argv[argc][0] + 2); +} + +int main(int argc, const char *argv[]) { return func(0, argv); } diff --git a/lldb/test/API/debuginfod/SplitDWARF/Makefile b/lldb/test/API/debuginfod/SplitDWARF/Makefile new file mode 100644 index 0000000..3ab9a96 --- /dev/null +++ b/lldb/test/API/debuginfod/SplitDWARF/Makefile @@ -0,0 +1,23 @@ +C_SOURCES := main.c + +# For split-dwarf Debuginfod tests, we need: + +# * A .DWP file (a.out.dwp) +# Produced by Makefile.rules with MAKE_DWP set to YES + +# * The "full" binary (missing things that live in .dwo's) (a.out.unstripped) +# Produced by Makefile.rules with SAVE_FULL_DEBUG_BINARY set to YES and +# SPLIT_DEBUG_SYMBOLS set to YES + +# * The stripped binary (a.out) +# Produced by Makefile.rules + +# * The 'only-keep-debug' binary (a.out.debug) +# Produced below + +MAKE_DWP := YES +SPLIT_DEBUG_SYMBOLS := YES +SAVE_FULL_DEBUG_BINARY := YES +GEN_GNU_BUILD_ID := YES + +include Makefile.rules diff --git a/lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py b/lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py new file mode 100644 index 0000000..90db352 --- /dev/null +++ b/lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py @@ -0,0 +1,188 @@ +""" +Test support for the DebugInfoD network symbol acquisition protocol. +""" +import os +import shutil +import tempfile + +import lldb +from lldbsuite.test.decorators import * +import lldbsuite.test.lldbutil as lldbutil +from lldbsuite.test.lldbtest import * + + +""" +Test support for the DebugInfoD network symbol acquisition protocol. +This file is for split-dwarf (dwp) scenarios. + +1 - A split binary target with it's corresponding DWP file +2 - A stripped, split binary target with an unstripped binary and a DWP file +3 - A stripped, split binary target with an --only-keep-debug symbols file and a DWP file +""" + + +# It looks like Linux-AArch64 doesn't support build-id's on the LLDB builtbots +@skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) +class DebugInfodDWPTests(TestBase): + # No need to try every flavor of debug inf. 
+ NO_DEBUG_INFO_TESTCASE = True + + def test_normal_stripped(self): + """ + Validate behavior with a stripped binary, no symbols or symbol locator. + """ + self.config_test(["a.out"]) + self.try_breakpoint(False) + + def test_normal_stripped_split_with_dwp(self): + """ + Validate behavior with symbols, but no symbol locator. + """ + self.config_test(["a.out", "a.out.debug", "a.out.dwp"]) + self.try_breakpoint(True) + + def test_normal_stripped_only_dwp(self): + """ + Validate behavior *with* dwp symbols only, but missing other symbols, + but no symbol locator. This shouldn't work: without the other symbols + DWO's appear mostly useless. + """ + self.config_test(["a.out", "a.out.dwp"]) + self.try_breakpoint(False) + + def test_debuginfod_dwp_from_service(self): + """ + Test behavior with the unstripped binary, and DWP from the service. + """ + self.config_test(["a.out.debug"], "a.out.dwp") + self.try_breakpoint(True) + + def test_debuginfod_both_symfiles_from_service(self): + """ + Test behavior with a stripped binary, with the unstripped binary and + dwp symbols from Debuginfod. + """ + self.config_test(["a.out"], "a.out.dwp", "a.out.unstripped") + self.try_breakpoint(True) + + def test_debuginfod_both_okd_symfiles_from_service(self): + """ + Test behavior with both the only-keep-debug symbols and the dwp symbols + from Debuginfod. + """ + self.config_test(["a.out"], "a.out.dwp", "a.out.debug") + self.try_breakpoint(True) + + def try_breakpoint(self, should_have_loc): + """ + This function creates a target from self.aout, sets a function-name + breakpoint, and checks to see if we have a file/line location, + as a way to validate that the symbols have been loaded. + should_have_loc specifies if we're testing that symbols have or + haven't been loaded. + """ + target = self.dbg.CreateTarget(self.aout) + self.assertTrue(target and target.IsValid(), "Target is valid") + + bp = target.BreakpointCreateByName("func") + self.assertTrue(bp and bp.IsValid(), "Breakpoint is valid") + self.assertEqual(bp.GetNumLocations(), 1) + + loc = bp.GetLocationAtIndex(0) + self.assertTrue(loc and loc.IsValid(), "Location is valid") + addr = loc.GetAddress() + self.assertTrue(addr and addr.IsValid(), "Loc address is valid") + line_entry = addr.GetLineEntry() + self.assertEqual( + should_have_loc, + line_entry != None and line_entry.IsValid(), + "Loc line entry is valid", + ) + if should_have_loc: + self.assertEqual(line_entry.GetLine(), 4) + self.assertEqual( + line_entry.GetFileSpec().GetFilename(), + self.main_source_file.GetFilename(), + ) + self.dbg.DeleteTarget(target) + shutil.rmtree(self.tmp_dir) + + def config_test(self, local_files, debuginfo=None, executable=None): + """ + Set up a test with local_files[] copied to a different location + so that we control which files are, or are not, found in the file system. 
+ Also, create a stand-alone file-system 'hosted' debuginfod server with the + provided debuginfo and executable files (if they exist) + + Make the filesystem look like: + + /tmp//test/[local_files] + + /tmp//cache (for lldb to use as a temp cache) + + /tmp//buildid//executable -> + /tmp//buildid//debuginfo -> + Returns the /tmp/ path + """ + + self.build() + + uuid = self.getUUID("a.out") + if not uuid: + self.fail("Could not get UUID for a.out") + return + self.main_source_file = lldb.SBFileSpec("main.c") + self.tmp_dir = tempfile.mkdtemp() + self.test_dir = os.path.join(self.tmp_dir, "test") + os.makedirs(self.test_dir) + + self.aout = "" + # Copy the files used by the test: + for f in local_files: + shutil.copy(self.getBuildArtifact(f), self.test_dir) + if self.aout == "": + self.aout = os.path.join(self.test_dir, f) + + use_debuginfod = debuginfo != None or executable != None + + # Populated the 'file://... mocked' Debuginfod server: + if use_debuginfod: + os.makedirs(os.path.join(self.tmp_dir, "cache")) + uuid_dir = os.path.join(self.tmp_dir, "buildid", uuid) + os.makedirs(uuid_dir) + if debuginfo: + shutil.copy( + self.getBuildArtifact(debuginfo), + os.path.join(uuid_dir, "debuginfo"), + ) + if executable: + shutil.copy( + self.getBuildArtifact(executable), + os.path.join(uuid_dir, "executable"), + ) + os.remove(self.getBuildArtifact("main.dwo")) + # Configure LLDB for the test: + self.runCmd( + "settings set symbols.enable-external-lookup %s" + % str(use_debuginfod).lower() + ) + self.runCmd("settings clear plugin.symbol-locator.debuginfod.server-urls") + if use_debuginfod: + self.runCmd( + "settings set plugin.symbol-locator.debuginfod.cache-path %s/cache" + % self.tmp_dir + ) + self.runCmd( + "settings insert-before plugin.symbol-locator.debuginfod.server-urls 0 file://%s" + % self.tmp_dir + ) + + def getUUID(self, filename): + try: + target = self.dbg.CreateTarget(self.getBuildArtifact(filename)) + module = target.GetModuleAtIndex(0) + uuid = module.GetUUIDString().replace("-", "").lower() + self.dbg.DeleteTarget(target) + return uuid if len(uuid) == 40 else None + except: + return None diff --git a/lldb/test/API/debuginfod/SplitDWARF/main.c b/lldb/test/API/debuginfod/SplitDWARF/main.c new file mode 100644 index 0000000..4c71846 --- /dev/null +++ b/lldb/test/API/debuginfod/SplitDWARF/main.c @@ -0,0 +1,7 @@ +// This is a dump little pair of test files + +int func(int argc, const char *argv[]) { + return (argc + 1) * (argv[argc][0] + 2); +} + +int main(int argc, const char *argv[]) { return func(0, argv); } -- cgit v1.1 From 607b4bc602eda79e97a91a9bc3552a6004e5ac47 Mon Sep 17 00:00:00 2001 From: Emma Pilkington Date: Wed, 3 Apr 2024 15:36:58 -0400 Subject: [AMDGPU] Add a missing COV6 case to getAMDHSACodeObjectVersion() (#87492) --- llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp | 2 ++ llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-cov5.s | 7 +++++++ 2 files changed, 9 insertions(+) diff --git a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp index 4b74376..4e00744 100644 --- a/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp +++ b/llvm/lib/Target/AMDGPU/Utils/AMDGPUBaseInfo.cpp @@ -182,6 +182,8 @@ unsigned getAMDHSACodeObjectVersion(unsigned ABIVersion) { return 4; case ELF::ELFABIVERSION_AMDGPU_HSA_V5: return 5; + case ELF::ELFABIVERSION_AMDGPU_HSA_V6: + return 6; default: return getDefaultAMDHSACodeObjectVersion(); } diff --git a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-cov5.s b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-cov5.s 
index ece36c6..5600bcd 100644 --- a/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-cov5.s +++ b/llvm/test/tools/llvm-objdump/ELF/AMDGPU/kd-cov5.s @@ -6,6 +6,13 @@ ; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack,+wavefrontsize32,-wavefrontsize64 -filetype=obj > %t.o ; RUN: llvm-objdump --disassemble-symbols=kernel.kd %t.o | FileCheck %s --check-prefixes=COV4,CHECK +;; Make sure we override the default COV in the disassembler on COV6 (there +;; currently aren't any differences between 5 and 6, so set the default to 4 so +;; we can verify that the default is at least overridden) +; RUN: sed 's/CODE_OBJECT_VERSION/6/g' %s \ +; RUN: | llvm-mc --triple=amdgcn-amd-amdhsa -mcpu=gfx1010 -mattr=-xnack,+wavefrontsize32,-wavefrontsize64 -filetype=obj > %t.o +; RUN: llvm-objdump -mllvm --amdhsa-code-object-version=4 --disassemble-symbols=kernel.kd %t.o | FileCheck %s --check-prefixes=COV5,CHECK + ;; Verify that .amdhsa_uses_dynamic_stack is only printed on COV5+. ; CHECK: .amdhsa_kernel kernel -- cgit v1.1 From 899855d2b11856a44e530fffe854d76be69b9008 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 3 Apr 2024 15:58:58 -0400 Subject: [SLP]Improve minbitwidth analysis for operands of IToFP and ICmp instructions. Compiler can improve analysis for operands of UIToFP/SIToFP instructions and operands of ICmp instruction. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: https://github.com/llvm/llvm-project/pull/85966 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 50 +++++++++++++++++----- .../SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll | 4 +- .../X86/minbitwidth-node-with-multi-users.ll | 10 +++-- 3 files changed, 48 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9b87e6e..779c7b7 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1107,7 +1107,7 @@ public: MinBWs.clear(); ReductionBitWidth = 0; CastMaxMinBWSizes.reset(); - TruncNodes.clear(); + ExtraBitWidthNodes.clear(); InstrElementSize.clear(); UserIgnoreList = nullptr; PostponedGathers.clear(); @@ -3683,8 +3683,9 @@ private: /// type sizes, used in the tree. std::optional> CastMaxMinBWSizes; - /// Indices of the vectorized trunc nodes. - DenseSet TruncNodes; + /// Indices of the vectorized nodes, which supposed to be the roots of the new + /// bitwidth analysis attempt, like trunc, IToFP or ICmp. 
+ DenseSet ExtraBitWidthNodes; }; } // end namespace slpvectorizer @@ -6612,7 +6613,18 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, PrevMaxBW), std::min(DL->getTypeSizeInBits(VL0->getType()), PrevMinBW)); - TruncNodes.insert(VectorizableTree.size()); + ExtraBitWidthNodes.insert(VectorizableTree.size() + 1); + } else if (ShuffleOrOp == Instruction::SIToFP || + ShuffleOrOp == Instruction::UIToFP) { + unsigned NumSignBits = + ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT); + if (auto *OpI = dyn_cast(VL0->getOperand(0))) { + APInt Mask = DB->getDemandedBits(OpI); + NumSignBits = std::max(NumSignBits, Mask.countl_zero()); + } + if (NumSignBits * 2 >= + DL->getTypeSizeInBits(VL0->getOperand(0)->getType())) + ExtraBitWidthNodes.insert(VectorizableTree.size() + 1); } TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); @@ -6660,6 +6672,18 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, TE->setOperand(1, Right); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); + if (ShuffleOrOp == Instruction::ICmp) { + unsigned NumSignBits0 = + ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT); + if (NumSignBits0 * 2 >= + DL->getTypeSizeInBits(VL0->getOperand(0)->getType())) + ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx); + unsigned NumSignBits1 = + ComputeNumSignBits(VL0->getOperand(1), *DL, 0, AC, nullptr, DT); + if (NumSignBits1 * 2 >= + DL->getTypeSizeInBits(VL0->getOperand(1)->getType())) + ExtraBitWidthNodes.insert(getOperandEntry(TE, 1)->Idx); + } return; } case Instruction::Select: @@ -14302,7 +14326,8 @@ void BoUpSLP::computeMinimumValueSizes() { bool IsStoreOrInsertElt = VectorizableTree.front()->getOpcode() == Instruction::Store || VectorizableTree.front()->getOpcode() == Instruction::InsertElement; - if ((IsStoreOrInsertElt || UserIgnoreList) && TruncNodes.size() <= 1 && + if ((IsStoreOrInsertElt || UserIgnoreList) && + ExtraBitWidthNodes.size() <= 1 && (!CastMaxMinBWSizes || CastMaxMinBWSizes->second == 0 || CastMaxMinBWSizes->first / CastMaxMinBWSizes->second <= 2)) return; @@ -14506,16 +14531,21 @@ void BoUpSLP::computeMinimumValueSizes() { IsTopRoot = false; IsProfitableToDemoteRoot = true; - if (TruncNodes.empty()) { + if (ExtraBitWidthNodes.empty()) { NodeIdx = VectorizableTree.size(); } else { unsigned NewIdx = 0; do { - NewIdx = *TruncNodes.begin() + 1; - TruncNodes.erase(TruncNodes.begin()); - } while (NewIdx <= NodeIdx && !TruncNodes.empty()); + NewIdx = *ExtraBitWidthNodes.begin(); + ExtraBitWidthNodes.erase(ExtraBitWidthNodes.begin()); + } while (NewIdx <= NodeIdx && !ExtraBitWidthNodes.empty()); NodeIdx = NewIdx; - IsTruncRoot = true; + IsTruncRoot = any_of( + VectorizableTree[NewIdx]->UserTreeIndices, [](const EdgeInfo &EI) { + return EI.EdgeIdx == 0 && + EI.UserTE->getOpcode() == Instruction::ICmp && + !EI.UserTE->isAltShuffle(); + }); } // If the maximum bit width we compute is less than the with of the roots' diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll index fc28d7a..e1fd8a7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll @@ -19,8 +19,8 @@ define i1 @test(ptr noalias %0, i64 %1, ptr noalias %p, ptr %p1) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <2 x i24> [[TMP8]], ; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i24> , <2 x 
i24> [[TMP8]] ; CHECK-NEXT: [[TMP23:%.*]] = trunc <2 x i24> [[TMP10]] to <2 x i8> -; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP11]], +; CHECK-NEXT: [[TMP26:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP26]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <2 x i32> [[TMP12]], ; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP13]], <2 x i8> , <2 x i8> [[TMP23]] ; CHECK-NEXT: [[TMP14:%.*]] = zext <2 x i8> [[TMP25]] to <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll index 136ab64..668d3c3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll @@ -10,12 +10,14 @@ define void @test() { ; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i32 0, i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> , i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i8> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i8> [[TMP5]] to <4 x i1> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[TMP8]] to <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i32> zeroinitializer, [[TMP6]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i32> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i1> [[TMP15]], <4 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP16]] to <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> , <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]]) -- cgit v1.1 From fa2bbea14df3273b3403f34cc295c56233fdbd0d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 3 Apr 2024 13:10:16 -0700 Subject: Revert "[SLP]Improve minbitwidth analysis for operands of IToFP and ICmp instructions." This reverts commit 899855d2b11856a44e530fffe854d76be69b9008 to fix the issue reported in https://lab.llvm.org/buildbot/#/builders/165/builds/51659. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 50 +++++----------------- .../SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll | 4 +- .../X86/minbitwidth-node-with-multi-users.ll | 10 ++--- 3 files changed, 16 insertions(+), 48 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 779c7b7..9b87e6e 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1107,7 +1107,7 @@ public: MinBWs.clear(); ReductionBitWidth = 0; CastMaxMinBWSizes.reset(); - ExtraBitWidthNodes.clear(); + TruncNodes.clear(); InstrElementSize.clear(); UserIgnoreList = nullptr; PostponedGathers.clear(); @@ -3683,9 +3683,8 @@ private: /// type sizes, used in the tree. 
std::optional> CastMaxMinBWSizes; - /// Indices of the vectorized nodes, which supposed to be the roots of the new - /// bitwidth analysis attempt, like trunc, IToFP or ICmp. - DenseSet ExtraBitWidthNodes; + /// Indices of the vectorized trunc nodes. + DenseSet TruncNodes; }; } // end namespace slpvectorizer @@ -6613,18 +6612,7 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, PrevMaxBW), std::min(DL->getTypeSizeInBits(VL0->getType()), PrevMinBW)); - ExtraBitWidthNodes.insert(VectorizableTree.size() + 1); - } else if (ShuffleOrOp == Instruction::SIToFP || - ShuffleOrOp == Instruction::UIToFP) { - unsigned NumSignBits = - ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT); - if (auto *OpI = dyn_cast(VL0->getOperand(0))) { - APInt Mask = DB->getDemandedBits(OpI); - NumSignBits = std::max(NumSignBits, Mask.countl_zero()); - } - if (NumSignBits * 2 >= - DL->getTypeSizeInBits(VL0->getOperand(0)->getType())) - ExtraBitWidthNodes.insert(VectorizableTree.size() + 1); + TruncNodes.insert(VectorizableTree.size()); } TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); @@ -6672,18 +6660,6 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, TE->setOperand(1, Right); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); - if (ShuffleOrOp == Instruction::ICmp) { - unsigned NumSignBits0 = - ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT); - if (NumSignBits0 * 2 >= - DL->getTypeSizeInBits(VL0->getOperand(0)->getType())) - ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx); - unsigned NumSignBits1 = - ComputeNumSignBits(VL0->getOperand(1), *DL, 0, AC, nullptr, DT); - if (NumSignBits1 * 2 >= - DL->getTypeSizeInBits(VL0->getOperand(1)->getType())) - ExtraBitWidthNodes.insert(getOperandEntry(TE, 1)->Idx); - } return; } case Instruction::Select: @@ -14326,8 +14302,7 @@ void BoUpSLP::computeMinimumValueSizes() { bool IsStoreOrInsertElt = VectorizableTree.front()->getOpcode() == Instruction::Store || VectorizableTree.front()->getOpcode() == Instruction::InsertElement; - if ((IsStoreOrInsertElt || UserIgnoreList) && - ExtraBitWidthNodes.size() <= 1 && + if ((IsStoreOrInsertElt || UserIgnoreList) && TruncNodes.size() <= 1 && (!CastMaxMinBWSizes || CastMaxMinBWSizes->second == 0 || CastMaxMinBWSizes->first / CastMaxMinBWSizes->second <= 2)) return; @@ -14531,21 +14506,16 @@ void BoUpSLP::computeMinimumValueSizes() { IsTopRoot = false; IsProfitableToDemoteRoot = true; - if (ExtraBitWidthNodes.empty()) { + if (TruncNodes.empty()) { NodeIdx = VectorizableTree.size(); } else { unsigned NewIdx = 0; do { - NewIdx = *ExtraBitWidthNodes.begin(); - ExtraBitWidthNodes.erase(ExtraBitWidthNodes.begin()); - } while (NewIdx <= NodeIdx && !ExtraBitWidthNodes.empty()); + NewIdx = *TruncNodes.begin() + 1; + TruncNodes.erase(TruncNodes.begin()); + } while (NewIdx <= NodeIdx && !TruncNodes.empty()); NodeIdx = NewIdx; - IsTruncRoot = any_of( - VectorizableTree[NewIdx]->UserTreeIndices, [](const EdgeInfo &EI) { - return EI.EdgeIdx == 0 && - EI.UserTE->getOpcode() == Instruction::ICmp && - !EI.UserTE->isAltShuffle(); - }); + IsTruncRoot = true; } // If the maximum bit width we compute is less than the with of the roots' diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll index e1fd8a7..fc28d7a 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll +++ 
b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll @@ -19,8 +19,8 @@ define i1 @test(ptr noalias %0, i64 %1, ptr noalias %p, ptr %p1) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <2 x i24> [[TMP8]], ; CHECK-NEXT: [[TMP10:%.*]] = select <2 x i1> [[TMP9]], <2 x i24> , <2 x i24> [[TMP8]] ; CHECK-NEXT: [[TMP23:%.*]] = trunc <2 x i24> [[TMP10]] to <2 x i8> -; CHECK-NEXT: [[TMP26:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP26]], +; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP11]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <2 x i32> [[TMP12]], ; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP13]], <2 x i8> , <2 x i8> [[TMP23]] ; CHECK-NEXT: [[TMP14:%.*]] = zext <2 x i8> [[TMP25]] to <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll index 668d3c3..136ab64 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll @@ -10,14 +10,12 @@ define void @test() { ; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i32 0, i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> , i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i8> [[TMP5]] to <4 x i1> +; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i8> [[TMP5]] to <4 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1> -; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]] -; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i1> [[TMP15]], <4 x i1> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP16]] to <4 x i32> +; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[TMP8]] to <4 x i32> +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i32> zeroinitializer, [[TMP6]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i32> [[TMP9]], [[TMP10]] ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> , <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]]) -- cgit v1.1 From e506dd0f14ea9fe0b0ac6c3492a118d4d2244fee Mon Sep 17 00:00:00 2001 From: Christopher Di Bella Date: Wed, 3 Apr 2024 13:31:23 -0700 Subject: fully qualifies use of `detail` namespace (#87536) Some TUs apparently end up with an ambiguity between `::llvm::detail` and `support::detail`, so we close that gap at the source. 
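For illustration, a minimal, self-contained sketch (not the actual LLVM headers; the struct names and the `adt` namespace below are invented) of how an unqualified `detail::` reference can become ambiguous once a using-directive makes a nested `support::detail` visible alongside `llvm::detail`, which is the lookup problem that fully qualifying the name sidesteps:

// Illustrative only; compiles as a standalone translation unit.
namespace llvm {
namespace detail {
struct FromLLVMDetail {};
} // namespace detail

namespace support {
namespace detail {
struct FromSupportDetail {};
} // namespace detail
} // namespace support

// Some TUs end up with a directive like this in scope (directly or
// transitively); it makes members of llvm::support visible as if they were
// declared in llvm itself.
using namespace support;

namespace adt {
// detail::FromSupportDetail x;  // error: reference to 'detail' is ambiguous
//                               // (llvm::detail vs. llvm::support::detail)
llvm::detail::FromLLVMDetail a;             // fully qualified: unambiguous
llvm::support::detail::FromSupportDetail b; // fully qualified: unambiguous
} // namespace adt
} // namespace llvm

int main() { return 0; }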
--- llvm/include/llvm/ADT/iterator_range.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/llvm/include/llvm/ADT/iterator_range.h b/llvm/include/llvm/ADT/iterator_range.h index 7d288ea..6c66def 100644 --- a/llvm/include/llvm/ADT/iterator_range.h +++ b/llvm/include/llvm/ADT/iterator_range.h @@ -48,9 +48,10 @@ public: // See https://github.com/llvm/llvm-project/issues/63843 template #else - template , IteratorT>::value> * = nullptr> + template < + typename Container, + std::enable_if_t, IteratorT>::value> * = nullptr> #endif iterator_range(Container &&c) : begin_iterator(adl_begin(c)), end_iterator(adl_end(c)) { @@ -65,7 +66,8 @@ public: }; template -iterator_range(Container &&) -> iterator_range>; +iterator_range(Container &&) + -> iterator_range>; /// Convenience function for iterating over sub-ranges. /// -- cgit v1.1 From 3ee93f486293420852fb9ec95af9c5f54cecdb08 Mon Sep 17 00:00:00 2001 From: Shourya Goel Date: Thu, 4 Apr 2024 02:46:57 +0530 Subject: [libc] Added transitive bindings for OffsetType (#87397) Adding OffTType to fcntl.h and stdio.h 's Macro lists in libc/spec/posix.td as mentioned here: #87266 --- libc/config/baremetal/api.td | 5 ++++- libc/config/gpu/api.td | 6 +++++- libc/config/linux/api.td | 12 ++++++++++-- libc/include/CMakeLists.txt | 10 ++++++---- libc/spec/posix.td | 7 +++++-- libc/src/stdio/fseeko.h | 1 - libc/src/stdio/ftello.h | 1 - 7 files changed, 30 insertions(+), 12 deletions(-) diff --git a/libc/config/baremetal/api.td b/libc/config/baremetal/api.td index 25aa06a..690edbd 100644 --- a/libc/config/baremetal/api.td +++ b/libc/config/baremetal/api.td @@ -57,7 +57,10 @@ def MathAPI : PublicAPI<"math.h"> { } def StdIOAPI : PublicAPI<"stdio.h"> { - let Types = ["size_t"]; + let Types = [ + "size_t", + "off_t", + ]; } def StdlibAPI : PublicAPI<"stdlib.h"> { diff --git a/libc/config/gpu/api.td b/libc/config/gpu/api.td index adaf5bf..523ad49 100644 --- a/libc/config/gpu/api.td +++ b/libc/config/gpu/api.td @@ -64,7 +64,11 @@ def StdIOAPI : PublicAPI<"stdio.h"> { SimpleMacroDef<"_IOLBF", "1">, SimpleMacroDef<"_IONBF", "2">, ]; - let Types = ["size_t", "FILE"]; + let Types = [ + "FILE", + "off_t", + "size_t", + ]; } def IntTypesAPI : PublicAPI<"inttypes.h"> { diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index eb5ed80..9964971 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -49,7 +49,10 @@ def CTypeAPI : PublicAPI<"ctype.h"> { } def FCntlAPI : PublicAPI<"fcntl.h"> { - let Types = ["mode_t"]; + let Types = [ + "mode_t", + "off_t", + ]; } def IntTypesAPI : PublicAPI<"inttypes.h"> { @@ -77,7 +80,12 @@ def StdIOAPI : PublicAPI<"stdio.h"> { SimpleMacroDef<"_IOLBF", "1">, SimpleMacroDef<"_IONBF", "2">, ]; - let Types = ["size_t", "FILE", "cookie_io_functions_t"]; + let Types = [ + "FILE", + "cookie_io_functions_t", + "off_t", + "size_t", + ]; } def StdlibAPI : PublicAPI<"stdlib.h"> { diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 4203f0b..02c7dc8 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -41,9 +41,10 @@ add_gen_header( DEF_FILE fcntl.h.def GEN_HDR fcntl.h DEPENDS - .llvm_libc_common_h .llvm-libc-macros.fcntl_macros .llvm-libc-types.mode_t + .llvm-libc-types.off_t + .llvm_libc_common_h ) add_gen_header( @@ -264,13 +265,14 @@ add_gen_header( DEF_FILE stdio.h.def GEN_HDR stdio.h DEPENDS - .llvm_libc_common_h .llvm-libc-macros.file_seek_macros .llvm-libc-macros.stdio_macros - .llvm-libc-types.size_t - .llvm-libc-types.ssize_t 
.llvm-libc-types.FILE .llvm-libc-types.cookie_io_functions_t + .llvm-libc-types.off_t + .llvm-libc-types.size_t + .llvm-libc-types.ssize_t + .llvm_libc_common_h ) add_gen_header( diff --git a/libc/spec/posix.td b/libc/spec/posix.td index cfa8d3a..45f7ecf 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -210,7 +210,10 @@ def POSIX : StandardSpec<"POSIX"> { HeaderSpec FCntl = HeaderSpec< "fcntl.h", [], // Macros - [ModeTType], + [ + ModeTType, + OffTType, + ], [], // Enumerations [ FunctionSpec< @@ -1180,7 +1183,7 @@ def POSIX : StandardSpec<"POSIX"> { HeaderSpec StdIO = HeaderSpec< "stdio.h", [], // Macros - [], // Types + [OffTType], // Types [], // Enumerations [ FunctionSpec< diff --git a/libc/src/stdio/fseeko.h b/libc/src/stdio/fseeko.h index 3202ed2..77fb412 100644 --- a/libc/src/stdio/fseeko.h +++ b/libc/src/stdio/fseeko.h @@ -10,7 +10,6 @@ #define LLVM_LIBC_SRC_STDIO_FSEEKO_H #include -#include namespace LIBC_NAMESPACE { diff --git a/libc/src/stdio/ftello.h b/libc/src/stdio/ftello.h index 0fdf13a..5ab17f9 100644 --- a/libc/src/stdio/ftello.h +++ b/libc/src/stdio/ftello.h @@ -10,7 +10,6 @@ #define LLVM_LIBC_SRC_STDIO_FTELLO_H #include -#include namespace LIBC_NAMESPACE { -- cgit v1.1 From 42cbceb0f0160d67145723613fda325dbd129308 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Wed, 3 Apr 2024 15:58:58 -0400 Subject: [SLP]Improve minbitwidth analysis for operands of IToFP and ICmp instructions. Compiler can improve analysis for operands of UIToFP/SIToFP instructions and operands of ICmp instruction. Reviewers: RKSimon Reviewed By: RKSimon Pull Request: https://github.com/llvm/llvm-project/pull/85966 --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 52 +++++++++++++++++----- .../SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll | 4 +- .../X86/minbitwidth-node-with-multi-users.ll | 10 +++-- 3 files changed, 50 insertions(+), 16 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9b87e6e..9976954 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1107,7 +1107,7 @@ public: MinBWs.clear(); ReductionBitWidth = 0; CastMaxMinBWSizes.reset(); - TruncNodes.clear(); + ExtraBitWidthNodes.clear(); InstrElementSize.clear(); UserIgnoreList = nullptr; PostponedGathers.clear(); @@ -3683,8 +3683,9 @@ private: /// type sizes, used in the tree. std::optional> CastMaxMinBWSizes; - /// Indices of the vectorized trunc nodes. - DenseSet TruncNodes; + /// Indices of the vectorized nodes, which supposed to be the roots of the new + /// bitwidth analysis attempt, like trunc, IToFP or ICmp. 
+ DenseSet ExtraBitWidthNodes; }; } // end namespace slpvectorizer @@ -6612,7 +6613,18 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, PrevMaxBW), std::min(DL->getTypeSizeInBits(VL0->getType()), PrevMinBW)); - TruncNodes.insert(VectorizableTree.size()); + ExtraBitWidthNodes.insert(VectorizableTree.size() + 1); + } else if (ShuffleOrOp == Instruction::SIToFP || + ShuffleOrOp == Instruction::UIToFP) { + unsigned NumSignBits = + ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT); + if (auto *OpI = dyn_cast(VL0->getOperand(0))) { + APInt Mask = DB->getDemandedBits(OpI); + NumSignBits = std::max(NumSignBits, Mask.countl_zero()); + } + if (NumSignBits * 2 >= + DL->getTypeSizeInBits(VL0->getOperand(0)->getType())) + ExtraBitWidthNodes.insert(VectorizableTree.size() + 1); } TreeEntry *TE = newTreeEntry(VL, Bundle /*vectorized*/, S, UserTreeIdx, ReuseShuffleIndicies); @@ -6660,6 +6672,18 @@ void BoUpSLP::buildTree_rec(ArrayRef VL, unsigned Depth, TE->setOperand(1, Right); buildTree_rec(Left, Depth + 1, {TE, 0}); buildTree_rec(Right, Depth + 1, {TE, 1}); + if (ShuffleOrOp == Instruction::ICmp) { + unsigned NumSignBits0 = + ComputeNumSignBits(VL0->getOperand(0), *DL, 0, AC, nullptr, DT); + if (NumSignBits0 * 2 >= + DL->getTypeSizeInBits(VL0->getOperand(0)->getType())) + ExtraBitWidthNodes.insert(getOperandEntry(TE, 0)->Idx); + unsigned NumSignBits1 = + ComputeNumSignBits(VL0->getOperand(1), *DL, 0, AC, nullptr, DT); + if (NumSignBits1 * 2 >= + DL->getTypeSizeInBits(VL0->getOperand(1)->getType())) + ExtraBitWidthNodes.insert(getOperandEntry(TE, 1)->Idx); + } return; } case Instruction::Select: @@ -14302,7 +14326,8 @@ void BoUpSLP::computeMinimumValueSizes() { bool IsStoreOrInsertElt = VectorizableTree.front()->getOpcode() == Instruction::Store || VectorizableTree.front()->getOpcode() == Instruction::InsertElement; - if ((IsStoreOrInsertElt || UserIgnoreList) && TruncNodes.size() <= 1 && + if ((IsStoreOrInsertElt || UserIgnoreList) && + ExtraBitWidthNodes.size() <= 1 && (!CastMaxMinBWSizes || CastMaxMinBWSizes->second == 0 || CastMaxMinBWSizes->first / CastMaxMinBWSizes->second <= 2)) return; @@ -14506,16 +14531,23 @@ void BoUpSLP::computeMinimumValueSizes() { IsTopRoot = false; IsProfitableToDemoteRoot = true; - if (TruncNodes.empty()) { + if (ExtraBitWidthNodes.empty()) { NodeIdx = VectorizableTree.size(); } else { unsigned NewIdx = 0; do { - NewIdx = *TruncNodes.begin() + 1; - TruncNodes.erase(TruncNodes.begin()); - } while (NewIdx <= NodeIdx && !TruncNodes.empty()); + NewIdx = *ExtraBitWidthNodes.begin(); + ExtraBitWidthNodes.erase(ExtraBitWidthNodes.begin()); + } while (NewIdx <= NodeIdx && !ExtraBitWidthNodes.empty()); NodeIdx = NewIdx; - IsTruncRoot = true; + IsTruncRoot = + NodeIdx < VectorizableTree.size() && + any_of(VectorizableTree[NodeIdx]->UserTreeIndices, + [](const EdgeInfo &EI) { + return EI.EdgeIdx == 0 && + EI.UserTE->getOpcode() == Instruction::Trunc && + !EI.UserTE->isAltShuffle(); + }); } // If the maximum bit width we compute is less than the with of the roots' diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll index fc28d7a..e1fd8a7 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-icmp-to-trunc.ll @@ -19,8 +19,8 @@ define i1 @test(ptr noalias %0, i64 %1, ptr noalias %p, ptr %p1) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <2 x i24> [[TMP8]], ; CHECK-NEXT: [[TMP10:%.*]] = 
select <2 x i1> [[TMP9]], <2 x i24> , <2 x i24> [[TMP8]] ; CHECK-NEXT: [[TMP23:%.*]] = trunc <2 x i24> [[TMP10]] to <2 x i8> -; CHECK-NEXT: [[TMP11:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> -; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP11]], +; CHECK-NEXT: [[TMP26:%.*]] = zext <2 x i8> [[TMP23]] to <2 x i32> +; CHECK-NEXT: [[TMP12:%.*]] = and <2 x i32> [[TMP26]], ; CHECK-NEXT: [[TMP13:%.*]] = icmp eq <2 x i32> [[TMP12]], ; CHECK-NEXT: [[TMP25:%.*]] = select <2 x i1> [[TMP13]], <2 x i8> , <2 x i8> [[TMP23]] ; CHECK-NEXT: [[TMP14:%.*]] = zext <2 x i8> [[TMP25]] to <2 x i32> diff --git a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll index 136ab64..668d3c3 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/minbitwidth-node-with-multi-users.ll @@ -10,12 +10,14 @@ define void @test() { ; CHECK-NEXT: [[TMP3:%.*]] = select i1 false, i32 0, i32 0 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x i8> , i8 [[TMP1]], i32 0 ; CHECK-NEXT: [[TMP5:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> -; CHECK-NEXT: [[TMP6:%.*]] = sext <4 x i8> [[TMP5]] to <4 x i32> +; CHECK-NEXT: [[TMP15:%.*]] = trunc <4 x i8> [[TMP5]] to <4 x i1> ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <4 x i8> [[TMP4]], <4 x i8> poison, <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP8:%.*]] = or <4 x i8> [[TMP7]], zeroinitializer -; CHECK-NEXT: [[TMP9:%.*]] = sext <4 x i8> [[TMP8]] to <4 x i32> -; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i32> zeroinitializer, [[TMP6]] -; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i32> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP9:%.*]] = trunc <4 x i8> [[TMP8]] to <4 x i1> +; CHECK-NEXT: [[TMP10:%.*]] = or <4 x i1> zeroinitializer, [[TMP15]] +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <4 x i1> [[TMP9]], [[TMP10]] +; CHECK-NEXT: [[TMP16:%.*]] = shufflevector <4 x i1> [[TMP15]], <4 x i1> poison, <4 x i32> +; CHECK-NEXT: [[TMP6:%.*]] = zext <4 x i1> [[TMP16]] to <4 x i32> ; CHECK-NEXT: [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> , <4 x i32> ; CHECK-NEXT: [[TMP13:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP12]], <4 x i32> zeroinitializer ; CHECK-NEXT: [[TMP14:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP13]]) -- cgit v1.1 From d53b8291bff4542a9c3e2f1df050deafbe295fff Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 3 Apr 2024 22:14:30 +0100 Subject: [VectorCombine][X86] shuffle-of-casts.ll - adjust zext nneg tests to improve costs for testing Improves SSE vs AVX test results for #87510 --- .../VectorCombine/X86/shuffle-of-casts.ll | 32 +++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index b922528..f804300 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -17,31 +17,31 @@ define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { ret <16 x i32> %r } -define <16 x i32> @concat_zext_nneg_v8i8_v16i32(<8 x i8> %a0, <8 x i8> %a1) { -; CHECK-LABEL: @concat_zext_nneg_v8i8_v16i32( -; CHECK-NEXT: [[X0:%.*]] = zext nneg <8 x i8> [[A0:%.*]] to <8 x i32> -; CHECK-NEXT: [[X1:%.*]] = zext nneg <8 x i8> [[A1:%.*]] to <8 x i32> +define <16 x i32> @concat_zext_nneg_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: 
@concat_zext_nneg_v8i16_v16i32( +; CHECK-NEXT: [[X0:%.*]] = zext nneg <8 x i16> [[A0:%.*]] to <8 x i32> +; CHECK-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32> ; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; - %x0 = zext nneg <8 x i8> %a0 to <8 x i32> - %x1 = zext nneg <8 x i8> %a1 to <8 x i32> + %x0 = zext nneg <8 x i16> %a0 to <8 x i32> + %x1 = zext nneg <8 x i16> %a1 to <8 x i32> %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> ret <16 x i32> %r } ; TODO - sext + zext nneg -> sext -define <8 x i32> @concat_sext_zext_nneg_v4i8_v8i32(<4 x i8> %a0, <4 x i8> %a1) { -; CHECK-LABEL: @concat_sext_zext_nneg_v4i8_v8i32( -; CHECK-NEXT: [[X0:%.*]] = sext <4 x i8> [[A0:%.*]] to <4 x i32> -; CHECK-NEXT: [[X1:%.*]] = zext nneg <4 x i8> [[A1:%.*]] to <4 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X1]], <8 x i32> -; CHECK-NEXT: ret <8 x i32> [[R]] +define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(<8 x i16> %a0, <8 x i16> %a1) { +; CHECK-LABEL: @concat_sext_zext_nneg_v8i16_v8i32( +; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> +; CHECK-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32> +; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> +; CHECK-NEXT: ret <16 x i32> [[R]] ; - %x0 = sext <4 x i8> %a0 to <4 x i32> - %x1 = zext nneg <4 x i8> %a1 to <4 x i32> - %r = shufflevector <4 x i32> %x0, <4 x i32> %x1, <8 x i32> - ret <8 x i32> %r + %x0 = sext <8 x i16> %a0 to <8 x i32> + %x1 = zext nneg <8 x i16> %a1 to <8 x i32> + %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> + ret <16 x i32> %r } define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { -- cgit v1.1 From 718638d44d3f1033c1ea395244c07d971ec33a90 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Wed, 3 Apr 2024 14:49:39 -0700 Subject: [flang][runtime] Enable I/O APIs in F18 runtime offload builds. (#87543) --- flang/include/flang/Runtime/io-api.h | 164 ++++++++++++++-------------- flang/runtime/environment.cpp | 2 + flang/runtime/environment.h | 2 +- flang/runtime/freestanding-tools.h | 19 ++++ flang/runtime/io-api.cpp | 204 +++++++++++++++++------------------ flang/runtime/io-error.cpp | 9 +- flang/runtime/io-error.h | 2 +- flang/runtime/namelist.cpp | 46 ++++---- 8 files changed, 235 insertions(+), 213 deletions(-) diff --git a/flang/include/flang/Runtime/io-api.h b/flang/include/flang/Runtime/io-api.h index 1b6c4f5..328afc7 100644 --- a/flang/include/flang/Runtime/io-api.h +++ b/flang/include/flang/Runtime/io-api.h @@ -92,18 +92,18 @@ constexpr std::size_t RecommendedInternalIoScratchAreaBytes( // Internal I/O to/from character arrays &/or non-default-kind character // requires a descriptor, which is copied. 
-Cookie IONAME(BeginInternalArrayListOutput)(const Descriptor &, +Cookie IODECL(BeginInternalArrayListOutput)(const Descriptor &, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInternalArrayListInput)(const Descriptor &, +Cookie IODECL(BeginInternalArrayListInput)(const Descriptor &, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInternalArrayFormattedOutput)(const Descriptor &, +Cookie IODECL(BeginInternalArrayFormattedOutput)(const Descriptor &, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor = nullptr, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInternalArrayFormattedInput)(const Descriptor &, +Cookie IODECL(BeginInternalArrayFormattedInput)(const Descriptor &, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor = nullptr, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, @@ -111,20 +111,20 @@ Cookie IONAME(BeginInternalArrayFormattedInput)(const Descriptor &, // Internal I/O to/from a default-kind character scalar can avoid a // descriptor. -Cookie IONAME(BeginInternalListOutput)(char *internal, +Cookie IODECL(BeginInternalListOutput)(char *internal, std::size_t internalLength, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInternalListInput)(const char *internal, +Cookie IODECL(BeginInternalListInput)(const char *internal, std::size_t internalLength, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInternalFormattedOutput)(char *internal, +Cookie IODECL(BeginInternalFormattedOutput)(char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor = nullptr, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInternalFormattedInput)(const char *internal, +Cookie IODECL(BeginInternalFormattedInput)(const char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor = nullptr, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, @@ -139,63 +139,63 @@ Cookie IONAME(BeginInternalFormattedInput)(const char *internal, // If handleError is false, and the unit number is out of range, the program // will be terminated. Otherwise, if unit is out of range, a nonzero Iostat // code is returned and ioMsg is set if it is not a nullptr. 
-enum Iostat IONAME(CheckUnitNumberInRange64)(std::int64_t unit, +enum Iostat IODECL(CheckUnitNumberInRange64)(std::int64_t unit, bool handleError, char *ioMsg = nullptr, std::size_t ioMsgLength = 0, const char *sourceFile = nullptr, int sourceLine = 0); -enum Iostat IONAME(CheckUnitNumberInRange128)(common::int128_t unit, +enum Iostat IODECL(CheckUnitNumberInRange128)(common::int128_t unit, bool handleError, char *ioMsg = nullptr, std::size_t ioMsgLength = 0, const char *sourceFile = nullptr, int sourceLine = 0); // External synchronous I/O initiation Cookie IODECL(BeginExternalListOutput)(ExternalUnit = DefaultOutputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginExternalListInput)(ExternalUnit = DefaultInputUnit, +Cookie IODECL(BeginExternalListInput)(ExternalUnit = DefaultInputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginExternalFormattedOutput)(const char *format, std::size_t, +Cookie IODECL(BeginExternalFormattedOutput)(const char *format, std::size_t, const Descriptor *formatDescriptor = nullptr, ExternalUnit = DefaultOutputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginExternalFormattedInput)(const char *format, std::size_t, +Cookie IODECL(BeginExternalFormattedInput)(const char *format, std::size_t, const Descriptor *formatDescriptor = nullptr, ExternalUnit = DefaultInputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginUnformattedOutput)(ExternalUnit = DefaultOutputUnit, +Cookie IODECL(BeginUnformattedOutput)(ExternalUnit = DefaultOutputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginUnformattedInput)(ExternalUnit = DefaultInputUnit, +Cookie IODECL(BeginUnformattedInput)(ExternalUnit = DefaultInputUnit, const char *sourceFile = nullptr, int sourceLine = 0); // WAIT(ID=) -Cookie IONAME(BeginWait)(ExternalUnit, AsynchronousId, +Cookie IODECL(BeginWait)(ExternalUnit, AsynchronousId, const char *sourceFile = nullptr, int sourceLine = 0); // WAIT(no ID=) -Cookie IONAME(BeginWaitAll)( +Cookie IODECL(BeginWaitAll)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); // Other I/O statements -Cookie IONAME(BeginClose)( +Cookie IODECL(BeginClose)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginFlush)( +Cookie IODECL(BeginFlush)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginBackspace)( +Cookie IODECL(BeginBackspace)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginEndfile)( +Cookie IODECL(BeginEndfile)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginRewind)( +Cookie IODECL(BeginRewind)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); // OPEN(UNIT=) and OPEN(NEWUNIT=) have distinct interfaces. -Cookie IONAME(BeginOpenUnit)( +Cookie IODECL(BeginOpenUnit)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginOpenNewUnit)( +Cookie IODECL(BeginOpenNewUnit)( const char *sourceFile = nullptr, int sourceLine = 0); // The variant forms of INQUIRE() statements have distinct interfaces. // BeginInquireIoLength() is basically a no-op output statement. 
-Cookie IONAME(BeginInquireUnit)( +Cookie IODECL(BeginInquireUnit)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInquireFile)(const char *, std::size_t, +Cookie IODECL(BeginInquireFile)(const char *, std::size_t, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IONAME(BeginInquireIoLength)( +Cookie IODECL(BeginInquireIoLength)( const char *sourceFile = nullptr, int sourceLine = 0); // If an I/O statement has any IOSTAT=, ERR=, END=, or EOR= specifiers, @@ -214,33 +214,33 @@ Cookie IONAME(BeginInquireIoLength)( // } // } // if (EndIoStatement(cookie) == FORTRAN_RUTIME_IOSTAT_END) goto label666; -void IONAME(EnableHandlers)(Cookie, bool hasIoStat = false, bool hasErr = false, +void IODECL(EnableHandlers)(Cookie, bool hasIoStat = false, bool hasErr = false, bool hasEnd = false, bool hasEor = false, bool hasIoMsg = false); // ASYNCHRONOUS='YES' or 'NO' on READ/WRITE/OPEN // Use GetAsynchronousId() to handle ID=. -bool IONAME(SetAsynchronous)(Cookie, const char *, std::size_t); +bool IODECL(SetAsynchronous)(Cookie, const char *, std::size_t); // Control list options. These return false on a error that the // Begin...() call has specified will be handled by the caller. // The interfaces that pass a default-kind CHARACTER argument // are limited to passing specific case-insensitive keyword values. // ADVANCE=YES, NO -bool IONAME(SetAdvance)(Cookie, const char *, std::size_t); +bool IODECL(SetAdvance)(Cookie, const char *, std::size_t); // BLANK=NULL, ZERO -bool IONAME(SetBlank)(Cookie, const char *, std::size_t); +bool IODECL(SetBlank)(Cookie, const char *, std::size_t); // DECIMAL=COMMA, POINT -bool IONAME(SetDecimal)(Cookie, const char *, std::size_t); +bool IODECL(SetDecimal)(Cookie, const char *, std::size_t); // DELIM=APOSTROPHE, QUOTE, NONE -bool IONAME(SetDelim)(Cookie, const char *, std::size_t); +bool IODECL(SetDelim)(Cookie, const char *, std::size_t); // PAD=YES, NO -bool IONAME(SetPad)(Cookie, const char *, std::size_t); -bool IONAME(SetPos)(Cookie, std::int64_t); -bool IONAME(SetRec)(Cookie, std::int64_t); +bool IODECL(SetPad)(Cookie, const char *, std::size_t); +bool IODECL(SetPos)(Cookie, std::int64_t); +bool IODECL(SetRec)(Cookie, std::int64_t); // ROUND=UP, DOWN, ZERO, NEAREST, COMPATIBLE, PROCESSOR_DEFINED -bool IONAME(SetRound)(Cookie, const char *, std::size_t); +bool IODECL(SetRound)(Cookie, const char *, std::size_t); // SIGN=PLUS, SUPPRESS, PROCESSOR_DEFINED -bool IONAME(SetSign)(Cookie, const char *, std::size_t); +bool IODECL(SetSign)(Cookie, const char *, std::size_t); // Data item transfer for modes other than NAMELIST: // Any data object that can be passed as an actual argument without the @@ -256,34 +256,34 @@ bool IONAME(SetSign)(Cookie, const char *, std::size_t); // Once the statement has encountered an error, all following items will be // ignored and also return false; but compiled code should check for errors // and avoid the following items when they might crash. 
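As a concrete, hypothetical illustration of the item-transfer pattern described above, this is roughly the call sequence compiled code might emit for a list-directed PRINT of a default integer, using only declarations from this header; error handling via EnableHandlers() is omitted for brevity:

  // Sketch only: the Begin.../Output.../EndIoStatement sequence for output.
  static void PrintIntegerExample(std::int32_t i) {
    Cookie cookie{IONAME(BeginExternalListOutput)(
        DefaultOutputUnit, __FILE__, __LINE__)};
    IONAME(OutputInteger32)(cookie, i); // returns false once the statement is in error
    IONAME(EndIoStatement)(cookie);     // yields the final Iostat code
  }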
-bool IONAME(OutputDescriptor)(Cookie, const Descriptor &); -bool IONAME(InputDescriptor)(Cookie, const Descriptor &); +bool IODECL(OutputDescriptor)(Cookie, const Descriptor &); +bool IODECL(InputDescriptor)(Cookie, const Descriptor &); // Formatted (including list directed) I/O data items -bool IONAME(OutputInteger8)(Cookie, std::int8_t); -bool IONAME(OutputInteger16)(Cookie, std::int16_t); +bool IODECL(OutputInteger8)(Cookie, std::int8_t); +bool IODECL(OutputInteger16)(Cookie, std::int16_t); bool IODECL(OutputInteger32)(Cookie, std::int32_t); -bool IONAME(OutputInteger64)(Cookie, std::int64_t); -bool IONAME(OutputInteger128)(Cookie, common::int128_t); -bool IONAME(InputInteger)(Cookie, std::int64_t &, int kind = 8); -bool IONAME(OutputReal32)(Cookie, float); -bool IONAME(InputReal32)(Cookie, float &); -bool IONAME(OutputReal64)(Cookie, double); -bool IONAME(InputReal64)(Cookie, double &); -bool IONAME(OutputComplex32)(Cookie, float, float); -bool IONAME(InputComplex32)(Cookie, float[2]); -bool IONAME(OutputComplex64)(Cookie, double, double); -bool IONAME(InputComplex64)(Cookie, double[2]); -bool IONAME(OutputCharacter)(Cookie, const char *, std::size_t, int kind = 1); -bool IONAME(OutputAscii)(Cookie, const char *, std::size_t); -bool IONAME(InputCharacter)(Cookie, char *, std::size_t, int kind = 1); -bool IONAME(InputAscii)(Cookie, char *, std::size_t); -bool IONAME(OutputLogical)(Cookie, bool); -bool IONAME(InputLogical)(Cookie, bool &); +bool IODECL(OutputInteger64)(Cookie, std::int64_t); +bool IODECL(OutputInteger128)(Cookie, common::int128_t); +bool IODECL(InputInteger)(Cookie, std::int64_t &, int kind = 8); +bool IODECL(OutputReal32)(Cookie, float); +bool IODECL(InputReal32)(Cookie, float &); +bool IODECL(OutputReal64)(Cookie, double); +bool IODECL(InputReal64)(Cookie, double &); +bool IODECL(OutputComplex32)(Cookie, float, float); +bool IODECL(InputComplex32)(Cookie, float[2]); +bool IODECL(OutputComplex64)(Cookie, double, double); +bool IODECL(InputComplex64)(Cookie, double[2]); +bool IODECL(OutputCharacter)(Cookie, const char *, std::size_t, int kind = 1); +bool IODECL(OutputAscii)(Cookie, const char *, std::size_t); +bool IODECL(InputCharacter)(Cookie, char *, std::size_t, int kind = 1); +bool IODECL(InputAscii)(Cookie, char *, std::size_t); +bool IODECL(OutputLogical)(Cookie, bool); +bool IODECL(InputLogical)(Cookie, bool &); // NAMELIST I/O must be the only data item in an (otherwise) // list-directed I/O statement. -bool IONAME(OutputNamelist)(Cookie, const NamelistGroup &); -bool IONAME(InputNamelist)(Cookie, const NamelistGroup &); +bool IODECL(OutputNamelist)(Cookie, const NamelistGroup &); +bool IODECL(InputNamelist)(Cookie, const NamelistGroup &); // When an I/O list item has a derived type with a specific defined // I/O subroutine of the appropriate generic kind for the active @@ -294,9 +294,9 @@ bool IONAME(InputNamelist)(Cookie, const NamelistGroup &); // made such a generic interface inaccessible), these data item transfer // APIs enable the I/O runtime to make the right calls to defined I/O // subroutines. 
-bool IONAME(OutputDerivedType)( +bool IODECL(OutputDerivedType)( Cookie, const Descriptor &, const NonTbpDefinedIoTable *); -bool IONAME(InputDerivedType)( +bool IODECL(InputDerivedType)( Cookie, const Descriptor &, const NonTbpDefinedIoTable *); // Additional specifier interfaces for the connection-list of @@ -304,56 +304,56 @@ bool IONAME(InputDerivedType)( // SetDelim(), GetIoMsg(), SetPad(), SetRound(), SetSign(), // & SetAsynchronous() are also acceptable for OPEN. // ACCESS=SEQUENTIAL, DIRECT, STREAM -bool IONAME(SetAccess)(Cookie, const char *, std::size_t); +bool IODECL(SetAccess)(Cookie, const char *, std::size_t); // ACTION=READ, WRITE, or READWRITE -bool IONAME(SetAction)(Cookie, const char *, std::size_t); +bool IODECL(SetAction)(Cookie, const char *, std::size_t); // CARRIAGECONTROL=LIST, FORTRAN, NONE -bool IONAME(SetCarriagecontrol)(Cookie, const char *, std::size_t); +bool IODECL(SetCarriagecontrol)(Cookie, const char *, std::size_t); // CONVERT=NATIVE, LITTLE_ENDIAN, BIG_ENDIAN, or SWAP -bool IONAME(SetConvert)(Cookie, const char *, std::size_t); +bool IODECL(SetConvert)(Cookie, const char *, std::size_t); // ENCODING=UTF-8, DEFAULT -bool IONAME(SetEncoding)(Cookie, const char *, std::size_t); +bool IODECL(SetEncoding)(Cookie, const char *, std::size_t); // FORM=FORMATTED, UNFORMATTED -bool IONAME(SetForm)(Cookie, const char *, std::size_t); +bool IODECL(SetForm)(Cookie, const char *, std::size_t); // POSITION=ASIS, REWIND, APPEND -bool IONAME(SetPosition)(Cookie, const char *, std::size_t); -bool IONAME(SetRecl)(Cookie, std::size_t); // RECL= +bool IODECL(SetPosition)(Cookie, const char *, std::size_t); +bool IODECL(SetRecl)(Cookie, std::size_t); // RECL= // STATUS can be set during an OPEN or CLOSE statement. // For OPEN: STATUS=OLD, NEW, SCRATCH, REPLACE, UNKNOWN // For CLOSE: STATUS=KEEP, DELETE -bool IONAME(SetStatus)(Cookie, const char *, std::size_t); +bool IODECL(SetStatus)(Cookie, const char *, std::size_t); -bool IONAME(SetFile)(Cookie, const char *, std::size_t chars); +bool IODECL(SetFile)(Cookie, const char *, std::size_t chars); // Acquires the runtime-created unit number for OPEN(NEWUNIT=) -bool IONAME(GetNewUnit)(Cookie, int &, int kind = 4); +bool IODECL(GetNewUnit)(Cookie, int &, int kind = 4); // READ(SIZE=), after all input items -std::size_t IONAME(GetSize)(Cookie); +std::size_t IODECL(GetSize)(Cookie); // INQUIRE(IOLENGTH=), after all output items -std::size_t IONAME(GetIoLength)(Cookie); +std::size_t IODECL(GetIoLength)(Cookie); // GetIoMsg() does not modify its argument unless an error or // end-of-record/file condition is present. -void IONAME(GetIoMsg)(Cookie, char *, std::size_t); // IOMSG= +void IODECL(GetIoMsg)(Cookie, char *, std::size_t); // IOMSG= // Defines ID= on READ/WRITE(ASYNCHRONOUS='YES') -AsynchronousId IONAME(GetAsynchronousId)(Cookie); +AsynchronousId IODECL(GetAsynchronousId)(Cookie); // INQUIRE() specifiers are mostly identified by their NUL-terminated // case-insensitive names. 
// ACCESS, ACTION, ASYNCHRONOUS, BLANK, CONVERT, DECIMAL, DELIM, DIRECT, // ENCODING, FORM, FORMATTED, NAME, PAD, POSITION, READ, READWRITE, ROUND, // SEQUENTIAL, SIGN, STREAM, UNFORMATTED, WRITE: -bool IONAME(InquireCharacter)(Cookie, InquiryKeywordHash, char *, std::size_t); +bool IODECL(InquireCharacter)(Cookie, InquiryKeywordHash, char *, std::size_t); // EXIST, NAMED, OPENED, and PENDING (without ID): -bool IONAME(InquireLogical)(Cookie, InquiryKeywordHash, bool &); +bool IODECL(InquireLogical)(Cookie, InquiryKeywordHash, bool &); // PENDING with ID -bool IONAME(InquirePendingId)(Cookie, AsynchronousId, bool &); +bool IODECL(InquirePendingId)(Cookie, AsynchronousId, bool &); // NEXTREC, NUMBER, POS, RECL, SIZE -bool IONAME(InquireInteger64)( +bool IODECL(InquireInteger64)( Cookie, InquiryKeywordHash, std::int64_t &, int kind = 8); // This function must be called to end an I/O statement, and its diff --git a/flang/runtime/environment.cpp b/flang/runtime/environment.cpp index b74067a..b2c9665 100644 --- a/flang/runtime/environment.cpp +++ b/flang/runtime/environment.cpp @@ -49,6 +49,7 @@ static void SetEnvironmentDefaults(const EnvironmentDefaultList *envDefaults) { } } +RT_OFFLOAD_API_GROUP_BEGIN Fortran::common::optional GetConvertFromString( const char *x, std::size_t n) { static const char *keywords[]{ @@ -68,6 +69,7 @@ Fortran::common::optional GetConvertFromString( return Fortran::common::nullopt; } } +RT_OFFLOAD_API_GROUP_END void ExecutionEnvironment::Configure(int ac, const char *av[], const char *env[], const EnvironmentDefaultList *envDefaults) { diff --git a/flang/runtime/environment.h b/flang/runtime/environment.h index 6c56993..b8b9f10 100644 --- a/flang/runtime/environment.h +++ b/flang/runtime/environment.h @@ -31,7 +31,7 @@ RT_OFFLOAD_VAR_GROUP_END // External unformatted I/O data conversions enum class Convert { Unknown, Native, LittleEndian, BigEndian, Swap }; -Fortran::common::optional GetConvertFromString( +RT_API_ATTRS Fortran::common::optional GetConvertFromString( const char *, std::size_t); struct ExecutionEnvironment { diff --git a/flang/runtime/freestanding-tools.h b/flang/runtime/freestanding-tools.h index 451bf13..9089dc6 100644 --- a/flang/runtime/freestanding-tools.h +++ b/flang/runtime/freestanding-tools.h @@ -52,6 +52,11 @@ #define STD_STRCPY_UNSUPPORTED 1 #endif +#if !defined(STD_STRCMP_UNSUPPORTED) && \ + (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__) +#define STD_STRCMP_UNSUPPORTED 1 +#endif + namespace Fortran::runtime { #if STD_FILL_N_UNSUPPORTED @@ -176,5 +181,19 @@ static inline RT_API_ATTRS char *strcpy(char *dest, const char *src) { using std::strcpy; #endif // !STD_STRCPY_UNSUPPORTED +#if STD_STRCMP_UNSUPPORTED +// Provides alternative implementation for std::strcmp(), if +// it is not supported. 
+static inline RT_API_ATTRS int strcmp(const char *lhs, const char *rhs) { + while (*lhs != '\0' && *lhs == *rhs) { + ++lhs; + ++rhs; + } + return static_cast(*lhs) - static_cast(*rhs); +} +#else // !STD_STRCMP_UNSUPPORTED +using std::strcmp; +#endif // !STD_STRCMP_UNSUPPORTED + } // namespace Fortran::runtime #endif // FORTRAN_RUNTIME_FREESTANDING_TOOLS_H_ diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index 3a86c9f..ccb5b57 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -25,8 +25,9 @@ #include namespace Fortran::runtime::io { +RT_EXT_API_GROUP_BEGIN -const char *InquiryKeywordHashDecode( +RT_API_ATTRS const char *InquiryKeywordHashDecode( char *buffer, std::size_t n, InquiryKeywordHash hash) { if (n < 1) { return nullptr; @@ -44,7 +45,7 @@ const char *InquiryKeywordHashDecode( } template -Cookie BeginInternalArrayListIO(const Descriptor &descriptor, +RT_API_ATTRS Cookie BeginInternalArrayListIO(const Descriptor &descriptor, void ** /*scratchArea*/, std::size_t /*scratchBytes*/, const char *sourceFile, int sourceLine) { Terminator oom{sourceFile, sourceLine}; @@ -54,14 +55,14 @@ Cookie BeginInternalArrayListIO(const Descriptor &descriptor, ->ioStatementState(); } -Cookie IONAME(BeginInternalArrayListOutput)(const Descriptor &descriptor, +Cookie IODEF(BeginInternalArrayListOutput)(const Descriptor &descriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { return BeginInternalArrayListIO( descriptor, scratchArea, scratchBytes, sourceFile, sourceLine); } -Cookie IONAME(BeginInternalArrayListInput)(const Descriptor &descriptor, +Cookie IODEF(BeginInternalArrayListInput)(const Descriptor &descriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { return BeginInternalArrayListIO( @@ -69,7 +70,7 @@ Cookie IONAME(BeginInternalArrayListInput)(const Descriptor &descriptor, } template -Cookie BeginInternalArrayFormattedIO(const Descriptor &descriptor, +RT_API_ATTRS Cookie BeginInternalArrayFormattedIO(const Descriptor &descriptor, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void ** /*scratchArea*/, std::size_t /*scratchBytes*/, const char *sourceFile, int sourceLine) { @@ -80,7 +81,7 @@ Cookie BeginInternalArrayFormattedIO(const Descriptor &descriptor, ->ioStatementState(); } -Cookie IONAME(BeginInternalArrayFormattedOutput)(const Descriptor &descriptor, +Cookie IODEF(BeginInternalArrayFormattedOutput)(const Descriptor &descriptor, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { @@ -89,7 +90,7 @@ Cookie IONAME(BeginInternalArrayFormattedOutput)(const Descriptor &descriptor, sourceLine); } -Cookie IONAME(BeginInternalArrayFormattedInput)(const Descriptor &descriptor, +Cookie IODEF(BeginInternalArrayFormattedInput)(const Descriptor &descriptor, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { @@ -110,14 +111,14 @@ RT_API_ATTRS Cookie BeginInternalListIO( ->ioStatementState(); } -Cookie IONAME(BeginInternalListOutput)(char *internal, +Cookie IODEF(BeginInternalListOutput)(char *internal, std::size_t internalLength, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { return BeginInternalListIO(internal, internalLength, scratchArea, scratchBytes, sourceFile, 
sourceLine); } -Cookie IONAME(BeginInternalListInput)(const char *internal, +Cookie IODEF(BeginInternalListInput)(const char *internal, std::size_t internalLength, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { return BeginInternalListIO(internal, internalLength, @@ -125,7 +126,7 @@ Cookie IONAME(BeginInternalListInput)(const char *internal, } template -Cookie BeginInternalFormattedIO( +RT_API_ATTRS Cookie BeginInternalFormattedIO( std::conditional_t *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void ** /*scratchArea*/, @@ -138,7 +139,7 @@ Cookie BeginInternalFormattedIO( ->ioStatementState(); } -Cookie IONAME(BeginInternalFormattedOutput)(char *internal, +Cookie IODEF(BeginInternalFormattedOutput)(char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { @@ -147,7 +148,7 @@ Cookie IONAME(BeginInternalFormattedOutput)(char *internal, sourceFile, sourceLine); } -Cookie IONAME(BeginInternalFormattedInput)(const char *internal, +Cookie IODEF(BeginInternalFormattedInput)(const char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { @@ -227,24 +228,22 @@ RT_API_ATTRS Cookie BeginExternalListIO( } } -RT_EXT_API_GROUP_BEGIN Cookie IODEF(BeginExternalListOutput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalListIO( unitNumber, sourceFile, sourceLine); } -RT_EXT_API_GROUP_END -Cookie IONAME(BeginExternalListInput)( +Cookie IODEF(BeginExternalListInput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalListIO( unitNumber, sourceFile, sourceLine); } template -Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength, - const Descriptor *formatDescriptor, ExternalUnit unitNumber, - const char *sourceFile, int sourceLine) { +RT_API_ATTRS Cookie BeginExternalFormattedIO(const char *format, + std::size_t formatLength, const Descriptor *formatDescriptor, + ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; Cookie errorCookie{nullptr}; ExternalFileUnit *unit{GetOrCreateUnit( @@ -286,14 +285,14 @@ Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength, } } -Cookie IONAME(BeginExternalFormattedOutput)(const char *format, +Cookie IODEF(BeginExternalFormattedOutput)(const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalFormattedIO(format, formatLength, formatDescriptor, unitNumber, sourceFile, sourceLine); } -Cookie IONAME(BeginExternalFormattedInput)(const char *format, +Cookie IODEF(BeginExternalFormattedInput)(const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalFormattedIO(format, formatLength, @@ -301,7 +300,7 @@ Cookie IONAME(BeginExternalFormattedInput)(const char *format, } template -Cookie BeginUnformattedIO( +RT_API_ATTRS Cookie BeginUnformattedIO( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; Cookie 
errorCookie{nullptr}; @@ -352,19 +351,19 @@ Cookie BeginUnformattedIO( } } -Cookie IONAME(BeginUnformattedOutput)( +Cookie IODEF(BeginUnformattedOutput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginUnformattedIO( unitNumber, sourceFile, sourceLine); } -Cookie IONAME(BeginUnformattedInput)( +Cookie IODEF(BeginUnformattedInput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginUnformattedIO( unitNumber, sourceFile, sourceLine); } -Cookie IONAME(BeginOpenUnit)( // OPEN(without NEWUNIT=) +Cookie IODEF(BeginOpenUnit)( // OPEN(without NEWUNIT=) ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; bool wasExtant{false}; @@ -384,7 +383,7 @@ Cookie IONAME(BeginOpenUnit)( // OPEN(without NEWUNIT=) } } -Cookie IONAME(BeginOpenNewUnit)( // OPEN(NEWUNIT=j) +Cookie IODEF(BeginOpenNewUnit)( // OPEN(NEWUNIT=j) const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; ExternalFileUnit &unit{ @@ -394,7 +393,7 @@ Cookie IONAME(BeginOpenNewUnit)( // OPEN(NEWUNIT=j) sourceLine); } -Cookie IONAME(BeginWait)(ExternalUnit unitNumber, AsynchronousId id, +Cookie IODEF(BeginWait)(ExternalUnit unitNumber, AsynchronousId id, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -410,12 +409,12 @@ Cookie IONAME(BeginWait)(ExternalUnit unitNumber, AsynchronousId id, terminator, unitNumber, id == 0 ? IostatOk : IostatBadWaitUnit); } } -Cookie IONAME(BeginWaitAll)( +Cookie IODEF(BeginWaitAll)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return IONAME(BeginWait)(unitNumber, 0 /*no ID=*/, sourceFile, sourceLine); } -Cookie IONAME(BeginClose)( +Cookie IODEF(BeginClose)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -434,7 +433,7 @@ Cookie IONAME(BeginClose)( } } -Cookie IONAME(BeginFlush)( +Cookie IODEF(BeginFlush)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -452,7 +451,7 @@ Cookie IONAME(BeginFlush)( } } -Cookie IONAME(BeginBackspace)( +Cookie IODEF(BeginBackspace)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -470,7 +469,7 @@ Cookie IONAME(BeginBackspace)( } } -Cookie IONAME(BeginEndfile)( +Cookie IODEF(BeginEndfile)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; Cookie errorCookie{nullptr}; @@ -490,7 +489,7 @@ Cookie IONAME(BeginEndfile)( } } -Cookie IONAME(BeginRewind)( +Cookie IODEF(BeginRewind)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; Cookie errorCookie{nullptr}; @@ -510,7 +509,7 @@ Cookie IONAME(BeginRewind)( } } -Cookie IONAME(BeginInquireUnit)( +Cookie IODEF(BeginInquireUnit)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -530,14 +529,14 @@ Cookie IONAME(BeginInquireUnit)( } } -Cookie IONAME(BeginInquireFile)(const char *path, std::size_t 
pathLength, +Cookie IODEF(BeginInquireFile)(const char *path, std::size_t pathLength, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; auto trimmed{SaveDefaultCharacter( path, TrimTrailingSpaces(path, pathLength), terminator)}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp( - trimmed.get(), std::strlen(trimmed.get()))}) { + trimmed.get(), Fortran::runtime::strlen(trimmed.get()))}) { // INQUIRE(FILE=) to a connected unit if (ChildIo * child{unit->GetChildIo()}) { return &child->BeginIoStatement( @@ -554,7 +553,7 @@ Cookie IONAME(BeginInquireFile)(const char *path, std::size_t pathLength, } } -Cookie IONAME(BeginInquireIoLength)(const char *sourceFile, int sourceLine) { +Cookie IODEF(BeginInquireIoLength)(const char *sourceFile, int sourceLine) { Terminator oom{sourceFile, sourceLine}; return &New{oom}(sourceFile, sourceLine) .release() @@ -563,7 +562,7 @@ Cookie IONAME(BeginInquireIoLength)(const char *sourceFile, int sourceLine) { // Control list items -void IONAME(EnableHandlers)(Cookie cookie, bool hasIoStat, bool hasErr, +void IODEF(EnableHandlers)(Cookie cookie, bool hasIoStat, bool hasErr, bool hasEnd, bool hasEor, bool hasIoMsg) { IoErrorHandler &handler{cookie->GetIoErrorHandler()}; if (hasIoStat) { @@ -583,8 +582,8 @@ void IONAME(EnableHandlers)(Cookie cookie, bool hasIoStat, bool hasErr, } } -static bool YesOrNo(const char *keyword, std::size_t length, const char *what, - IoErrorHandler &handler) { +static RT_API_ATTRS bool YesOrNo(const char *keyword, std::size_t length, + const char *what, IoErrorHandler &handler) { static const char *keywords[]{"YES", "NO", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { case 0: @@ -598,8 +597,7 @@ static bool YesOrNo(const char *keyword, std::size_t length, const char *what, } } -bool IONAME(SetAdvance)( - Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetAdvance)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; bool nonAdvancing{!YesOrNo(keyword, length, "ADVANCE", handler)}; @@ -616,7 +614,7 @@ bool IONAME(SetAdvance)( return !handler.InError(); } -bool IONAME(SetBlank)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetBlank)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{"NULL", "ZERO", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { @@ -633,8 +631,7 @@ bool IONAME(SetBlank)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IONAME(SetDecimal)( - Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetDecimal)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{"COMMA", "POINT", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { @@ -651,7 +648,7 @@ bool IONAME(SetDecimal)( } } -bool IONAME(SetDelim)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetDelim)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{"APOSTROPHE", "QUOTE", "NONE", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { @@ -671,14 +668,14 @@ bool IONAME(SetDelim)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IONAME(SetPad)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetPad)(Cookie cookie, const char *keyword, std::size_t length) { 
IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; io.mutableModes().pad = YesOrNo(keyword, length, "PAD", handler); return !handler.InError(); } -bool IONAME(SetPos)(Cookie cookie, std::int64_t pos) { +bool IODEF(SetPos)(Cookie cookie, std::int64_t pos) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (auto *unit{io.GetExternalFileUnit()}) { @@ -689,7 +686,7 @@ bool IONAME(SetPos)(Cookie cookie, std::int64_t pos) { return false; } -bool IONAME(SetRec)(Cookie cookie, std::int64_t rec) { +bool IODEF(SetRec)(Cookie cookie, std::int64_t rec) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (auto *unit{io.GetExternalFileUnit()}) { @@ -705,7 +702,7 @@ bool IONAME(SetRec)(Cookie cookie, std::int64_t rec) { return true; } -bool IONAME(SetRound)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetRound)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{"UP", "DOWN", "ZERO", "NEAREST", "COMPATIBLE", "PROCESSOR_DEFINED", nullptr}; @@ -735,7 +732,7 @@ bool IONAME(SetRound)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IONAME(SetSign)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetSign)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{ "PLUS", "SUPPRESS", "PROCESSOR_DEFINED", nullptr}; @@ -754,7 +751,7 @@ bool IONAME(SetSign)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IONAME(SetAccess)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetAccess)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -790,7 +787,7 @@ bool IONAME(SetAccess)(Cookie cookie, const char *keyword, std::size_t length) { return true; } -bool IONAME(SetAction)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetAction)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -832,7 +829,7 @@ bool IONAME(SetAction)(Cookie cookie, const char *keyword, std::size_t length) { return true; } -bool IONAME(SetAsynchronous)( +bool IODEF(SetAsynchronous)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; @@ -859,7 +856,7 @@ bool IONAME(SetAsynchronous)( return !handler.InError(); } -bool IONAME(SetCarriagecontrol)( +bool IODEF(SetCarriagecontrol)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; @@ -891,8 +888,7 @@ bool IONAME(SetCarriagecontrol)( } } -bool IONAME(SetConvert)( - Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetConvert)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -916,7 +912,7 @@ bool IONAME(SetConvert)( } } -bool IONAME(SetEncoding)( +bool IODEF(SetEncoding)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; @@ -948,7 +944,7 @@ bool IONAME(SetEncoding)( return true; } -bool IONAME(SetForm)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetForm)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if 
(!open) { @@ -976,7 +972,7 @@ bool IONAME(SetForm)(Cookie cookie, const char *keyword, std::size_t length) { return true; } -bool IONAME(SetPosition)( +bool IODEF(SetPosition)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; @@ -1009,7 +1005,7 @@ bool IONAME(SetPosition)( return true; } -bool IONAME(SetRecl)(Cookie cookie, std::size_t n) { +bool IODEF(SetRecl)(Cookie cookie, std::size_t n) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -1036,7 +1032,7 @@ bool IONAME(SetRecl)(Cookie cookie, std::size_t n) { } } -bool IONAME(SetStatus)(Cookie cookie, const char *keyword, std::size_t length) { +bool IODEF(SetStatus)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; if (auto *open{io.get_if()}) { if (open->completedOperation()) { @@ -1090,7 +1086,7 @@ bool IONAME(SetStatus)(Cookie cookie, const char *keyword, std::size_t length) { "SetStatus() called when not in an OPEN or CLOSE statement"); } -bool IONAME(SetFile)(Cookie cookie, const char *path, std::size_t chars) { +bool IODEF(SetFile)(Cookie cookie, const char *path, std::size_t chars) { IoStatementState &io{*cookie}; if (auto *open{io.get_if()}) { if (open->completedOperation()) { @@ -1107,7 +1103,7 @@ bool IONAME(SetFile)(Cookie cookie, const char *path, std::size_t chars) { return false; } -bool IONAME(GetNewUnit)(Cookie cookie, int &unit, int kind) { +bool IODEF(GetNewUnit)(Cookie cookie, int &unit, int kind) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -1135,15 +1131,15 @@ bool IONAME(GetNewUnit)(Cookie cookie, int &unit, int kind) { // Data transfers -bool IONAME(OutputDescriptor)(Cookie cookie, const Descriptor &descriptor) { +bool IODEF(OutputDescriptor)(Cookie cookie, const Descriptor &descriptor) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputDescriptor)(Cookie cookie, const Descriptor &descriptor) { +bool IODEF(InputDescriptor)(Cookie cookie, const Descriptor &descriptor) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputInteger8)(Cookie cookie, std::int8_t n) { +bool IODEF(OutputInteger8)(Cookie cookie, std::int8_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger8")) { return false; } @@ -1154,7 +1150,7 @@ bool IONAME(OutputInteger8)(Cookie cookie, std::int8_t n) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputInteger16)(Cookie cookie, std::int16_t n) { +bool IODEF(OutputInteger16)(Cookie cookie, std::int16_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger16")) { return false; } @@ -1165,7 +1161,6 @@ bool IONAME(OutputInteger16)(Cookie cookie, std::int16_t n) { return descr::DescriptorIO(*cookie, descriptor); } -RT_EXT_API_GROUP_BEGIN bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger32")) { return false; @@ -1176,9 +1171,8 @@ bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) { TypeCategory::Integer, 4, reinterpret_cast(&n), 0); return descr::DescriptorIO(*cookie, descriptor); } -RT_EXT_API_GROUP_END -bool IONAME(OutputInteger64)(Cookie cookie, std::int64_t n) { +bool IODEF(OutputInteger64)(Cookie cookie, std::int64_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger64")) { return false; } @@ -1190,7 +1184,7 @@ bool IONAME(OutputInteger64)(Cookie cookie, std::int64_t n) { } #ifdef __SIZEOF_INT128__ -bool IONAME(OutputInteger128)(Cookie cookie, common::int128_t n) { +bool IODEF(OutputInteger128)(Cookie cookie, 
common::int128_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger128")) { return false; } @@ -1202,7 +1196,7 @@ bool IONAME(OutputInteger128)(Cookie cookie, common::int128_t n) { } #endif -bool IONAME(InputInteger)(Cookie cookie, std::int64_t &n, int kind) { +bool IODEF(InputInteger)(Cookie cookie, std::int64_t &n, int kind) { if (!cookie->CheckFormattedStmtType("InputInteger")) { return false; } @@ -1213,7 +1207,7 @@ bool IONAME(InputInteger)(Cookie cookie, std::int64_t &n, int kind) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputReal32)(Cookie cookie, float x) { +bool IODEF(OutputReal32)(Cookie cookie, float x) { if (!cookie->CheckFormattedStmtType("OutputReal32")) { return false; } @@ -1223,7 +1217,7 @@ bool IONAME(OutputReal32)(Cookie cookie, float x) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputReal64)(Cookie cookie, double x) { +bool IODEF(OutputReal64)(Cookie cookie, double x) { if (!cookie->CheckFormattedStmtType("OutputReal64")) { return false; } @@ -1233,7 +1227,7 @@ bool IONAME(OutputReal64)(Cookie cookie, double x) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputReal32)(Cookie cookie, float &x) { +bool IODEF(InputReal32)(Cookie cookie, float &x) { if (!cookie->CheckFormattedStmtType("InputReal32")) { return false; } @@ -1243,7 +1237,7 @@ bool IONAME(InputReal32)(Cookie cookie, float &x) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputReal64)(Cookie cookie, double &x) { +bool IODEF(InputReal64)(Cookie cookie, double &x) { if (!cookie->CheckFormattedStmtType("InputReal64")) { return false; } @@ -1253,7 +1247,7 @@ bool IONAME(InputReal64)(Cookie cookie, double &x) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputComplex32)(Cookie cookie, float r, float i) { +bool IODEF(OutputComplex32)(Cookie cookie, float r, float i) { if (!cookie->CheckFormattedStmtType("OutputComplex32")) { return false; } @@ -1265,7 +1259,7 @@ bool IONAME(OutputComplex32)(Cookie cookie, float r, float i) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputComplex64)(Cookie cookie, double r, double i) { +bool IODEF(OutputComplex64)(Cookie cookie, double r, double i) { if (!cookie->CheckFormattedStmtType("OutputComplex64")) { return false; } @@ -1277,7 +1271,7 @@ bool IONAME(OutputComplex64)(Cookie cookie, double r, double i) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputComplex32)(Cookie cookie, float z[2]) { +bool IODEF(InputComplex32)(Cookie cookie, float z[2]) { if (!cookie->CheckFormattedStmtType("InputComplex32")) { return false; } @@ -1288,7 +1282,7 @@ bool IONAME(InputComplex32)(Cookie cookie, float z[2]) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputComplex64)(Cookie cookie, double z[2]) { +bool IODEF(InputComplex64)(Cookie cookie, double z[2]) { if (!cookie->CheckFormattedStmtType("InputComplex64")) { return false; } @@ -1299,7 +1293,7 @@ bool IONAME(InputComplex64)(Cookie cookie, double z[2]) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputCharacter)( +bool IODEF(OutputCharacter)( Cookie cookie, const char *x, std::size_t length, int kind) { if (!cookie->CheckFormattedStmtType("OutputCharacter")) { return false; @@ -1311,11 +1305,11 @@ bool IONAME(OutputCharacter)( return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputAscii)(Cookie cookie, const char *x, std::size_t length) { +bool IODEF(OutputAscii)(Cookie cookie, const char *x, std::size_t length) { return 
IONAME(OutputCharacter(cookie, x, length, 1)); } -bool IONAME(InputCharacter)( +bool IODEF(InputCharacter)( Cookie cookie, char *x, std::size_t length, int kind) { if (!cookie->CheckFormattedStmtType("InputCharacter")) { return false; @@ -1326,11 +1320,11 @@ bool IONAME(InputCharacter)( return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputAscii)(Cookie cookie, char *x, std::size_t length) { +bool IODEF(InputAscii)(Cookie cookie, char *x, std::size_t length) { return IONAME(InputCharacter)(cookie, x, length, 1); } -bool IONAME(OutputLogical)(Cookie cookie, bool truth) { +bool IODEF(OutputLogical)(Cookie cookie, bool truth) { if (!cookie->CheckFormattedStmtType("OutputLogical")) { return false; } @@ -1341,7 +1335,7 @@ bool IONAME(OutputLogical)(Cookie cookie, bool truth) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(InputLogical)(Cookie cookie, bool &truth) { +bool IODEF(InputLogical)(Cookie cookie, bool &truth) { if (!cookie->CheckFormattedStmtType("InputLogical")) { return false; } @@ -1352,17 +1346,17 @@ bool IONAME(InputLogical)(Cookie cookie, bool &truth) { return descr::DescriptorIO(*cookie, descriptor); } -bool IONAME(OutputDerivedType)(Cookie cookie, const Descriptor &descriptor, +bool IODEF(OutputDerivedType)(Cookie cookie, const Descriptor &descriptor, const NonTbpDefinedIoTable *table) { return descr::DescriptorIO(*cookie, descriptor, table); } -bool IONAME(InputDerivedType)(Cookie cookie, const Descriptor &descriptor, +bool IODEF(InputDerivedType)(Cookie cookie, const Descriptor &descriptor, const NonTbpDefinedIoTable *table) { return descr::DescriptorIO(*cookie, descriptor, table); } -std::size_t IONAME(GetSize)(Cookie cookie) { +std::size_t IODEF(GetSize)(Cookie cookie) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (!handler.InError()) { @@ -1379,7 +1373,7 @@ std::size_t IONAME(GetSize)(Cookie cookie) { return 0; } -std::size_t IONAME(GetIoLength)(Cookie cookie) { +std::size_t IODEF(GetIoLength)(Cookie cookie) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (!handler.InError()) { @@ -1395,7 +1389,7 @@ std::size_t IONAME(GetIoLength)(Cookie cookie) { return 0; } -void IONAME(GetIoMsg)(Cookie cookie, char *msg, std::size_t length) { +void IODEF(GetIoMsg)(Cookie cookie, char *msg, std::size_t length) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (!handler.InError()) { @@ -1406,7 +1400,7 @@ void IONAME(GetIoMsg)(Cookie cookie, char *msg, std::size_t length) { } } -AsynchronousId IONAME(GetAsynchronousId)(Cookie cookie) { +AsynchronousId IODEF(GetAsynchronousId)(Cookie cookie) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (auto *ext{io.get_if()}) { @@ -1419,24 +1413,24 @@ AsynchronousId IONAME(GetAsynchronousId)(Cookie cookie) { return 0; } -bool IONAME(InquireCharacter)(Cookie cookie, InquiryKeywordHash inquiry, +bool IODEF(InquireCharacter)(Cookie cookie, InquiryKeywordHash inquiry, char *result, std::size_t length) { IoStatementState &io{*cookie}; return io.Inquire(inquiry, result, length); } -bool IONAME(InquireLogical)( +bool IODEF(InquireLogical)( Cookie cookie, InquiryKeywordHash inquiry, bool &result) { IoStatementState &io{*cookie}; return io.Inquire(inquiry, result); } -bool IONAME(InquirePendingId)(Cookie cookie, AsynchronousId id, bool &result) { +bool IODEF(InquirePendingId)(Cookie cookie, AsynchronousId id, bool &result) { IoStatementState &io{*cookie}; return 
io.Inquire(HashInquiryKeyword("PENDING"), id, result); } -bool IONAME(InquireInteger64)( +bool IODEF(InquireInteger64)( Cookie cookie, InquiryKeywordHash inquiry, std::int64_t &result, int kind) { IoStatementState &io{*cookie}; std::int64_t n{0}; // safe "undefined" value @@ -1452,17 +1446,15 @@ bool IONAME(InquireInteger64)( return false; } -RT_EXT_API_GROUP_BEGIN enum Iostat IODEF(EndIoStatement)(Cookie cookie) { IoStatementState &io{*cookie}; return static_cast(io.EndIoStatement()); } -RT_EXT_API_GROUP_END template -static enum Iostat CheckUnitNumberInRangeImpl(INT unit, bool handleError, - char *ioMsg, std::size_t ioMsgLength, const char *sourceFile, - int sourceLine) { +static RT_API_ATTRS enum Iostat CheckUnitNumberInRangeImpl(INT unit, + bool handleError, char *ioMsg, std::size_t ioMsgLength, + const char *sourceFile, int sourceLine) { static_assert(sizeof(INT) >= sizeof(ExternalUnit), "only intended to be used when the INT to ExternalUnit conversion is " "narrowing"); @@ -1494,15 +1486,15 @@ static enum Iostat CheckUnitNumberInRangeImpl(INT unit, bool handleError, return IostatOk; } -enum Iostat IONAME(CheckUnitNumberInRange64)(std::int64_t unit, - bool handleError, char *ioMsg, std::size_t ioMsgLength, - const char *sourceFile, int sourceLine) { +enum Iostat IODEF(CheckUnitNumberInRange64)(std::int64_t unit, bool handleError, + char *ioMsg, std::size_t ioMsgLength, const char *sourceFile, + int sourceLine) { return CheckUnitNumberInRangeImpl( unit, handleError, ioMsg, ioMsgLength, sourceFile, sourceLine); } #ifdef __SIZEOF_INT128__ -enum Iostat IONAME(CheckUnitNumberInRange128)(common::int128_t unit, +enum Iostat IODEF(CheckUnitNumberInRange128)(common::int128_t unit, bool handleError, char *ioMsg, std::size_t ioMsgLength, const char *sourceFile, int sourceLine) { return CheckUnitNumberInRangeImpl( @@ -1525,3 +1517,5 @@ void std::__libcpp_verbose_abort(char const *format, ...) { std::abort(); } #endif + +RT_EXT_API_GROUP_END diff --git a/flang/runtime/io-error.cpp b/flang/runtime/io-error.cpp index b006b82f..7a90966 100644 --- a/flang/runtime/io-error.cpp +++ b/flang/runtime/io-error.cpp @@ -109,8 +109,6 @@ void IoErrorHandler::SignalPendingError() { SignalError(error); } -RT_OFFLOAD_API_GROUP_END - void IoErrorHandler::SignalErrno() { SignalError(errno); } bool IoErrorHandler::GetIoMsg(char *buffer, std::size_t bufferLength) { @@ -127,7 +125,10 @@ bool IoErrorHandler::GetIoMsg(char *buffer, std::size_t bufferLength) { // in LLVM v9.0.1 with inadequate modification for Fortran, // since rectified. bool ok{false}; -#if HAVE_STRERROR_R +#if defined(RT_DEVICE_COMPILATION) + // strerror_r is not available on device. + msg = "errno description is not available on device"; +#elif HAVE_STRERROR_R // strerror_r is thread-safe. 
#if defined(__GLIBC__) && defined(_GNU_SOURCE) // glibc defines its own incompatible version of strerror_r @@ -157,4 +158,6 @@ bool IoErrorHandler::GetIoMsg(char *buffer, std::size_t bufferLength) { return false; } } + +RT_OFFLOAD_API_GROUP_END } // namespace Fortran::runtime::io diff --git a/flang/runtime/io-error.h b/flang/runtime/io-error.h index 0fe11c9..426573e 100644 --- a/flang/runtime/io-error.h +++ b/flang/runtime/io-error.h @@ -61,7 +61,7 @@ public: RT_API_ATTRS void SignalPendingError(); RT_API_ATTRS int GetIoStat() const { return ioStat_; } - bool GetIoMsg(char *, std::size_t); + RT_API_ATTRS bool GetIoMsg(char *, std::size_t); private: enum Flag : std::uint8_t { diff --git a/flang/runtime/namelist.cpp b/flang/runtime/namelist.cpp index b502d41..b9eed21 100644 --- a/flang/runtime/namelist.cpp +++ b/flang/runtime/namelist.cpp @@ -17,16 +17,20 @@ namespace Fortran::runtime::io { +RT_VAR_GROUP_BEGIN // Max size of a group, symbol or component identifier that can appear in // NAMELIST input, plus a byte for NUL termination. -static constexpr std::size_t nameBufferSize{201}; +static constexpr RT_CONST_VAR_ATTRS std::size_t nameBufferSize{201}; +RT_VAR_GROUP_END -static inline char32_t GetComma(IoStatementState &io) { +RT_OFFLOAD_API_GROUP_BEGIN + +static inline RT_API_ATTRS char32_t GetComma(IoStatementState &io) { return io.mutableModes().editingFlags & decimalComma ? char32_t{';'} : char32_t{','}; } -bool IONAME(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { +bool IODEF(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { IoStatementState &io{*cookie}; io.CheckFormattedStmtType("OutputNamelist"); io.mutableModes().inNamelist = true; @@ -40,7 +44,8 @@ bool IONAME(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { if ((connection.NeedAdvance(prefixLen) && !(io.AdvanceRecord() && EmitAscii(io, " ", 1))) || !EmitAscii(io, prefix, prefixLen) || - (connection.NeedAdvance(std::strlen(str) + (suffix != ' ')) && + (connection.NeedAdvance( + Fortran::runtime::strlen(str) + (suffix != ' ')) && !(io.AdvanceRecord() && EmitAscii(io, " ", 1)))) { return false; } @@ -84,20 +89,20 @@ bool IONAME(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { return EmitUpperCase("/", 1, "", ' '); } -static constexpr bool IsLegalIdStart(char32_t ch) { +static constexpr RT_API_ATTRS bool IsLegalIdStart(char32_t ch) { return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' || ch == '@'; } -static constexpr bool IsLegalIdChar(char32_t ch) { +static constexpr RT_API_ATTRS bool IsLegalIdChar(char32_t ch) { return IsLegalIdStart(ch) || (ch >= '0' && ch <= '9'); } -static constexpr char NormalizeIdChar(char32_t ch) { +static constexpr RT_API_ATTRS char NormalizeIdChar(char32_t ch) { return static_cast(ch >= 'A' && ch <= 'Z' ? 
ch - 'A' + 'a' : ch); } -static bool GetLowerCaseName( +static RT_API_ATTRS bool GetLowerCaseName( IoStatementState &io, char buffer[], std::size_t maxLength) { std::size_t byteLength{0}; if (auto ch{io.GetNextNonBlank(byteLength)}) { @@ -119,7 +124,7 @@ static bool GetLowerCaseName( return false; } -static Fortran::common::optional GetSubscriptValue( +static RT_API_ATTRS Fortran::common::optional GetSubscriptValue( IoStatementState &io) { Fortran::common::optional value; std::size_t byteCount{0}; @@ -152,8 +157,8 @@ static Fortran::common::optional GetSubscriptValue( return value; } -static bool HandleSubscripts(IoStatementState &io, Descriptor &desc, - const Descriptor &source, const char *name) { +static RT_API_ATTRS bool HandleSubscripts(IoStatementState &io, + Descriptor &desc, const Descriptor &source, const char *name) { IoErrorHandler &handler{io.GetIoErrorHandler()}; // Allow for blanks in subscripts; they're nonstandard, but not // ambiguous within the parentheses. @@ -252,7 +257,7 @@ static bool HandleSubscripts(IoStatementState &io, Descriptor &desc, return false; } -static void StorageSequenceExtension( +static RT_API_ATTRS void StorageSequenceExtension( Descriptor &desc, const Descriptor &source) { // Support the near-universal extension of NAMELIST input into a // designatable storage sequence identified by its initial scalar array @@ -274,7 +279,7 @@ static void StorageSequenceExtension( } } -static bool HandleSubstring( +static RT_API_ATTRS bool HandleSubstring( IoStatementState &io, Descriptor &desc, const char *name) { IoErrorHandler &handler{io.GetIoErrorHandler()}; auto pair{desc.type().GetCategoryAndKind()}; @@ -335,7 +340,7 @@ static bool HandleSubstring( return false; } -static bool HandleComponent(IoStatementState &io, Descriptor &desc, +static RT_API_ATTRS bool HandleComponent(IoStatementState &io, Descriptor &desc, const Descriptor &source, const char *name) { IoErrorHandler &handler{io.GetIoErrorHandler()}; char compName[nameBufferSize]; @@ -344,7 +349,8 @@ static bool HandleComponent(IoStatementState &io, Descriptor &desc, if (const typeInfo::DerivedType * type{addendum ? addendum->derivedType() : nullptr}) { if (const typeInfo::Component * - comp{type->FindDataComponent(compName, std::strlen(compName))}) { + comp{type->FindDataComponent( + compName, Fortran::runtime::strlen(compName))}) { bool createdDesc{false}; if (comp->rank() > 0 && source.rank() > 0) { // If base and component are both arrays, the component name @@ -408,7 +414,7 @@ static bool HandleComponent(IoStatementState &io, Descriptor &desc, // Advance to the terminal '/' of a namelist group or leading '&'/'$' // of the next. 
-static void SkipNamelistGroup(IoStatementState &io) { +static RT_API_ATTRS void SkipNamelistGroup(IoStatementState &io) { std::size_t byteCount{0}; while (auto ch{io.GetNextNonBlank(byteCount)}) { io.HandleRelativePosition(byteCount); @@ -431,7 +437,7 @@ static void SkipNamelistGroup(IoStatementState &io) { } } -bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { +bool IODEF(InputNamelist)(Cookie cookie, const NamelistGroup &group) { IoStatementState &io{*cookie}; io.CheckFormattedStmtType("InputNamelist"); io.mutableModes().inNamelist = true; @@ -470,7 +476,7 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { handler.SignalError("NAMELIST input group has no name"); return false; } - if (std::strcmp(group.groupName, name) == 0) { + if (Fortran::runtime::strcmp(group.groupName, name) == 0) { break; // found it } SkipNamelistGroup(io); @@ -489,7 +495,7 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { } std::size_t itemIndex{0}; for (; itemIndex < group.items; ++itemIndex) { - if (std::strcmp(name, group.item[itemIndex].name) == 0) { + if (Fortran::runtime::strcmp(name, group.item[itemIndex].name) == 0) { break; } } @@ -590,8 +596,6 @@ bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { return true; } -RT_OFFLOAD_API_GROUP_BEGIN - bool IsNamelistNameOrSlash(IoStatementState &io) { if (auto *listInput{ io.get_if>()}) { -- cgit v1.1 From 4078763e2e73b4ef3f9e728f66cdf9e429d3f7a4 Mon Sep 17 00:00:00 2001 From: Damien L-G Date: Wed, 3 Apr 2024 17:51:46 -0400 Subject: [libc++] Fix copy/pasta error in atomic tests for `atomic_compare_exchange_{weak,strong}` (#87135) Spotted this minor mistake in the tests as I was looking into testing more thoroughly `atomic_ref`. The two argument overloads are tested just above. The names of the lambda clearly indicates that the intent was to test the one argument overload. 
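For reference, a minimal standalone sketch of the two overloads in question, using a plain std::atomic<float> rather than the suite's MaybeVolatile wrapper:

  #include <atomic>
  #include <cassert>

  void overload_sketch() {
    std::atomic<float> x{1.0f};
    float expected{1.0f};
    // One memory-order argument: the failure ordering is derived from the
    // success ordering. This is the overload the fixed lambdas now exercise.
    bool ok = x.compare_exchange_strong(expected, 2.0f, std::memory_order::seq_cst);
    assert(ok);
    expected = 2.0f;
    // Two memory-order arguments: success and failure orderings are explicit.
    // This form is what the tests just above already covered.
    ok = x.compare_exchange_strong(expected, 3.0f, std::memory_order::seq_cst,
                                   std::memory_order_relaxed);
    assert(ok);
  }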
--- .../atomics.types.float/compare_exchange_strong.pass.cpp | 4 ++-- .../atomics.types.float/compare_exchange_weak.pass.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/compare_exchange_strong.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/compare_exchange_strong.pass.cpp index 0b09a73..2f84f26 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/compare_exchange_strong.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/compare_exchange_strong.pass.cpp @@ -150,12 +150,12 @@ void test_impl() { test_seq_cst(store, load); auto store_one_arg = [](MaybeVolatile>& x, T old_val, T new_val) { - auto r = x.compare_exchange_strong(old_val, new_val, std::memory_order::seq_cst, std::memory_order_relaxed); + auto r = x.compare_exchange_strong(old_val, new_val, std::memory_order::seq_cst); assert(r); }; auto load_one_arg = [](MaybeVolatile>& x) { auto val = x.load(std::memory_order::relaxed); - while (!x.compare_exchange_strong(val, val, std::memory_order::seq_cst, std::memory_order_relaxed)) { + while (!x.compare_exchange_strong(val, val, std::memory_order::seq_cst)) { } return val; }; diff --git a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/compare_exchange_weak.pass.cpp b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/compare_exchange_weak.pass.cpp index f8a2f19..5a39ec7 100644 --- a/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/compare_exchange_weak.pass.cpp +++ b/libcxx/test/std/atomics/atomics.types.generic/atomics.types.float/compare_exchange_weak.pass.cpp @@ -165,12 +165,12 @@ void test_impl() { auto store_one_arg = [](MaybeVolatile>& x, T old_val, T new_val) { // could fail spuriously, so put it in a loop - while (!x.compare_exchange_weak(old_val, new_val, std::memory_order::seq_cst, std::memory_order_relaxed)) { + while (!x.compare_exchange_weak(old_val, new_val, std::memory_order::seq_cst)) { } }; auto load_one_arg = [](MaybeVolatile>& x) { auto val = x.load(std::memory_order::relaxed); - while (!x.compare_exchange_weak(val, val, std::memory_order::seq_cst, std::memory_order_relaxed)) { + while (!x.compare_exchange_weak(val, val, std::memory_order::seq_cst)) { } return val; }; -- cgit v1.1 From 029e1d751503268e3d8b01db769e710835c3010d Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 3 Apr 2024 15:19:03 -0700 Subject: Revert "Revert "Revert "[clang][UBSan] Add implicit conversion check for bitfields""" (#87562) Reverts llvm/llvm-project#87529 Reverts #87518 https://lab.llvm.org/buildbot/#/builders/37/builds/33262 is still broken --- clang/docs/ReleaseNotes.rst | 7 - clang/docs/UndefinedBehaviorSanitizer.rst | 19 +- clang/include/clang/Basic/Sanitizers.def | 20 +- clang/lib/CodeGen/CGExpr.cpp | 37 +-- clang/lib/CodeGen/CGExprScalar.cpp | 257 +++------------------ clang/lib/CodeGen/CodeGenFunction.h | 15 -- clang/test/CodeGen/ubsan-bitfield-conversion.c | 61 ----- .../test/CodeGenCXX/ubsan-bitfield-conversion.cpp | 94 -------- clang/test/Driver/fsanitize.c | 28 +-- compiler-rt/lib/ubsan/ubsan_handlers.cpp | 27 +-- compiler-rt/lib/ubsan/ubsan_handlers.h | 1 - 11 files changed, 73 insertions(+), 493 deletions(-) delete mode 100644 clang/test/CodeGen/ubsan-bitfield-conversion.c delete mode 100644 clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index e4c0e49..8fc9253 100644 --- 
a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -198,10 +198,6 @@ Non-comprehensive list of changes in this release New Compiler Flags ------------------ -- ``-fsanitize=implicit-bitfield-conversion`` checks implicit truncation and - sign change. -- ``-fsanitize=implicit-integer-conversion`` a group that replaces the previous - group ``-fsanitize=implicit-conversion``. - ``-Wmissing-designated-field-initializers``, grouped under ``-Wmissing-field-initializers``. This diagnostic can be disabled to make ``-Wmissing-field-initializers`` behave @@ -215,9 +211,6 @@ Modified Compiler Flags - Added a new diagnostic flag ``-Wreturn-mismatch`` which is grouped under ``-Wreturn-type``, and moved some of the diagnostics previously controlled by ``-Wreturn-type`` under this new flag. Fixes #GH72116. -- ``-fsanitize=implicit-conversion`` is now a group for both - ``-fsanitize=implicit-integer-conversion`` and - ``-fsanitize=implicit-bitfield-conversion``. - Added ``-Wcast-function-type-mismatch`` under the ``-Wcast-function-type`` warning group. Moved the diagnostic previously controlled by diff --git a/clang/docs/UndefinedBehaviorSanitizer.rst b/clang/docs/UndefinedBehaviorSanitizer.rst index 531d56e..8f58c92 100644 --- a/clang/docs/UndefinedBehaviorSanitizer.rst +++ b/clang/docs/UndefinedBehaviorSanitizer.rst @@ -148,11 +148,6 @@ Available checks are: Issues caught by this sanitizer are not undefined behavior, but are often unintentional. - ``-fsanitize=integer-divide-by-zero``: Integer division by zero. - - ``-fsanitize=implicit-bitfield-conversion``: Implicit conversion from - integer of larger bit width to smaller bitfield, if that results in data - loss. This includes unsigned/signed truncations and sign changes, similarly - to how the ``-fsanitize=implicit-integer-conversion`` group works, but - explicitly for bitfields. - ``-fsanitize=nonnull-attribute``: Passing null pointer as a function parameter which is declared to never be null. - ``-fsanitize=null``: Use of a null pointer or creation of a null @@ -198,8 +193,8 @@ Available checks are: signed division overflow (``INT_MIN/-1``). Note that checks are still added even when ``-fwrapv`` is enabled. This sanitizer does not check for lossy implicit conversions performed before the computation (see - ``-fsanitize=implicit-integer-conversion``). Both of these two issues are handled - by ``-fsanitize=implicit-integer-conversion`` group of checks. + ``-fsanitize=implicit-conversion``). Both of these two issues are handled + by ``-fsanitize=implicit-conversion`` group of checks. - ``-fsanitize=unreachable``: If control flow reaches an unreachable program point. - ``-fsanitize=unsigned-integer-overflow``: Unsigned integer overflow, where @@ -207,7 +202,7 @@ Available checks are: type. Unlike signed integer overflow, this is not undefined behavior, but it is often unintentional. This sanitizer does not check for lossy implicit conversions performed before such a computation - (see ``-fsanitize=implicit-integer-conversion``). + (see ``-fsanitize=implicit-conversion``). - ``-fsanitize=vla-bound``: A variable-length array whose bound does not evaluate to a positive value. - ``-fsanitize=vptr``: Use of an object whose vptr indicates that it is of @@ -229,15 +224,11 @@ You can also use the following check groups: - ``-fsanitize=implicit-integer-arithmetic-value-change``: Catches implicit conversions that change the arithmetic value of the integer. Enables ``implicit-signed-integer-truncation`` and ``implicit-integer-sign-change``. 
- - ``-fsanitize=implicit-integer-conversion``: Checks for suspicious - behavior of implicit integer conversions. Enables + - ``-fsanitize=implicit-conversion``: Checks for suspicious + behavior of implicit conversions. Enables ``implicit-unsigned-integer-truncation``, ``implicit-signed-integer-truncation``, and ``implicit-integer-sign-change``. - - ``-fsanitize=implicit-conversion``: Checks for suspicious - behavior of implicit conversions. Enables - ``implicit-integer-conversion``, and - ``implicit-bitfield-conversion``. - ``-fsanitize=integer``: Checks for undefined or suspicious integer behavior (e.g. unsigned integer overflow). Enables ``signed-integer-overflow``, ``unsigned-integer-overflow``, diff --git a/clang/include/clang/Basic/Sanitizers.def b/clang/include/clang/Basic/Sanitizers.def index b228ffd07..c2137e3 100644 --- a/clang/include/clang/Basic/Sanitizers.def +++ b/clang/include/clang/Basic/Sanitizers.def @@ -163,24 +163,24 @@ SANITIZER_GROUP("implicit-integer-arithmetic-value-change", ImplicitIntegerArithmeticValueChange, ImplicitIntegerSignChange | ImplicitSignedIntegerTruncation) -SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion, - ImplicitIntegerArithmeticValueChange | - ImplicitUnsignedIntegerTruncation) +SANITIZER("objc-cast", ObjCCast) -// Implicit bitfield sanitizers -SANITIZER("implicit-bitfield-conversion", ImplicitBitfieldConversion) +// FIXME: +//SANITIZER_GROUP("implicit-integer-conversion", ImplicitIntegerConversion, +// ImplicitIntegerArithmeticValueChange | +// ImplicitUnsignedIntegerTruncation) +//SANITIZER_GROUP("implicit-conversion", ImplicitConversion, +// ImplicitIntegerConversion) SANITIZER_GROUP("implicit-conversion", ImplicitConversion, - ImplicitIntegerConversion | - ImplicitBitfieldConversion) + ImplicitIntegerArithmeticValueChange | + ImplicitUnsignedIntegerTruncation) SANITIZER_GROUP("integer", Integer, - ImplicitIntegerConversion | IntegerDivideByZero | Shift | + ImplicitConversion | IntegerDivideByZero | Shift | SignedIntegerOverflow | UnsignedIntegerOverflow | UnsignedShiftBase) -SANITIZER("objc-cast", ObjCCast) - SANITIZER("local-bounds", LocalBounds) SANITIZER_GROUP("bounds", Bounds, ArrayBounds | LocalBounds) diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index 0c7f48f..5443235 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -5580,44 +5580,11 @@ LValue CodeGenFunction::EmitBinaryOperatorLValue(const BinaryOperator *E) { break; } - // TODO: Can we de-duplicate this code with the corresponding code in - // CGExprScalar, similar to the way EmitCompoundAssignmentLValue works? - RValue RV; - llvm::Value *Previous = nullptr; - QualType SrcType = E->getRHS()->getType(); - // Check if LHS is a bitfield, if RHS contains an implicit cast expression - // we want to extract that value and potentially (if the bitfield sanitizer - // is enabled) use it to check for an implicit conversion. - if (E->getLHS()->refersToBitField()) { - llvm::Value *RHS = - EmitWithOriginalRHSBitfieldAssignment(E, Previous, &SrcType); - RV = RValue::get(RHS); - } else - RV = EmitAnyExpr(E->getRHS()); - + RValue RV = EmitAnyExpr(E->getRHS()); LValue LV = EmitCheckedLValue(E->getLHS(), TCK_Store); - if (RV.isScalar()) EmitNullabilityCheck(LV, RV.getScalarVal(), E->getExprLoc()); - - if (LV.isBitField()) { - llvm::Value *Result = nullptr; - // If bitfield sanitizers are enabled we want to use the result - // to check whether a truncation or sign change has occurred. 
- if (SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) - EmitStoreThroughBitfieldLValue(RV, LV, &Result); - else - EmitStoreThroughBitfieldLValue(RV, LV); - - // If the expression contained an implicit conversion, make sure - // to use the value before the scalar conversion. - llvm::Value *Src = Previous ? Previous : RV.getScalarVal(); - QualType DstType = E->getLHS()->getType(); - EmitBitfieldConversionCheck(Src, SrcType, Result, DstType, - LV.getBitFieldInfo(), E->getExprLoc()); - } else - EmitStoreThroughLValue(RV, LV); - + EmitStoreThroughLValue(RV, LV); if (getLangOpts().OpenMP) CGM.getOpenMPRuntime().checkAndEmitLastprivateConditional(*this, E->getLHS()); diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp index a4ab8a11..397b497 100644 --- a/clang/lib/CodeGen/CGExprScalar.cpp +++ b/clang/lib/CodeGen/CGExprScalar.cpp @@ -15,7 +15,6 @@ #include "CGDebugInfo.h" #include "CGObjCRuntime.h" #include "CGOpenMPRuntime.h" -#include "CGRecordLayout.h" #include "CodeGenFunction.h" #include "CodeGenModule.h" #include "ConstantEmitter.h" @@ -309,7 +308,6 @@ public: llvm::Type *DstTy, SourceLocation Loc); /// Known implicit conversion check kinds. - /// This is used for bitfield conversion checks as well. /// Keep in sync with the enum of the same name in ubsan_handlers.h enum ImplicitConversionCheckKind : unsigned char { ICCK_IntegerTruncation = 0, // Legacy, was only used by clang 7. @@ -1105,21 +1103,6 @@ void ScalarExprEmitter::EmitIntegerTruncationCheck(Value *Src, QualType SrcType, {Src, Dst}); } -static llvm::Value *EmitIsNegativeTestHelper(Value *V, QualType VType, - const char *Name, - CGBuilderTy &Builder) { - bool VSigned = VType->isSignedIntegerOrEnumerationType(); - llvm::Type *VTy = V->getType(); - if (!VSigned) { - // If the value is unsigned, then it is never negative. - return llvm::ConstantInt::getFalse(VTy->getContext()); - } - llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0); - return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero, - llvm::Twine(Name) + "." + V->getName() + - ".negativitycheck"); -} - // Should be called within CodeGenFunction::SanitizerScope RAII scope. // Returns 'i1 false' when the conversion Src -> Dst changed the sign. static std::pair Value * { + // Is this value a signed type? + bool VSigned = VType->isSignedIntegerOrEnumerationType(); + llvm::Type *VTy = V->getType(); + if (!VSigned) { + // If the value is unsigned, then it is never negative. + // FIXME: can we encounter non-scalar VTy here? + return llvm::ConstantInt::getFalse(VTy->getContext()); + } + // Get the zero of the same type with which we will be comparing. + llvm::Constant *Zero = llvm::ConstantInt::get(VTy, 0); + // %V.isnegative = icmp slt %V, 0 + // I.e is %V *strictly* less than zero, does it have negative value? + return Builder.CreateICmp(llvm::ICmpInst::ICMP_SLT, V, Zero, + llvm::Twine(Name) + "." + V->getName() + + ".negativitycheck"); + }; + // 1. Was the old Value negative? - llvm::Value *SrcIsNegative = - EmitIsNegativeTestHelper(Src, SrcType, "src", Builder); + llvm::Value *SrcIsNegative = EmitIsNegativeTest(Src, SrcType, "src"); // 2. Is the new Value negative? - llvm::Value *DstIsNegative = - EmitIsNegativeTestHelper(Dst, DstType, "dst", Builder); + llvm::Value *DstIsNegative = EmitIsNegativeTest(Dst, DstType, "dst"); // 3. Now, was the 'negativity status' preserved during the conversion? // NOTE: conversion from negative to zero is considered to change the sign. 
// (We want to get 'false' when the conversion changed the sign) @@ -1244,136 +1245,6 @@ void ScalarExprEmitter::EmitIntegerSignChangeCheck(Value *Src, QualType SrcType, {Src, Dst}); } -// Should be called within CodeGenFunction::SanitizerScope RAII scope. -// Returns 'i1 false' when the truncation Src -> Dst was lossy. -static std::pair> -EmitBitfieldTruncationCheckHelper(Value *Src, QualType SrcType, Value *Dst, - QualType DstType, CGBuilderTy &Builder) { - bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); - bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); - - ScalarExprEmitter::ImplicitConversionCheckKind Kind; - if (!SrcSigned && !DstSigned) - Kind = ScalarExprEmitter::ICCK_UnsignedIntegerTruncation; - else - Kind = ScalarExprEmitter::ICCK_SignedIntegerTruncation; - - llvm::Value *Check = nullptr; - // 1. Extend the truncated value back to the same width as the Src. - Check = Builder.CreateIntCast(Dst, Src->getType(), DstSigned, "bf.anyext"); - // 2. Equality-compare with the original source value - Check = Builder.CreateICmpEQ(Check, Src, "bf.truncheck"); - // If the comparison result is 'i1 false', then the truncation was lossy. - - return std::make_pair( - Kind, std::make_pair(Check, SanitizerKind::ImplicitBitfieldConversion)); -} - -// Should be called within CodeGenFunction::SanitizerScope RAII scope. -// Returns 'i1 false' when the conversion Src -> Dst changed the sign. -static std::pair> -EmitBitfieldSignChangeCheckHelper(Value *Src, QualType SrcType, Value *Dst, - QualType DstType, CGBuilderTy &Builder) { - // 1. Was the old Value negative? - llvm::Value *SrcIsNegative = - EmitIsNegativeTestHelper(Src, SrcType, "bf.src", Builder); - // 2. Is the new Value negative? - llvm::Value *DstIsNegative = - EmitIsNegativeTestHelper(Dst, DstType, "bf.dst", Builder); - // 3. Now, was the 'negativity status' preserved during the conversion? - // NOTE: conversion from negative to zero is considered to change the sign. - // (We want to get 'false' when the conversion changed the sign) - // So we should just equality-compare the negativity statuses. - llvm::Value *Check = nullptr; - Check = - Builder.CreateICmpEQ(SrcIsNegative, DstIsNegative, "bf.signchangecheck"); - // If the comparison result is 'false', then the conversion changed the sign. - return std::make_pair( - ScalarExprEmitter::ICCK_IntegerSignChange, - std::make_pair(Check, SanitizerKind::ImplicitBitfieldConversion)); -} - -void CodeGenFunction::EmitBitfieldConversionCheck(Value *Src, QualType SrcType, - Value *Dst, QualType DstType, - const CGBitFieldInfo &Info, - SourceLocation Loc) { - - if (!SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) - return; - - // We only care about int->int conversions here. - // We ignore conversions to/from pointer and/or bool. - if (!PromotionIsPotentiallyEligibleForImplicitIntegerConversionCheck(SrcType, - DstType)) - return; - - if (DstType->isBooleanType() || SrcType->isBooleanType()) - return; - - // This should be truncation of integral types. - assert(isa(Src->getType()) && - isa(Dst->getType()) && "non-integer llvm type"); - - // TODO: Calculate src width to avoid emitting code - // for unecessary cases. 
- unsigned SrcBits = ConvertType(SrcType)->getScalarSizeInBits(); - unsigned DstBits = Info.Size; - - bool SrcSigned = SrcType->isSignedIntegerOrEnumerationType(); - bool DstSigned = DstType->isSignedIntegerOrEnumerationType(); - - CodeGenFunction::SanitizerScope SanScope(this); - - std::pair> - Check; - - // Truncation - bool EmitTruncation = DstBits < SrcBits; - // If Dst is signed and Src unsigned, we want to be more specific - // about the CheckKind we emit, in this case we want to emit - // ICCK_SignedIntegerTruncationOrSignChange. - bool EmitTruncationFromUnsignedToSigned = - EmitTruncation && DstSigned && !SrcSigned; - // Sign change - bool SameTypeSameSize = SrcSigned == DstSigned && SrcBits == DstBits; - bool BothUnsigned = !SrcSigned && !DstSigned; - bool LargerSigned = (DstBits > SrcBits) && DstSigned; - // We can avoid emitting sign change checks in some obvious cases - // 1. If Src and Dst have the same signedness and size - // 2. If both are unsigned sign check is unecessary! - // 3. If Dst is signed and bigger than Src, either - // sign-extension or zero-extension will make sure - // the sign remains. - bool EmitSignChange = !SameTypeSameSize && !BothUnsigned && !LargerSigned; - - if (EmitTruncation) - Check = - EmitBitfieldTruncationCheckHelper(Src, SrcType, Dst, DstType, Builder); - else if (EmitSignChange) { - assert(((SrcBits != DstBits) || (SrcSigned != DstSigned)) && - "either the widths should be different, or the signednesses."); - Check = - EmitBitfieldSignChangeCheckHelper(Src, SrcType, Dst, DstType, Builder); - } else - return; - - ScalarExprEmitter::ImplicitConversionCheckKind CheckKind = Check.first; - if (EmitTruncationFromUnsignedToSigned) - CheckKind = ScalarExprEmitter::ICCK_SignedIntegerTruncationOrSignChange; - - llvm::Constant *StaticArgs[] = { - EmitCheckSourceLocation(Loc), EmitCheckTypeDescriptor(SrcType), - EmitCheckTypeDescriptor(DstType), - llvm::ConstantInt::get(Builder.getInt8Ty(), CheckKind), - llvm::ConstantInt::get(Builder.getInt32Ty(), Info.Size)}; - - EmitCheck(Check.second, SanitizerHandler::ImplicitConversion, StaticArgs, - {Src, Dst}); -} - Value *ScalarExprEmitter::EmitScalarCast(Value *Src, QualType SrcType, QualType DstType, llvm::Type *SrcTy, llvm::Type *DstTy, @@ -2749,8 +2620,6 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, llvm::PHINode *atomicPHI = nullptr; llvm::Value *value; llvm::Value *input; - llvm::Value *Previous = nullptr; - QualType SrcType = E->getType(); int amount = (isInc ? 1 : -1); bool isSubtraction = !isInc; @@ -2839,8 +2708,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, "base or promoted) will be signed, or the bitwidths will match."); } if (CGF.SanOpts.hasOneOf( - SanitizerKind::ImplicitIntegerArithmeticValueChange | - SanitizerKind::ImplicitBitfieldConversion) && + SanitizerKind::ImplicitIntegerArithmeticValueChange) && canPerformLossyDemotionCheck) { // While `x += 1` (for `x` with width less than int) is modeled as // promotion+arithmetics+demotion, and we can catch lossy demotion with @@ -2851,26 +2719,13 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, // the increment/decrement in the wider type, and finally // perform the demotion. This will catch lossy demotions. - // We have a special case for bitfields defined using all the bits of the - // type. In this case we need to do the same trick as for the integer - // sanitizer checks, i.e., promotion -> increment/decrement -> demotion. 
- value = EmitScalarConversion(value, type, promotedType, E->getExprLoc()); Value *amt = llvm::ConstantInt::get(value->getType(), amount, true); value = Builder.CreateAdd(value, amt, isInc ? "inc" : "dec"); // Do pass non-default ScalarConversionOpts so that sanitizer check is - // emitted if LV is not a bitfield, otherwise the bitfield sanitizer - // checks will take care of the conversion. - ScalarConversionOpts Opts; - if (!LV.isBitField()) - Opts = ScalarConversionOpts(CGF.SanOpts); - else if (CGF.SanOpts.has(SanitizerKind::ImplicitBitfieldConversion)) { - Previous = value; - SrcType = promotedType; - } - + // emitted. value = EmitScalarConversion(value, promotedType, type, E->getExprLoc(), - Opts); + ScalarConversionOpts(CGF.SanOpts)); // Note that signed integer inc/dec with width less than int can't // overflow because of promotion rules; we're just eliding a few steps @@ -3055,12 +2910,9 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV, } // Store the updated result through the lvalue. - if (LV.isBitField()) { - Value *Src = Previous ? Previous : value; + if (LV.isBitField()) CGF.EmitStoreThroughBitfieldLValue(RValue::get(value), LV, &value); - CGF.EmitBitfieldConversionCheck(Src, SrcType, value, E->getType(), - LV.getBitFieldInfo(), E->getExprLoc()); - } else + else CGF.EmitStoreThroughLValue(RValue::get(value), LV); // If this is a postinc, return the value read from memory, otherwise use the @@ -3565,15 +3417,8 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // Convert the result back to the LHS type, // potentially with Implicit Conversion sanitizer check. - // If LHSLV is a bitfield, use default ScalarConversionOpts - // to avoid emit any implicit integer checks. - Value *Previous = nullptr; - if (LHSLV.isBitField()) { - Previous = Result; - Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc); - } else - Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, - ScalarConversionOpts(CGF.SanOpts)); + Result = EmitScalarConversion(Result, PromotionTypeCR, LHSTy, Loc, + ScalarConversionOpts(CGF.SanOpts)); if (atomicPHI) { llvm::BasicBlock *curBlock = Builder.GetInsertBlock(); @@ -3592,14 +3437,9 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue( // specially because the result is altered by the store, i.e., [C99 6.5.16p1] // 'An assignment expression has the value of the left operand after the // assignment...'. - if (LHSLV.isBitField()) { - Value *Src = Previous ? Previous : Result; - QualType SrcType = E->getRHS()->getType(); - QualType DstType = E->getLHS()->getType(); + if (LHSLV.isBitField()) CGF.EmitStoreThroughBitfieldLValue(RValue::get(Result), LHSLV, &Result); - CGF.EmitBitfieldConversionCheck(Src, SrcType, Result, DstType, - LHSLV.getBitFieldInfo(), E->getExprLoc()); - } else + else CGF.EmitStoreThroughLValue(RValue::get(Result), LHSLV); if (CGF.getLangOpts().OpenMP) @@ -4711,24 +4551,6 @@ Value *ScalarExprEmitter::EmitCompare(const BinaryOperator *E, E->getExprLoc()); } -llvm::Value *CodeGenFunction::EmitWithOriginalRHSBitfieldAssignment( - const BinaryOperator *E, Value *Previous, QualType *SrcType) { - // In case we have the integer or bitfield sanitizer checks enabled - // we want to get the expression before scalar conversion. 
- if (auto *ICE = dyn_cast(E->getRHS())) { - CastKind Kind = ICE->getCastKind(); - if (Kind == CK_IntegralCast) { - *SrcType = ICE->getSubExpr()->getType(); - Previous = EmitScalarExpr(ICE->getSubExpr()); - // Pass default ScalarConversionOpts to avoid emitting - // integer sanitizer checks as E refers to bitfield. - return EmitScalarConversion(Previous, *SrcType, ICE->getType(), - ICE->getExprLoc()); - } - } - return EmitScalarExpr(E->getRHS()); -} - Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { bool Ignore = TestAndClearIgnoreResultAssign(); @@ -4757,16 +4579,7 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { case Qualifiers::OCL_None: // __block variables need to have the rhs evaluated first, plus // this should improve codegen just a little. - Value *Previous = nullptr; - QualType SrcType = E->getRHS()->getType(); - // Check if LHS is a bitfield, if RHS contains an implicit cast expression - // we want to extract that value and potentially (if the bitfield sanitizer - // is enabled) use it to check for an implicit conversion. - if (E->getLHS()->refersToBitField()) - RHS = CGF.EmitWithOriginalRHSBitfieldAssignment(E, Previous, &SrcType); - else - RHS = Visit(E->getRHS()); - + RHS = Visit(E->getRHS()); LHS = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store); // Store the value into the LHS. Bit-fields are handled specially @@ -4775,12 +4588,6 @@ Value *ScalarExprEmitter::VisitBinAssign(const BinaryOperator *E) { // the assignment...'. if (LHS.isBitField()) { CGF.EmitStoreThroughBitfieldLValue(RValue::get(RHS), LHS, &RHS); - // If the expression contained an implicit conversion, make sure - // to use the value before the scalar conversion. - Value *Src = Previous ? Previous : RHS; - QualType DstType = E->getLHS()->getType(); - CGF.EmitBitfieldConversionCheck(Src, SrcType, RHS, DstType, - LHS.getBitFieldInfo(), E->getExprLoc()); } else { CGF.EmitNullabilityCheck(LHS, RHS, E->getExprLoc()); CGF.EmitStoreThroughLValue(RValue::get(RHS), LHS); diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 99a7f51..e2a7e28 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -2786,21 +2786,6 @@ public: /// expression and compare the result against zero, returning an Int1Ty value. llvm::Value *EvaluateExprAsBool(const Expr *E); - /// Retrieve the implicit cast expression of the rhs in a binary operator - /// expression by passing pointers to Value and QualType - /// This is used for implicit bitfield conversion checks, which - /// must compare with the value before potential truncation. - llvm::Value *EmitWithOriginalRHSBitfieldAssignment(const BinaryOperator *E, - llvm::Value *Previous, - QualType *SrcType); - - /// Emit a check that an [implicit] conversion of a bitfield. It is not UB, - /// so we use the value after conversion. - void EmitBitfieldConversionCheck(llvm::Value *Src, QualType SrcType, - llvm::Value *Dst, QualType DstType, - const CGBitFieldInfo &Info, - SourceLocation Loc); - /// EmitIgnoredExpr - Emit an expression in a context which ignores the result. 
void EmitIgnoredExpr(const Expr *E); diff --git a/clang/test/CodeGen/ubsan-bitfield-conversion.c b/clang/test/CodeGen/ubsan-bitfield-conversion.c deleted file mode 100644 index ea9bdd7..0000000 --- a/clang/test/CodeGen/ubsan-bitfield-conversion.c +++ /dev/null @@ -1,61 +0,0 @@ -// RUN: %clang -fsanitize=implicit-bitfield-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION -// RUN: %clang -fsanitize=implicit-integer-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK -// RUN: %clang -fsanitize=implicit-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION - -typedef struct _xx { - int x1:3; - char x2:2; -} xx, *pxx; - -xx vxx; - -// CHECK-LABEL: define{{.*}} void @foo1 -void foo1(int x) { - vxx.x1 = x; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} - -// CHECK-LABEL: define{{.*}} void @foo2 -void foo2(int x) { - vxx.x2 = x; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} - -// CHECK-LABEL: define{{.*}} void @foo3 -void foo3() { - vxx.x1++; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} - -// CHECK-LABEL: define{{.*}} void @foo4 -void foo4(int x) { - vxx.x1 += x; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} \ No newline at end of file diff --git a/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp b/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp deleted file mode 100644 index 92f6e24..0000000 --- a/clang/test/CodeGenCXX/ubsan-bitfield-conversion.cpp +++ /dev/null @@ -1,94 +0,0 @@ -// RUN: %clang -x c++ -fsanitize=implicit-bitfield-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION -// RUN: %clang -x c++ -fsanitize=implicit-integer-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s --check-prefixes=CHECK -// RUN: %clang -x c++ -fsanitize=implicit-conversion -target x86_64-linux -S -emit-llvm -o - %s | FileCheck %s 
--check-prefixes=CHECK,CHECK-BITFIELD-CONVERSION - -struct S { - int a:3; - char b:2; -}; - -class C : public S { - public: - short c:3; -}; - -S s; -C c; - -// CHECK-LABEL: define{{.*}} void @{{.*foo1.*}} -void foo1(int x) { - s.a = x; - // CHECK: store i8 %{{.*}} - // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - c.a = x; - // CHECK: store i8 %{{.*}} - // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} - -// CHECK-LABEL: define{{.*}} void @{{.*foo2.*}} -void foo2(int x) { - s.b = x; - // CHECK: store i8 %{{.*}} - // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 - // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - c.b = x; - // CHECK: store i8 %{{.*}} - // CHECK-BITFIELD-CONVERSION: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 6 - // CHECK-BITFIELD-CONVERSION-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 6 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} - -// CHECK-LABEL: define{{.*}} void @{{.*foo3.*}} -void foo3() { - s.a++; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - c.a++; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} - -// CHECK-LABEL: define{{.*}} void @{{.*foo4.*}} -void foo4(int x) { - s.a += x; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 [[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - c.a += x; - // CHECK: store i8 %{{.*}} - // CHECK-NEXT: [[BFRESULTSHL:%.*]] = shl i8 {{.*}}, 5 - // CHECK-NEXT: [[BFRESULTASHR:%.*]] = ashr i8 
[[BFRESULTSHL]], 5 - // CHECK-NEXT: [[BFRESULTCAST:%.*]] = sext i8 [[BFRESULTASHR]] to i32 - // CHECK-BITFIELD-CONVERSION: call void @__ubsan_handle_implicit_conversion - // CHECK-BITFIELD-CONVERSION-NEXT: br label %[[CONT:.*]], !nosanitize !6 - // CHECK-BITFIELD-CONVERSION: [[CONT]]: - // CHECK-NEXT: ret void -} \ No newline at end of file diff --git a/clang/test/Driver/fsanitize.c b/clang/test/Driver/fsanitize.c index 571f79a..1671825 100644 --- a/clang/test/Driver/fsanitize.c +++ b/clang/test/Driver/fsanitize.c @@ -35,20 +35,20 @@ // RUN: %clang --target=%itanium_abi_triple -fsanitize=integer %s -### 2>&1 | FileCheck %s --check-prefix=CHECK-INTEGER -implicit-check-not="-fsanitize-address-use-after-scope" // CHECK-INTEGER: "-fsanitize={{((signed-integer-overflow|unsigned-integer-overflow|integer-divide-by-zero|shift-base|shift-exponent|implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change|unsigned-shift-base),?){9}"}} -// RUN: %clang -fsanitize=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-RECOVER -// RUN: %clang -fsanitize=implicit-integer-conversion -fsanitize-recover=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-RECOVER -// RUN: %clang -fsanitize=implicit-integer-conversion -fno-sanitize-recover=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-NORECOVER -// RUN: %clang -fsanitize=implicit-integer-conversion -fsanitize-trap=implicit-integer-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-conversion,CHECK-implicit-integer-conversion-TRAP -// CHECK-implicit-integer-conversion: "-fsanitize={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-RECOVER: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-RECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-RECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // ??? 
-// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-NORECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-TRAP: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-TRAP-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} -// CHECK-implicit-integer-conversion-TRAP-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// RUN: %clang -fsanitize=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-RECOVER +// RUN: %clang -fsanitize=implicit-conversion -fsanitize-recover=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-RECOVER +// RUN: %clang -fsanitize=implicit-conversion -fno-sanitize-recover=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-NORECOVER +// RUN: %clang -fsanitize=implicit-conversion -fsanitize-trap=implicit-conversion %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-conversion,CHECK-implicit-conversion-TRAP +// CHECK-implicit-conversion: "-fsanitize={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-RECOVER: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-RECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-RECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-NORECOVER-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // ??? 
+// CHECK-implicit-conversion-NORECOVER-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-NORECOVER-NOT: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-TRAP: "-fsanitize-trap={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-TRAP-NOT: "-fsanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} +// CHECK-implicit-conversion-TRAP-NOT: "-fno-sanitize-recover={{((implicit-unsigned-integer-truncation|implicit-signed-integer-truncation|implicit-integer-sign-change),?){3}"}} // RUN: %clang -fsanitize=implicit-integer-arithmetic-value-change %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-arithmetic-value-change,CHECK-implicit-integer-arithmetic-value-change-RECOVER // RUN: %clang -fsanitize=implicit-integer-arithmetic-value-change -fsanitize-recover=implicit-integer-arithmetic-value-change %s -### 2>&1 | FileCheck %s --check-prefixes=CHECK-implicit-integer-arithmetic-value-change,CHECK-implicit-integer-arithmetic-value-change-RECOVER diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.cpp b/compiler-rt/lib/ubsan/ubsan_handlers.cpp index 27d0165..0f16507 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.cpp +++ b/compiler-rt/lib/ubsan/ubsan_handlers.cpp @@ -555,11 +555,13 @@ static void handleImplicitConversion(ImplicitConversionData *Data, ReportOptions Opts, ValueHandle Src, ValueHandle Dst) { SourceLocation Loc = Data->Loc.acquire(); + ErrorType ET = ErrorType::GenericUB; + const TypeDescriptor &SrcTy = Data->FromType; const TypeDescriptor &DstTy = Data->ToType; + bool SrcSigned = SrcTy.isSignedIntegerTy(); bool DstSigned = DstTy.isSignedIntegerTy(); - ErrorType ET = ErrorType::GenericUB; switch (Data->Kind) { case ICCK_IntegerTruncation: { // Legacy, no longer used. @@ -592,23 +594,14 @@ static void handleImplicitConversion(ImplicitConversionData *Data, ScopedReport R(Opts, Loc, ET); - // In the case we have a bitfield, we want to explicitly say so in the - // error message. // FIXME: is it possible to dump the values as hex with fixed width? - if (Data->BitfieldBits) - Diag(Loc, DL_Error, ET, - "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " - "type %4 changed the value to %5 (%6-bit bitfield, %7signed)") - << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() - << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) - << Data->BitfieldBits << (DstSigned ? "" : "un"); - else - Diag(Loc, DL_Error, ET, - "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " - "type %4 changed the value to %5 (%6-bit, %7signed)") - << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() - << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) - << DstTy.getIntegerBitWidth() << (DstSigned ? "" : "un"); + + Diag(Loc, DL_Error, ET, + "implicit conversion from type %0 of value %1 (%2-bit, %3signed) to " + "type %4 changed the value to %5 (%6-bit, %7signed)") + << SrcTy << Value(SrcTy, Src) << SrcTy.getIntegerBitWidth() + << (SrcSigned ? "" : "un") << DstTy << Value(DstTy, Dst) + << DstTy.getIntegerBitWidth() << (DstSigned ? 
"" : "un"); } void __ubsan::__ubsan_handle_implicit_conversion(ImplicitConversionData *Data, diff --git a/compiler-rt/lib/ubsan/ubsan_handlers.h b/compiler-rt/lib/ubsan/ubsan_handlers.h index bae661a..3bd5046 100644 --- a/compiler-rt/lib/ubsan/ubsan_handlers.h +++ b/compiler-rt/lib/ubsan/ubsan_handlers.h @@ -147,7 +147,6 @@ struct ImplicitConversionData { const TypeDescriptor &FromType; const TypeDescriptor &ToType; /* ImplicitConversionCheckKind */ unsigned char Kind; - unsigned int BitfieldBits; }; /// \brief Implict conversion that changed the value. -- cgit v1.1 From 8aa3a77eaf198afb7e01453e6daf6566b687945d Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 7 Mar 2024 13:40:30 -0800 Subject: [RISCV][GISEL] Legalize G_ZEXT, G_SEXT, and G_ANYEXT, G_SPLAT_VECTOR, and G_ICMP for scalable vector types This patch legalizes G_ZEXT, G_SEXT, and G_ANYEXT. If the type is a legal mask type, then the instruction is legalized as the element-wise select, where the condition on the select is the mask typed source operand, and the true and false values are 1 or -1 (for zero/any-extension and sign extension) and zero. If the type is a legal integer or vector integer type, then the instruction is marked as legal. The legalization of the extends may introduce a G_SPLAT_VECTOR, which needs to be legalized in this patch for the extend test cases to pass. A G_SPLAT_VECTOR is legal if the vector type is a legal integer or floating point vector type and the source operand is sXLen type. This is because the SelectionDAG patterns only support sXLen typed ISD::SPLAT_VECTORS, and we'd like to reuse those patterns. A G_SPLAT_VECTOR is cutom legalized if it has a legal s1 element vector type and s1 scalar operand. It is legalized to G_VMSET_VL or G_VMCLR_VL if the splat is all ones or all zeros respectivley. In the case of a non-constant mask splat, we legalize by promoting the scalar value to s8. In order to get the s8 element vector back into s1 vector, we use a G_ICMP. In order for the splat vector and extend tests to pass, we also need to legalize G_ICMP in this patch. A G_ICMP is legal if the destination type is a legal bool vector and the LHS and RHS are legal integer vector types. 
--- llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp | 9 + llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp | 2 +- llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp | 184 ++- llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h | 2 + llvm/lib/Target/RISCV/RISCVInstrGISel.td | 25 + .../GlobalISel/legalizer/rvv/legalize-anyext.mir | 1589 ++++++++++++++++++++ .../GlobalISel/legalizer/rvv/legalize-icmp.mir | 810 ++++++++++ .../GlobalISel/legalizer/rvv/legalize-sext.mir | 1589 ++++++++++++++++++++ .../legalizer/rvv/legalize-splatvector-rv32.mir | 694 +++++++++ .../legalizer/rvv/legalize-splatvector-rv64.mir | 817 ++++++++++ .../rvv/legalize-splatvector-s64-rv32.mir | 116 ++ .../GlobalISel/legalizer/rvv/legalize-zext.mir | 1589 ++++++++++++++++++++ llvm/test/MachineVerifier/test_g_fcmp.mir | 13 +- llvm/test/MachineVerifier/test_g_icmp.mir | 13 +- 14 files changed, 7436 insertions(+), 16 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-anyext.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-icmp.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-sext.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv32.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv64.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-s64-rv32.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-zext.mir diff --git a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp index 797bbf7..95c6a35 100644 --- a/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp @@ -3006,6 +3006,15 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) { Observer.changedInstr(MI); return Legalized; } + case TargetOpcode::G_SPLAT_VECTOR: { + if (TypeIdx != 1) + return UnableToLegalize; + + Observer.changingInstr(MI); + widenScalarSrc(MI, WideTy, 1, TargetOpcode::G_ANYEXT); + Observer.changedInstr(MI); + return Legalized; + } } } diff --git a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp index b8ba782..6b35caf 100644 --- a/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp +++ b/llvm/lib/CodeGen/GlobalISel/MachineIRBuilder.cpp @@ -1278,7 +1278,7 @@ MachineIRBuilder::buildInstr(unsigned Opc, ArrayRef DstOps, return DstTy.isScalar(); else return DstTy.isVector() && - DstTy.getNumElements() == Op0Ty.getNumElements(); + DstTy.getElementCount() == Op0Ty.getElementCount(); }() && "Type Mismatch"); break; } diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp index 7f35107..38c1f9868 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.cpp @@ -139,20 +139,21 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .clampScalar(0, s32, sXLen) .minScalarSameAs(1, 0); + auto &ExtActions = + getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}) + .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), + typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))); if (ST.is64Bit()) { - getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}) - .legalFor({{sXLen, s32}}) - .maxScalar(0, sXLen); - + ExtActions.legalFor({{sXLen, s32}}); getActionDefinitionsBuilder(G_SEXT_INREG) .customFor({sXLen}) .maxScalar(0, sXLen) 
.lower(); } else { - getActionDefinitionsBuilder({G_ZEXT, G_SEXT, G_ANYEXT}).maxScalar(0, sXLen); - getActionDefinitionsBuilder(G_SEXT_INREG).maxScalar(0, sXLen).lower(); } + ExtActions.customIf(typeIsLegalBoolVec(1, BoolVecTys, ST)) + .maxScalar(0, sXLen); // Merge/Unmerge for (unsigned Op : {G_MERGE_VALUES, G_UNMERGE_VALUES}) { @@ -235,7 +236,9 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) getActionDefinitionsBuilder(G_ICMP) .legalFor({{sXLen, sXLen}, {sXLen, p0}}) - .widenScalarToNextPow2(1) + .legalIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), + typeIsLegalIntOrFPVec(1, IntOrFPVecTys, ST))) + .widenScalarOrEltToNextPow2OrMinSize(1, 8) .clampScalar(1, sXLen, sXLen) .clampScalar(0, sXLen, sXLen); @@ -418,6 +421,29 @@ RISCVLegalizerInfo::RISCVLegalizerInfo(const RISCVSubtarget &ST) .clampScalar(0, sXLen, sXLen) .customFor({sXLen}); + auto &SplatActions = + getActionDefinitionsBuilder(G_SPLAT_VECTOR) + .legalIf(all(typeIsLegalIntOrFPVec(0, IntOrFPVecTys, ST), + typeIs(1, sXLen))) + .customIf(all(typeIsLegalBoolVec(0, BoolVecTys, ST), typeIs(1, s1))); + // Handle case of s64 element vectors on RV32. If the subtarget does not have + // f64, then try to lower it to G_SPLAT_VECTOR_SPLIT_64_VL. If the subtarget + // does have f64, then we don't know whether the type is an f64 or an i64, + // so mark the G_SPLAT_VECTOR as legal and decide later what to do with it, + // depending on how the instructions it consumes are legalized. They are not + // legalized yet since legalization is in reverse postorder, so we cannot + // make the decision at this moment. + if (XLen == 32) { + if (ST.hasVInstructionsF64() && ST.hasStdExtD()) + SplatActions.legalIf(all( + typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64))); + else if (ST.hasVInstructionsI64()) + SplatActions.customIf(all( + typeInSet(0, {nxv1s64, nxv2s64, nxv4s64, nxv8s64}), typeIs(1, s64))); + } + + SplatActions.clampScalar(1, sXLen, sXLen); + getLegacyLegalizerInfo().computeTables(); } @@ -576,7 +602,145 @@ bool RISCVLegalizerInfo::legalizeVScale(MachineInstr &MI, auto VScale = MIB.buildLShr(XLenTy, VLENB, MIB.buildConstant(XLenTy, 3)); MIB.buildMul(Dst, VScale, MIB.buildConstant(XLenTy, Val)); } + MI.eraseFromParent(); + return true; +} + +// Custom-lower extensions from mask vectors by using a vselect either with 1 +// for zero/any-extension or -1 for sign-extension: +// (vXiN = (s|z)ext vXi1:vmask) -> (vXiN = vselect vmask, (-1 or 1), 0) +// Note that any-extension is lowered identically to zero-extension. +bool RISCVLegalizerInfo::legalizeExt(MachineInstr &MI, + MachineIRBuilder &MIB) const { + + unsigned Opc = MI.getOpcode(); + assert(Opc == TargetOpcode::G_ZEXT || Opc == TargetOpcode::G_SEXT || + Opc == TargetOpcode::G_ANYEXT); + + MachineRegisterInfo &MRI = *MIB.getMRI(); + Register Dst = MI.getOperand(0).getReg(); + Register Src = MI.getOperand(1).getReg(); + + LLT DstTy = MRI.getType(Dst); + int64_t ExtTrueVal = Opc == TargetOpcode::G_SEXT ? -1 : 1; + LLT DstEltTy = DstTy.getElementType(); + auto SplatZero = MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, 0)); + auto SplatTrue = + MIB.buildSplatVector(DstTy, MIB.buildConstant(DstEltTy, ExtTrueVal)); + MIB.buildSelect(Dst, Src, SplatTrue, SplatZero); + + MI.eraseFromParent(); + return true; +} + +/// Return the type of the mask type suitable for masking the provided +/// vector type. This is simply an i1 element type vector of the same +/// (possibly scalable) length. 
+static LLT getMaskTypeFor(LLT VecTy) { + assert(VecTy.isVector()); + ElementCount EC = VecTy.getElementCount(); + return LLT::vector(EC, LLT::scalar(1)); +} + +/// Creates an all ones mask suitable for masking a vector of type VecTy with +/// vector length VL. +static MachineInstrBuilder buildAllOnesMask(LLT VecTy, const SrcOp &VL, + MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) { + LLT MaskTy = getMaskTypeFor(VecTy); + return MIB.buildInstr(RISCV::G_VMSET_VL, {MaskTy}, {VL}); +} + +/// Gets the two common "VL" operands: an all-ones mask and the vector length. +/// VecTy is a scalable vector type. +static std::pair +buildDefaultVLOps(const DstOp &Dst, MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) { + LLT VecTy = Dst.getLLTTy(MRI); + assert(VecTy.isScalableVector() && "Expecting scalable container type"); + Register VL(RISCV::X0); + MachineInstrBuilder Mask = buildAllOnesMask(VecTy, VL, MIB, MRI); + return {Mask, VL}; +} + +static MachineInstrBuilder +buildSplatPartsS64WithVL(const DstOp &Dst, const SrcOp &Passthru, Register Lo, + Register Hi, Register VL, MachineIRBuilder &MIB, + MachineRegisterInfo &MRI) { + // TODO: If the Hi bits of the splat are undefined, then it's fine to just + // splat Lo even if it might be sign extended. I don't think we have + // introduced a case where we're build a s64 where the upper bits are undef + // yet. + + // Fall back to a stack store and stride x0 vector load. + // TODO: need to lower G_SPLAT_VECTOR_SPLIT_I64. This is done in + // preprocessDAG in SDAG. + return MIB.buildInstr(RISCV::G_SPLAT_VECTOR_SPLIT_I64_VL, {Dst}, + {Passthru, Lo, Hi, VL}); +} + +static MachineInstrBuilder +buildSplatSplitS64WithVL(const DstOp &Dst, const SrcOp &Passthru, + const SrcOp &Scalar, Register VL, + MachineIRBuilder &MIB, MachineRegisterInfo &MRI) { + assert(Scalar.getLLTTy(MRI) == LLT::scalar(64) && "Unexpected VecTy!"); + auto Unmerge = MIB.buildUnmerge(LLT::scalar(32), Scalar); + return buildSplatPartsS64WithVL(Dst, Passthru, Unmerge.getReg(0), + Unmerge.getReg(1), VL, MIB, MRI); +} + +// Lower splats of s1 types to G_ICMP. For each mask vector type, we have a +// legal equivalently-sized i8 type, so we can use that as a go-between. +// Splats of s1 types that have constant value can be legalized as VMSET_VL or +// VMCLR_VL. +bool RISCVLegalizerInfo::legalizeSplatVector(MachineInstr &MI, + MachineIRBuilder &MIB) const { + assert(MI.getOpcode() == TargetOpcode::G_SPLAT_VECTOR); + + MachineRegisterInfo &MRI = *MIB.getMRI(); + + Register Dst = MI.getOperand(0).getReg(); + Register SplatVal = MI.getOperand(1).getReg(); + + LLT VecTy = MRI.getType(Dst); + LLT XLenTy(STI.getXLenVT()); + + // Handle case of s64 element vectors on rv32 + if (XLenTy.getSizeInBits() == 32 && + VecTy.getElementType().getSizeInBits() == 64) { + auto [_, VL] = buildDefaultVLOps(Dst, MIB, MRI); + buildSplatSplitS64WithVL(Dst, MIB.buildUndef(VecTy), SplatVal, VL, MIB, + MRI); + MI.eraseFromParent(); + return true; + } + + // All-zeros or all-ones splats are handled specially. + MachineInstr &SplatValMI = *MRI.getVRegDef(SplatVal); + if (isAllOnesOrAllOnesSplat(SplatValMI, MRI)) { + auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second; + MIB.buildInstr(RISCV::G_VMSET_VL, {Dst}, {VL}); + MI.eraseFromParent(); + return true; + } + if (isNullOrNullSplat(SplatValMI, MRI)) { + auto VL = buildDefaultVLOps(VecTy, MIB, MRI).second; + MIB.buildInstr(RISCV::G_VMCLR_VL, {Dst}, {VL}); + MI.eraseFromParent(); + return true; + } + // Handle non-constant mask splat (i.e. 
not sure if it's all zeros or all + // ones) by promoting it to an s8 splat. + LLT InterEltTy = LLT::scalar(8); + LLT InterTy = VecTy.changeElementType(InterEltTy); + auto ZExtSplatVal = MIB.buildZExt(InterEltTy, SplatVal); + auto And = + MIB.buildAnd(InterEltTy, ZExtSplatVal, MIB.buildConstant(InterEltTy, 1)); + auto LHS = MIB.buildSplatVector(InterTy, And); + auto ZeroSplat = + MIB.buildSplatVector(InterTy, MIB.buildConstant(InterEltTy, 0)); + MIB.buildICmp(CmpInst::Predicate::ICMP_NE, Dst, LHS, ZeroSplat); MI.eraseFromParent(); return true; } @@ -640,6 +804,12 @@ bool RISCVLegalizerInfo::legalizeCustom( return legalizeVAStart(MI, MIRBuilder); case TargetOpcode::G_VSCALE: return legalizeVScale(MI, MIRBuilder); + case TargetOpcode::G_ZEXT: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ANYEXT: + return legalizeExt(MI, MIRBuilder); + case TargetOpcode::G_SPLAT_VECTOR: + return legalizeSplatVector(MI, MIRBuilder); } llvm_unreachable("expected switch to return"); diff --git a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h index e2a98c8..5bb1e7a 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h +++ b/llvm/lib/Target/RISCV/GISel/RISCVLegalizerInfo.h @@ -43,6 +43,8 @@ private: bool legalizeVAStart(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const; bool legalizeVScale(MachineInstr &MI, MachineIRBuilder &MIB) const; + bool legalizeExt(MachineInstr &MI, MachineIRBuilder &MIRBuilder) const; + bool legalizeSplatVector(MachineInstr &MI, MachineIRBuilder &MIB) const; }; } // end namespace llvm #endif diff --git a/llvm/lib/Target/RISCV/RISCVInstrGISel.td b/llvm/lib/Target/RISCV/RISCVInstrGISel.td index 54e22d6..ba40662 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrGISel.td +++ b/llvm/lib/Target/RISCV/RISCVInstrGISel.td @@ -32,3 +32,28 @@ def G_READ_VLENB : RISCVGenericInstruction { let hasSideEffects = false; } def : GINodeEquiv; + +// Pseudo equivalent to a RISCVISD::VMCLR_VL +def G_VMCLR_VL : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$vl); + let hasSideEffects = false; +} +def : GINodeEquiv; + +// Pseudo equivalent to a RISCVISD::VMSET_VL +def G_VMSET_VL : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type1:$vl); + let hasSideEffects = false; +} +def : GINodeEquiv; + +// Pseudo equivalent to a RISCVISD::SPLAT_VECTOR_SPLIT_I64_VL. There is no +// record to mark as equivalent to using GINodeEquiv because it gets lowered +// before instruction selection. 
+def G_SPLAT_VECTOR_SPLIT_I64_VL : RISCVGenericInstruction { + let OutOperandList = (outs type0:$dst); + let InOperandList = (ins type0:$passthru, type1:$hi, type1:$lo, type2:$vl); + let hasSideEffects = false; +} diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-anyext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-anyext.mir new file mode 100644 index 0000000..3a2d40f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-anyext.mir @@ -0,0 +1,1589 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV32 %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV64 %s + +# Extend from s1 element vectors +--- +name: anyext_nxv1i8_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv1i8_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i8_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv1i16_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv1i16_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i16_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: anyext_nxv1i32_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv1i32_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i32_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv1i64_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv1i64_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i64_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: anyext_nxv2i8_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv2i8_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv2i8_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv2i16_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv2i16_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv2i16_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: anyext_nxv2i32_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv2i32_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv2i32_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv2i64_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv2i64_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv2i64_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: anyext_nxv4i8_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv4i8_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv4i8_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv4i16_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv4i16_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv4i16_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: anyext_nxv4i32_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv4i32_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv4i32_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv4i64_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv4i64_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv4i64_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: anyext_nxv8i8_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv8i8_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv8i8_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv8i16_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv8i16_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv8i16_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: anyext_nxv8i32_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv8i32_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv8i32_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... 
+--- +name: anyext_nxv8i64_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv8i64_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv8i64_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... +--- +name: anyext_nxv16i8_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv16i8_nxv16i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv16i8_nxv16i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv16i16_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv16i16_nxv16i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv16i16_nxv16i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: anyext_nxv16i32_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv16i32_nxv16i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv16i32_nxv16i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... 
+--- +name: anyext_nxv32i8_nxv32i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv32i8_nxv32i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv32i8_nxv32i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: anyext_nxv32i16_nxv32i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv32i16_nxv32i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv32i16_nxv32i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... 
+--- +name: anyext_nxv64i8_nxv64i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: anyext_nxv64i8_nxv64i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv64i8_nxv64i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v0 + %0:_() = G_ANYEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... + +# Extend from s8 element vectors +--- +name: anyext_nxv1i16_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv1i16_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i16_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: anyext_nxv1i32_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv1i32_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i32_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv1i64_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv1i64_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i64_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: anyext_nxv2i16_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv2i16_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv2i16_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: anyext_nxv2i32_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv2i32_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv2i32_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: anyext_nxv2i64_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv2i64_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv2i64_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv4i16_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv4i16_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv4i16_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: anyext_nxv4i32_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv4i32_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv4i32_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: anyext_nxv4i64_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv4i64_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv4i64_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: anyext_nxv8i16_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv8i16_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv8i16_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv8i32_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv8i32_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv8i32_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: anyext_nxv8i64_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv8i64_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv8i64_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... +--- +name: anyext_nxv16i16_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv16i16_nxv16i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv16i16_nxv16i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8m2 + %0:_() = G_ANYEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: anyext_nxv16i32_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv16i32_nxv16i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv16i32_nxv16i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m4 + %0:_() = G_ANYEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... 
+--- +name: anyext_nxv32i16_nxv32i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv32i16_nxv32i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv32i16_nxv32i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m4 + %0:_() = G_ANYEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... + +# Extend from s16 element vectors +--- +name: anyext_nxv1i32_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv1i32_nxv1i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i32_nxv1i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: anyext_nxv1i64_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv1i64_nxv1i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i64_nxv1i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: anyext_nxv2i32_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv2i32_nxv2i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv2i32_nxv2i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: anyext_nxv2i64_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv2i64_nxv2i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv2i64_nxv2i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: anyext_nxv4i32_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv4i32_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv4i32_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: anyext_nxv4i64_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv4i64_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv4i64_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: anyext_nxv8i32_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv8i32_nxv8i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv8i32_nxv8i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8m2 + %0:_() = G_ANYEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... 
+--- +name: anyext_nxv8i64_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv8i64_nxv8i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv8i64_nxv8i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m2 + %0:_() = G_ANYEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... +--- +name: anyext_nxv16i32_nxv16i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv16i32_nxv16i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv16i32_nxv16i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m4 + %0:_() = G_ANYEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... + +# Extend from s32 element vectors +--- +name: anyext_nxv1i64_nxv1i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv1i64_nxv1i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: anyext_nxv1i64_nxv1i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: anyext_nxv2i64_nxv2i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv2i64_nxv2i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: anyext_nxv2i64_nxv2i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ANYEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: anyext_nxv4i64_nxv4i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv4i64_nxv4i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: anyext_nxv4i64_nxv4i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8m2 + %0:_() = G_ANYEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: anyext_nxv8i64_nxv8i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: anyext_nxv8i64_nxv8i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: anyext_nxv8i64_nxv8i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_() = G_ANYEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m4 + %0:_() = G_ANYEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-icmp.mir new file mode 100644 index 0000000..d1df954 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-icmp.mir @@ -0,0 +1,810 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV32 %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV64 %s + +--- +name: icmp_nxv1i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv1i1 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv1i1 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: 
[[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv2i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv2i1 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv2i1 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv4i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv4i1 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv4i1 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv8i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv8i1 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv8i1 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv16i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv16i1 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv16i1 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv32i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv32i1 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv32i1 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv64i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv64i1 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C3]](s32) + ; RV32-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv64i1 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT2:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR2:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT2]](s64) + ; RV64-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT3:%[0-9]+]]:_(s64) = G_ANYEXT [[C3]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR3:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT3]](s64) + ; RV64-NEXT: [[SELECT1:%[0-9]+]]:_() = G_SELECT [[DEF]](), [[SPLAT_VECTOR3]], [[SPLAT_VECTOR2]] + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[SELECT]](), [[SELECT1]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv1i8 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv1i8 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv2i8 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv2i8 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv4i8 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv4i8 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv8i8 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv8i8 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv16i8 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv16i8 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv32i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv32i8 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv32i8 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv64i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv64i8 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv64i8 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv1i16 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv1i16 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv2i16 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv2i16 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv4i16 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv4i16 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv8i16 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv8i16 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv16i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv16i16 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv16i16 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv32i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv32i16 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv32i16 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv1i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv1i32 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv1i32 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv2i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv2i32 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv2i32 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv4i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv4i32 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv4i32 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv8i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv8i32 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv8i32 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv16i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv16i32 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv16i32 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv1i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv1i64 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv1i64 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv2i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv2i64 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv2i64 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... +--- +name: icmp_nxv4i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv4i64 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv4i64 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... 
+--- +name: icmp_nxv8i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32-LABEL: name: icmp_nxv8i64 + ; RV32: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV32-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32-NEXT: $v8 = COPY [[ICMP]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: icmp_nxv8i64 + ; RV64: [[DEF:%[0-9]+]]:_() = G_IMPLICIT_DEF + ; RV64-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64-NEXT: $v8 = COPY [[ICMP]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0, %0 + $v8 = COPY %1() + PseudoRET implicit $v8 +... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-sext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-sext.mir new file mode 100644 index 0000000..1571daf --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-sext.mir @@ -0,0 +1,1589 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV32 %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV64 %s + +# Extend from s1 element vectors +--- +name: sext_nxv1i8_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv1i8_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i8_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv1i16_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv1i16_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i16_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: sext_nxv1i32_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv1i32_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i32_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv1i64_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv1i64_nxv1i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i64_nxv1i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: sext_nxv2i8_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv2i8_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv2i8_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv2i16_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv2i16_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv2i16_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: sext_nxv2i32_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv2i32_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv2i32_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv2i64_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv2i64_nxv2i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv2i64_nxv2i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: sext_nxv4i8_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv4i8_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv4i8_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv4i16_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv4i16_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv4i16_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: sext_nxv4i32_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv4i32_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv4i32_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv4i64_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv4i64_nxv4i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv4i64_nxv4i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: sext_nxv8i8_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv8i8_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv8i8_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv8i16_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv8i16_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv8i16_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: sext_nxv8i32_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv8i32_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv8i32_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... 
+--- +name: sext_nxv8i64_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv8i64_nxv8i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv8i64_nxv8i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 -1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... +--- +name: sext_nxv16i8_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv16i8_nxv16i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv16i8_nxv16i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv16i16_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv16i16_nxv16i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv16i16_nxv16i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: sext_nxv16i32_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv16i32_nxv16i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv16i32_nxv16i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... 
+--- +name: sext_nxv32i8_nxv32i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv32i8_nxv32i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv32i8_nxv32i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: sext_nxv32i16_nxv32i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv32i16_nxv32i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv32i16_nxv32i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... 
+--- +name: sext_nxv64i8_nxv64i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v0 + ; RV32-LABEL: name: sext_nxv64i8_nxv64i1 + ; RV32: liveins: $v0 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv64i8_nxv64i1 + ; RV64: liveins: $v0 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v0 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 -1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v0 + %0:_() = G_SEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... + +# Extend from s8 element vectors +--- +name: sext_nxv1i16_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv1i16_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i16_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: sext_nxv1i32_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv1i32_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i32_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv1i64_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv1i64_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i64_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: sext_nxv2i16_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv2i16_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv2i16_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: sext_nxv2i32_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv2i32_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv2i32_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: sext_nxv2i64_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv2i64_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv2i64_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv4i16_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv4i16_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv4i16_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: sext_nxv4i32_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv4i32_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv4i32_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: sext_nxv4i64_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv4i64_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv4i64_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: sext_nxv8i16_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv8i16_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv8i16_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: sext_nxv8i32_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv8i32_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv8i32_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: sext_nxv8i64_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv8i64_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv8i64_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... +--- +name: sext_nxv16i16_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv16i16_nxv16i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv16i16_nxv16i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8m2 + %0:_() = G_SEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: sext_nxv16i32_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv16i32_nxv16i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv16i32_nxv16i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m2 + %0:_() = G_SEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... 
+--- +name: sext_nxv32i16_nxv32i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv32i16_nxv32i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv32i16_nxv32i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m4 + %0:_() = G_SEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... + +# Extend from s16 element vectors +--- +name: sext_nxv1i32_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv1i32_nxv1i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i32_nxv1i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: sext_nxv1i64_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv1i64_nxv1i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i64_nxv1i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: sext_nxv2i32_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv2i32_nxv2i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv2i32_nxv2i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: sext_nxv2i64_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv2i64_nxv2i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv2i64_nxv2i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: sext_nxv4i32_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv4i32_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv4i32_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: sext_nxv4i64_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv4i64_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv4i64_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: sext_nxv8i32_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv8i32_nxv8i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: sext_nxv8i32_nxv8i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8m2 + %0:_() = G_SEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... 
+--- +name: sext_nxv8i64_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv8i64_nxv8i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv8i64_nxv8i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m2 + %0:_() = G_SEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... +--- +name: sext_nxv16i32_nxv16i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv16i32_nxv16i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: sext_nxv16i32_nxv16i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m4 + %0:_() = G_SEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... + +# Extend from s32 element vectors +--- +name: sext_nxv1i64_nxv1i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv1i64_nxv1i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: sext_nxv1i64_nxv1i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: sext_nxv2i64_nxv2i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: sext_nxv2i64_nxv2i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: sext_nxv2i64_nxv2i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[SEXT:%[0-9]+]]:_() = G_SEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_SEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+---
+name: sext_nxv4i64_nxv4i32
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+    ; RV32-LABEL: name: sext_nxv4i64_nxv4i32
+    ; RV32: liveins: $v8
+    ; RV32-NEXT: {{ $}}
+    ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2
+    ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s32>)
+    ; RV32-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+    ; RV32-NEXT: PseudoRET implicit $v8m4
+    ;
+    ; RV64-LABEL: name: sext_nxv4i64_nxv4i32
+    ; RV64: liveins: $v8
+    ; RV64-NEXT: {{ $}}
+    ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 4 x s32>) = COPY $v8m2
+    ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SEXT [[COPY]](<vscale x 4 x s32>)
+    ; RV64-NEXT: $v8m4 = COPY [[SEXT]](<vscale x 4 x s64>)
+    ; RV64-NEXT: PseudoRET implicit $v8m4
+    %1:_(<vscale x 4 x s32>) = COPY $v8m2
+    %0:_(<vscale x 4 x s64>) = G_SEXT %1(<vscale x 4 x s32>)
+    $v8m4 = COPY %0(<vscale x 4 x s64>)
+    PseudoRET implicit $v8m4
+...
+---
+name: sext_nxv8i64_nxv8i32
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+    ; RV32-LABEL: name: sext_nxv8i64_nxv8i32
+    ; RV32: liveins: $v8
+    ; RV32-NEXT: {{ $}}
+    ; RV32-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4
+    ; RV32-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s32>)
+    ; RV32-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+    ; RV32-NEXT: PseudoRET implicit $v8m8
+    ;
+    ; RV64-LABEL: name: sext_nxv8i64_nxv8i32
+    ; RV64: liveins: $v8
+    ; RV64-NEXT: {{ $}}
+    ; RV64-NEXT: [[COPY:%[0-9]+]]:_(<vscale x 8 x s32>) = COPY $v8m4
+    ; RV64-NEXT: [[SEXT:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SEXT [[COPY]](<vscale x 8 x s32>)
+    ; RV64-NEXT: $v8m8 = COPY [[SEXT]](<vscale x 8 x s64>)
+    ; RV64-NEXT: PseudoRET implicit $v8m8
+    %1:_(<vscale x 8 x s32>) = COPY $v8m4
+    %0:_(<vscale x 8 x s64>) = G_SEXT %1(<vscale x 8 x s32>)
+    $v8m8 = COPY %0(<vscale x 8 x s64>)
+    PseudoRET implicit $v8m8
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv32.mir
new file mode 100644
index 0000000..109536a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv32.mir
@@ -0,0 +1,694 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s
+
+---
+name: splatvector_nxv1i1_0
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: splatvector_nxv1i1_0
+    ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+    ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMCLR_VL $x0
+    ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]](<vscale x 1 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %0:_(s1) = G_CONSTANT i1 0
+    %1:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %0(s1)
+    $v0 = COPY %1(<vscale x 1 x s1>)
+    PseudoRET implicit $v0
+
+...
+---
+name: splatvector_nxv1i1_1
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.1:
+    ; CHECK-LABEL: name: splatvector_nxv1i1_1
+    ; CHECK: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+    ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+    ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]](<vscale x 1 x s1>)
+    ; CHECK-NEXT: PseudoRET implicit $v0
+    %0:_(s1) = G_CONSTANT i1 1
+    %1:_(<vscale x 1 x s1>) = G_SPLAT_VECTOR %0(s1)
+    $v0 = COPY %1(<vscale x 1 x s1>)
+    PseudoRET implicit $v0
+
+...
+--- +name: splatvector_nxv1i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv1i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv2i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv2i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv2i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv2i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv4i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv4i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv4i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv4i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv8i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv8i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv8i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv8i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... 
+--- +name: splatvector_nxv16i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv16i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv16i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv16i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv32i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv32i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv32i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv32i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv32i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv32i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv64i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv64i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv64i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv64i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv64i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv64i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[AND1]](s32) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C2]](s32) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s32) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s32) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... + +--- +name: splatvector_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s8) = G_CONSTANT i8 0 + %2:_() = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... 
+ +--- +name: splatvector_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s8) = G_CONSTANT i8 0 + %2:_() = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s8) = G_CONSTANT i8 0 + %2:_() = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s8) = G_CONSTANT i8 0 + %2:_() = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(s8) = G_CONSTANT i8 0 + %2:_() = G_SPLAT_VECTOR %1(s8) + $v8m2 = COPY %2() + PseudoRET implicit $v8m2 + +... +--- +name: splatvector_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s16) = G_CONSTANT i16 0 + %2:_() = G_SPLAT_VECTOR %1(s16) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s16) = G_CONSTANT i16 0 + %2:_() = G_SPLAT_VECTOR %1(s16) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s16) = G_CONSTANT i16 0 + %2:_() = G_SPLAT_VECTOR %1(s16) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... 
+--- +name: splatvector_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(s16) = G_CONSTANT i16 0 + %2:_() = G_SPLAT_VECTOR %1(s16) + $v8m2 = COPY %2() + PseudoRET implicit $v8m2 + +... +--- +name: splatvector_nxv16i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(s16) = G_CONSTANT i16 0 + %2:_() = G_SPLAT_VECTOR %1(s16) + $v8m4 = COPY %2() + PseudoRET implicit $v8m4 + +... +--- +name: splatvector_nxv1i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s32) = G_CONSTANT i32 0 + %2:_() = G_SPLAT_VECTOR %1(s32) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv2i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %1:_(s32) = G_CONSTANT i32 0 + %2:_() = G_SPLAT_VECTOR %1(s32) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv4i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %1:_(s32) = G_CONSTANT i32 0 + %2:_() = G_SPLAT_VECTOR %1(s32) + $v8m2 = COPY %2() + PseudoRET implicit $v8m2 + +... +--- +name: splatvector_nxv8i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %1:_(s32) = G_CONSTANT i32 0 + %2:_() = G_SPLAT_VECTOR %1(s32) + $v8m4 = COPY %2() + PseudoRET implicit $v8m4 + +... +--- +name: splatvector_nxv16i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; CHECK-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %1:_(s32) = G_CONSTANT i32 0 + %2:_() = G_SPLAT_VECTOR %1(s32) + $v8m8 = COPY %2() + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv64.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv64.mir new file mode 100644 index 0000000..7bf5f83 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-rv64.mir @@ -0,0 +1,817 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck %s + +--- +name: splatvector_nxv1i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv1i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv1i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv1i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv2i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv2i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv2i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv2i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv4i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv4i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv4i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv4i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... 
+--- +name: splatvector_nxv8i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv8i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv8i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv8i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv16i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv16i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... 
+--- +name: splatvector_nxv16i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv16i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... +--- +name: splatvector_nxv32i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv32i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv32i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv32i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv32i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv32i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... 
+--- +name: splatvector_nxv64i1_0 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv64i1_0 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMCLR_VL:%[0-9]+]]:_() = G_VMCLR_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMCLR_VL]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 0 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv64i1_1 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv64i1_1 + ; CHECK: [[VMSET_VL:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: [[VMSET_VL1:%[0-9]+]]:_() = G_VMSET_VL $x0 + ; CHECK-NEXT: $v0 = COPY [[VMSET_VL1]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s1) = G_CONSTANT i1 1 + %1:_() = G_SPLAT_VECTOR %0(s1) + $v0 = COPY %1() + PseudoRET implicit $v0 + +... +--- +name: splatvector_nxv64i1_2 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + liveins: $x10 + + ; CHECK-LABEL: name: splatvector_nxv64i1_2 + ; CHECK: liveins: $x10 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $x10 + ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[COPY]](s64) + ; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[TRUNC]], [[C1]] + ; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[AND]], [[C]] + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[AND1]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C2]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; CHECK-NEXT: [[ICMP:%[0-9]+]]:_() = G_ICMP intpred(ne), [[SPLAT_VECTOR]](), [[SPLAT_VECTOR1]] + ; CHECK-NEXT: $v0 = COPY [[ICMP]]() + ; CHECK-NEXT: PseudoRET implicit $v0 + %0:_(s64) = COPY $x10 + %1:_(s1) = G_TRUNC %0(s64) + %2:_() = G_SPLAT_VECTOR %1(s1) + $v0 = COPY %2() + PseudoRET implicit $v0 +... + +--- +name: splatvector_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_(s8) = G_CONSTANT i8 0 + %2:_() = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... + +--- +name: splatvector_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_(s8) = G_CONSTANT i8 0 + %2:_() = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... 
+--- +name: splatvector_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_(s8) = G_CONSTANT i8 0 + %2:_() = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_(s8) = G_CONSTANT i8 0 + %2:_() = G_SPLAT_VECTOR %1(s8) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i8 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_() = G_IMPLICIT_DEF + %1:_(s8) = G_CONSTANT i8 0 + %2:_() = G_SPLAT_VECTOR %1(s8) + $v8m2 = COPY %2() + PseudoRET implicit $v8m2 + +... +--- +name: splatvector_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_(s16) = G_CONSTANT i16 0 + %2:_() = G_SPLAT_VECTOR %1(s16) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_(s16) = G_CONSTANT i16 0 + %2:_() = G_SPLAT_VECTOR %1(s16) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_(s16) = G_CONSTANT i16 0 + %2:_() = G_SPLAT_VECTOR %1(s16) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... 
+--- +name: splatvector_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_() = G_IMPLICIT_DEF + %1:_(s16) = G_CONSTANT i16 0 + %2:_() = G_SPLAT_VECTOR %1(s16) + $v8m2 = COPY %2() + PseudoRET implicit $v8m2 + +... +--- +name: splatvector_nxv16i16 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i16 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_() = G_IMPLICIT_DEF + %1:_(s16) = G_CONSTANT i16 0 + %2:_() = G_SPLAT_VECTOR %1(s16) + $v8m4 = COPY %2() + PseudoRET implicit $v8m4 + +... +--- +name: splatvector_nxv1i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_(s32) = G_CONSTANT i32 0 + %2:_() = G_SPLAT_VECTOR %1(s32) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv2i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_(s32) = G_CONSTANT i32 0 + %2:_() = G_SPLAT_VECTOR %1(s32) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv4i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_() = G_IMPLICIT_DEF + %1:_(s32) = G_CONSTANT i32 0 + %2:_() = G_SPLAT_VECTOR %1(s32) + $v8m2 = COPY %2() + PseudoRET implicit $v8m2 + +... +--- +name: splatvector_nxv8i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_() = G_IMPLICIT_DEF + %1:_(s32) = G_CONSTANT i32 0 + %2:_() = G_SPLAT_VECTOR %1(s32) + $v8m4 = COPY %2() + PseudoRET implicit $v8m4 + +... 
+--- +name: splatvector_nxv16i32 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv16i32 + ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; CHECK-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; CHECK-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %0:_() = G_IMPLICIT_DEF + %1:_(s32) = G_CONSTANT i32 0 + %2:_() = G_SPLAT_VECTOR %1(s32) + $v8m8 = COPY %2() + PseudoRET implicit $v8m8 + +... +--- +name: splatvector_nxv1i64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv1i64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; CHECK-NEXT: $v8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_(s64) = G_CONSTANT i64 0 + %2:_() = G_SPLAT_VECTOR %1(s64) + $v8 = COPY %2() + PseudoRET implicit $v8 + +... +--- +name: splatvector_nxv2i64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv2i64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; CHECK-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m2 + %0:_() = G_IMPLICIT_DEF + %1:_(s64) = G_CONSTANT i64 0 + %2:_() = G_SPLAT_VECTOR %1(s64) + $v8m2 = COPY %2() + PseudoRET implicit $v8m2 + +... +--- +name: splatvector_nxv4i64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv4i64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; CHECK-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m4 + %0:_() = G_IMPLICIT_DEF + %1:_(s64) = G_CONSTANT i64 0 + %2:_() = G_SPLAT_VECTOR %1(s64) + $v8m4 = COPY %2() + PseudoRET implicit $v8m4 + +... +--- +name: splatvector_nxv8i64 +legalized: false +tracksRegLiveness: true +body: | + bb.1: + ; CHECK-LABEL: name: splatvector_nxv8i64 + ; CHECK: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; CHECK-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; CHECK-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]]() + ; CHECK-NEXT: PseudoRET implicit $v8m8 + %0:_() = G_IMPLICIT_DEF + %1:_(s64) = G_CONSTANT i64 0 + %2:_() = G_SPLAT_VECTOR %1(s64) + $v8m8 = COPY %2() + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-s64-rv32.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-s64-rv32.mir
new file mode 100644
index 0000000..806c9b9
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-splatvector-s64-rv32.mir
@@ -0,0 +1,116 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=HasF64 %s
+# RUN: llc -mtriple=riscv32 -mattr=+Zve64x -run-pass=legalizer %s -o - | FileCheck --check-prefix=NoF64 %s
+
+---
+name: splatvector_nxv1i64
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.1:
+    ; HasF64-LABEL: name: splatvector_nxv1i64
+    ; HasF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; HasF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; HasF64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32)
+    ; HasF64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+    ; HasF64-NEXT: $v8 = COPY [[SPLAT_VECTOR]](<vscale x 1 x s64>)
+    ; HasF64-NEXT: PseudoRET implicit $v8
+    ;
+    ; NoF64-LABEL: name: splatvector_nxv1i64
+    ; NoF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; NoF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; NoF64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 1 x s1>) = G_VMSET_VL $x0
+    ; NoF64-NEXT: [[DEF2:%[0-9]+]]:_(<vscale x 1 x s64>) = G_IMPLICIT_DEF
+    ; NoF64-NEXT: [[SPLAT_VECTOR_SPLIT_I64_VL:%[0-9]+]]:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR_SPLIT_I64_VL [[DEF2]], [[DEF]](s32), [[DEF1]], $x0
+    ; NoF64-NEXT: $v8 = COPY [[SPLAT_VECTOR_SPLIT_I64_VL]](<vscale x 1 x s64>)
+    ; NoF64-NEXT: PseudoRET implicit $v8
+    %0:_(s64) = G_IMPLICIT_DEF
+    %1:_(<vscale x 1 x s64>) = G_SPLAT_VECTOR %0(s64)
+    $v8 = COPY %1(<vscale x 1 x s64>)
+    PseudoRET implicit $v8
+
+...
+---
+name: splatvector_nxv2i64
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.1:
+    ; HasF64-LABEL: name: splatvector_nxv2i64
+    ; HasF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; HasF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; HasF64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32)
+    ; HasF64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+    ; HasF64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR]](<vscale x 2 x s64>)
+    ; HasF64-NEXT: PseudoRET implicit $v8m2
+    ;
+    ; NoF64-LABEL: name: splatvector_nxv2i64
+    ; NoF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; NoF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; NoF64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 2 x s1>) = G_VMSET_VL $x0
+    ; NoF64-NEXT: [[DEF2:%[0-9]+]]:_(<vscale x 2 x s64>) = G_IMPLICIT_DEF
+    ; NoF64-NEXT: [[SPLAT_VECTOR_SPLIT_I64_VL:%[0-9]+]]:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR_SPLIT_I64_VL [[DEF2]], [[DEF]](s32), [[DEF1]], $x0
+    ; NoF64-NEXT: $v8m2 = COPY [[SPLAT_VECTOR_SPLIT_I64_VL]](<vscale x 2 x s64>)
+    ; NoF64-NEXT: PseudoRET implicit $v8m2
+    %0:_(s64) = G_IMPLICIT_DEF
+    %1:_(<vscale x 2 x s64>) = G_SPLAT_VECTOR %0(s64)
+    $v8m2 = COPY %1(<vscale x 2 x s64>)
+    PseudoRET implicit $v8m2
+
+...
+---
+name: splatvector_nxv4i64
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.1:
+    ; HasF64-LABEL: name: splatvector_nxv4i64
+    ; HasF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; HasF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; HasF64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32)
+    ; HasF64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+    ; HasF64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR]](<vscale x 4 x s64>)
+    ; HasF64-NEXT: PseudoRET implicit $v8m4
+    ;
+    ; NoF64-LABEL: name: splatvector_nxv4i64
+    ; NoF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; NoF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; NoF64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 4 x s1>) = G_VMSET_VL $x0
+    ; NoF64-NEXT: [[DEF2:%[0-9]+]]:_(<vscale x 4 x s64>) = G_IMPLICIT_DEF
+    ; NoF64-NEXT: [[SPLAT_VECTOR_SPLIT_I64_VL:%[0-9]+]]:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR_SPLIT_I64_VL [[DEF2]], [[DEF]](s32), [[DEF1]], $x0
+    ; NoF64-NEXT: $v8m4 = COPY [[SPLAT_VECTOR_SPLIT_I64_VL]](<vscale x 4 x s64>)
+    ; NoF64-NEXT: PseudoRET implicit $v8m4
+    %0:_(s64) = G_IMPLICIT_DEF
+    %1:_(<vscale x 4 x s64>) = G_SPLAT_VECTOR %0(s64)
+    $v8m4 = COPY %1(<vscale x 4 x s64>)
+    PseudoRET implicit $v8m4
+
+...
+---
+name: splatvector_nxv8i64
+legalized: false
+tracksRegLiveness: true
+body: |
+  bb.1:
+    ; HasF64-LABEL: name: splatvector_nxv8i64
+    ; HasF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; HasF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; HasF64-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[DEF]](s32), [[DEF1]](s32)
+    ; HasF64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR [[MV]](s64)
+    ; HasF64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR]](<vscale x 8 x s64>)
+    ; HasF64-NEXT: PseudoRET implicit $v8m8
+    ;
+    ; NoF64-LABEL: name: splatvector_nxv8i64
+    ; NoF64: [[DEF:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; NoF64-NEXT: [[DEF1:%[0-9]+]]:_(s32) = G_IMPLICIT_DEF
+    ; NoF64-NEXT: [[VMSET_VL:%[0-9]+]]:_(<vscale x 8 x s1>) = G_VMSET_VL $x0
+    ; NoF64-NEXT: [[DEF2:%[0-9]+]]:_(<vscale x 8 x s64>) = G_IMPLICIT_DEF
+    ; NoF64-NEXT: [[SPLAT_VECTOR_SPLIT_I64_VL:%[0-9]+]]:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR_SPLIT_I64_VL [[DEF2]], [[DEF]](s32), [[DEF1]], $x0
+    ; NoF64-NEXT: $v8m8 = COPY [[SPLAT_VECTOR_SPLIT_I64_VL]](<vscale x 8 x s64>)
+    ; NoF64-NEXT: PseudoRET implicit $v8m8
+    %0:_(s64) = G_IMPLICIT_DEF
+    %1:_(<vscale x 8 x s64>) = G_SPLAT_VECTOR %0(s64)
+    $v8m8 = COPY %1(<vscale x 8 x s64>)
+    PseudoRET implicit $v8m8
+
+...
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-zext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-zext.mir new file mode 100644 index 0000000..fe4ddfa --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/legalizer/rvv/legalize-zext.mir @@ -0,0 +1,1589 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV32 %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=legalizer %s -o - | FileCheck --check-prefix=RV64 %s + +# Extend from s1 element vectors +--- +name: zext_nxv1i8_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i8_nxv1i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i8_nxv1i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv1i16_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i16_nxv1i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i16_nxv1i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: zext_nxv1i32_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i32_nxv1i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i32_nxv1i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv1i64_nxv1i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i64_nxv1i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i64_nxv1i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: zext_nxv2i8_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i8_nxv2i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv2i8_nxv2i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv2i16_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i16_nxv2i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv2i16_nxv2i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: zext_nxv2i32_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i32_nxv2i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv2i32_nxv2i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv2i64_nxv2i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i64_nxv2i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv2i64_nxv2i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: zext_nxv4i8_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i8_nxv4i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv4i8_nxv4i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv4i16_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i16_nxv4i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv4i16_nxv4i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: zext_nxv4i32_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i32_nxv4i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv4i32_nxv4i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv4i64_nxv4i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i64_nxv4i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv4i64_nxv4i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv8i8_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i8_nxv8i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv8i8_nxv8i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv8i16_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i16_nxv8i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv8i16_nxv8i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: zext_nxv8i32_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i32_nxv8i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv8i32_nxv8i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... 
+--- +name: zext_nxv8i64_nxv8i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i64_nxv8i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C]](s32), [[C1]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV]](s64) + ; RV32-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[C2]](s32), [[C3]](s32) + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[MV1]](s64) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv8i64_nxv8i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 0 + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... +--- +name: zext_nxv16i8_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv16i8_nxv16i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv16i8_nxv16i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m2 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv16i16_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv16i16_nxv16i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv16i16_nxv16i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv16i32_nxv16i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv16i32_nxv16i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv16i32_nxv16i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... 
+--- +name: zext_nxv32i8_nxv32i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv32i8_nxv32i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv32i8_nxv32i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m4 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv32i16_nxv32i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv32i16_nxv32i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv32i16_nxv32i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... 
+--- +name: zext_nxv64i8_nxv64i1 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv64i8_nxv64i1 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV32-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C]](s32) + ; RV32-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV32-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[C1]](s32) + ; RV32-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV32-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv64i8_nxv64i1 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 + ; RV64-NEXT: [[ANYEXT:%[0-9]+]]:_(s64) = G_ANYEXT [[C]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT]](s64) + ; RV64-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 + ; RV64-NEXT: [[ANYEXT1:%[0-9]+]]:_(s64) = G_ANYEXT [[C1]](s32) + ; RV64-NEXT: [[SPLAT_VECTOR1:%[0-9]+]]:_() = G_SPLAT_VECTOR [[ANYEXT1]](s64) + ; RV64-NEXT: [[SELECT:%[0-9]+]]:_() = G_SELECT [[COPY]](), [[SPLAT_VECTOR1]], [[SPLAT_VECTOR]] + ; RV64-NEXT: $v8m8 = COPY [[SELECT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... + +# Extend from s8 element vectors +--- +name: zext_nxv1i16_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i16_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i16_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: zext_nxv1i32_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i32_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i32_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv1i64_nxv1i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i64_nxv1i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i64_nxv1i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: zext_nxv2i16_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i16_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv2i16_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: zext_nxv2i32_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i32_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv2i32_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: zext_nxv2i64_nxv2i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i64_nxv2i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv2i64_nxv2i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv4i16_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i16_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv4i16_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: zext_nxv4i32_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i32_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv4i32_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: zext_nxv4i64_nxv4i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i64_nxv4i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv4i64_nxv4i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv8i16_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i16_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv8i16_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv8i32_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i32_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv8i32_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv8i64_nxv8i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i64_nxv8i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv8i64_nxv8i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... +--- +name: zext_nxv16i16_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv16i16_nxv16i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv16i16_nxv16i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8m2 + %0:_() = G_ZEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv16i32_nxv16i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv16i32_nxv16i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv16i32_nxv16i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m2 + %0:_() = G_ZEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... 
+--- +name: zext_nxv32i16_nxv32i8 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv32i16_nxv32i8 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv32i16_nxv32i8 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m4 + %0:_() = G_ZEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... + +# Extend from s16 element vectors +--- +name: zext_nxv1i32_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i32_nxv1i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i32_nxv1i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: zext_nxv1i64_nxv1i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i64_nxv1i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i64_nxv1i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: zext_nxv2i32_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i32_nxv2i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv2i32_nxv2i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... 
+--- +name: zext_nxv2i64_nxv2i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i64_nxv2i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv2i64_nxv2i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: zext_nxv4i32_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i32_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv4i32_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... +--- +name: zext_nxv4i64_nxv4i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i64_nxv4i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv4i64_nxv4i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv8i32_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i32_nxv8i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv8i32_nxv8i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8m2 + %0:_() = G_ZEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... 
+--- +name: zext_nxv8i64_nxv8i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i64_nxv8i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv8i64_nxv8i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m4 + %0:_() = G_ZEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... +--- +name: zext_nxv16i32_nxv16i16 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv16i32_nxv16i16 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv16i32_nxv16i16 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m4 + %0:_() = G_ZEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... + +# Extend from s32 element vectors +--- +name: zext_nxv1i64_nxv1i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv1i64_nxv1i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8 + ; + ; RV64-LABEL: name: zext_nxv1i64_nxv1i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8 = COPY %0() + PseudoRET implicit $v8 +... +--- +name: zext_nxv2i64_nxv2i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv2i64_nxv2i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64-LABEL: name: zext_nxv2i64_nxv2i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m2 + %1:_() = COPY $v8 + %0:_() = G_ZEXT %1() + $v8m2 = COPY %0() + PseudoRET implicit $v8m2 +... 
+--- +name: zext_nxv4i64_nxv4i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv4i64_nxv4i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64-LABEL: name: zext_nxv4i64_nxv4i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m2 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m4 + %1:_() = COPY $v8m2 + %0:_() = G_ZEXT %1() + $v8m4 = COPY %0() + PseudoRET implicit $v8m4 +... +--- +name: zext_nxv8i64_nxv8i32 +legalized: false +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + ; RV32-LABEL: name: zext_nxv8i64_nxv8i32 + ; RV32: liveins: $v8 + ; RV32-NEXT: {{ $}} + ; RV32-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV32-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV32-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV32-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64-LABEL: name: zext_nxv8i64_nxv8i32 + ; RV64: liveins: $v8 + ; RV64-NEXT: {{ $}} + ; RV64-NEXT: [[COPY:%[0-9]+]]:_() = COPY $v8m4 + ; RV64-NEXT: [[ZEXT:%[0-9]+]]:_() = G_ZEXT [[COPY]]() + ; RV64-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV64-NEXT: PseudoRET implicit $v8m8 + %1:_() = COPY $v8m4 + %0:_() = G_ZEXT %1() + $v8m8 = COPY %0() + PseudoRET implicit $v8m8 +... diff --git a/llvm/test/MachineVerifier/test_g_fcmp.mir b/llvm/test/MachineVerifier/test_g_fcmp.mir index 9a73569..17be746 100644 --- a/llvm/test/MachineVerifier/test_g_fcmp.mir +++ b/llvm/test/MachineVerifier/test_g_fcmp.mir @@ -24,17 +24,22 @@ body: | %4:_(<2 x s32>) = G_IMPLICIT_DEF %5:_(s1) = G_FCMP floatpred(oeq), %3, %4 - ; mismatched element count + ; mismatched fixed element count ; CHECK: Bad machine code: Generic vector icmp/fcmp must preserve number of %6:_(<2 x s32>) = G_IMPLICIT_DEF %7:_(<2 x s32>) = G_IMPLICIT_DEF %8:_(<4 x s1>) = G_FCMP floatpred(oeq), %6, %7 + ; mismatched scalable element count + ; CHECK: Bad machine code: Generic vector icmp/fcmp must preserve number of + %9:_() = G_IMPLICIT_DEF + %10:_() = G_IMPLICIT_DEF + %11:_() = G_FCMP floatpred(oeq), %9, %10 ; mismatched scalar element type ; CHECK: *** Bad machine code: Type mismatch in generic instruction *** - %9:_(s32) = G_FCONSTANT float 0.0 - %10:_(s64) = G_FCONSTANT float 1.0 - %11:_(s1) = G_FCMP floatpred(oeq), %9, %10 + %12:_(s32) = G_FCONSTANT float 0.0 + %13:_(s64) = G_FCONSTANT float 1.0 + %14:_(s1) = G_FCMP floatpred(oeq), %12, %13 ... 
diff --git a/llvm/test/MachineVerifier/test_g_icmp.mir b/llvm/test/MachineVerifier/test_g_icmp.mir index 7c64e25..74e3d34 100644 --- a/llvm/test/MachineVerifier/test_g_icmp.mir +++ b/llvm/test/MachineVerifier/test_g_icmp.mir @@ -24,17 +24,22 @@ body: | %4:_(<2 x s32>) = G_IMPLICIT_DEF %5:_(s1) = G_ICMP intpred(eq), %3, %4 - ; mismatched element count + ; mismatched fixed element count ; CHECK: Bad machine code: Generic vector icmp/fcmp must preserve number of %6:_(<2 x s32>) = G_IMPLICIT_DEF %7:_(<2 x s32>) = G_IMPLICIT_DEF %8:_(<4 x s1>) = G_ICMP intpred(eq), %6, %7 + ; mismatched scalable element count + ; CHECK: Bad machine code: Generic vector icmp/fcmp must preserve number of + %9:_() = G_IMPLICIT_DEF + %10:_() = G_IMPLICIT_DEF + %11:_() = G_ICMP intpred(eq), %9, %10 ; mismatched scalar element type ; CHECK: *** Bad machine code: Type mismatch in generic instruction *** - %9:_(s32) = G_CONSTANT i32 0 - %10:_(s64) = G_CONSTANT i32 1 - %11:_(s1) = G_ICMP intpred(eq), %9, %10 + %12:_(s32) = G_CONSTANT i32 0 + %13:_(s64) = G_CONSTANT i32 1 + %14:_(s1) = G_ICMP intpred(eq), %12, %13 ... -- cgit v1.1 From 05f673bcefb0912a38a67b0026cad3768b2f85d2 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Thu, 14 Mar 2024 07:20:23 -0700 Subject: [RISCV][GISEL] Regbank select for scalable vector G_ICMP --- .../Target/RISCV/GISel/RISCVRegisterBankInfo.cpp | 5 +- .../RISCV/GlobalISel/regbankselect/rvv/icmp.mir | 675 +++++++++++++++++++++ 2 files changed, 679 insertions(+), 1 deletion(-) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/icmp.mir diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index 888bcc4..d0f9f43 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -529,7 +529,10 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { if (!Ty.isValid()) continue; - if (isPreISelGenericFloatingPointOpcode(Opc)) + if (Ty.isVector()) + OpdsMapping[Idx] = + getVRBValueMapping(Ty.getSizeInBits().getKnownMinValue()); + else if (isPreISelGenericFloatingPointOpcode(Opc)) OpdsMapping[Idx] = getFPValueMapping(Ty.getSizeInBits()); else OpdsMapping[Idx] = GPRValueMapping; diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/icmp.mir new file mode 100644 index 0000000..925d6ae --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/icmp.mir @@ -0,0 +1,675 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV64I %s + +--- +name: icmp_nxv1i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 
= COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv2i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv8i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv16i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv32i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv32i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv32i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv64i1 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv64i1 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv64i1 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv32i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv32i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv32i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv64i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv64i8 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv64i8 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i16 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i16 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i16 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i16 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i16 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i16 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i16 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i16 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv16i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i16 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i16 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv32i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv32i16 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv32i16 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv1i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i32 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i32 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv2i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i32 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i32 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i32 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i32 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv8i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i32 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i32 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv16i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i32 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i32 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv1i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i64 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i64 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv2i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i64 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i64 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i64 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i64 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv8i64 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i64 + ; RV32I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV32I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV32I-NEXT: $v8 = COPY [[ICMP]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i64 + ; RV64I: [[DEF:%[0-9]+]]:vrb() = G_IMPLICIT_DEF + ; RV64I-NEXT: [[ICMP:%[0-9]+]]:vrb() = G_ICMP intpred(sgt), [[DEF]](), [[DEF]] + ; RV64I-NEXT: $v8 = COPY [[ICMP]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = G_IMPLICIT_DEF + %1:_() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
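Note on the register-bank change exercised by the icmp.mir checks above: the patch routes every vector-typed operand to the vector register bank via getVRBValueMapping(Ty.getSizeInBits().getKnownMinValue()). The standalone snippet below is illustrative only and is not part of the patch; it just shows why the known-minimum width (rather than a fixed size) is the quantity available for a scalable type. It uses the real llvm::TypeSize API and compiles against LLVM headers.

// Illustrative only -- not from the patch. Scalable vector types carry a
// TypeSize whose exact width is a runtime multiple of vscale, so only the
// known-minimum number of bits is usable as a plain integer when picking
// the vector-bank value mapping.
#include "llvm/Support/TypeSize.h"
#include <cassert>

int main() {
  // A type with a known minimum of 64 bits, scaled by vscale at run time
  // (e.g. an LMUL=1 scalable vector in these tests).
  llvm::TypeSize TS = llvm::TypeSize::getScalable(64);
  assert(TS.isScalable());
  assert(TS.getKnownMinValue() == 64);
  // TS.getFixedValue() would be invalid here because the size is scalable;
  // this is why the mapping above keys off getKnownMinValue().
  return 0;
}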
-- cgit v1.1 From 35a9393a3f775d4e1506965b9cfeedd45599f1a7 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Fri, 15 Mar 2024 10:00:22 -0700 Subject: [RISCV][GISEL] Instruction selection for G_ICMP --- .../GlobalISel/instruction-select/rvv/icmp.mir | 534 +++++++++++++++++++++ 1 file changed, 534 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir new file mode 100644 index 0000000..df0d48a --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/icmp.mir @@ -0,0 +1,534 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV64I %s + +# Don't test i1 element types here since they have been widened to i8 in legalization + +--- +name: icmp_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i8 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vr = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_MF8_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i8 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLTU_VV_MF8_:%[0-9]+]]:vr = PseudoVMSLTU_VV_MF8 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_MF8_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(ult), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i8 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_MF4_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i8 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLT_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLT_VV_MF4 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLT_VV_MF4_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(slt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i8 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_MF2_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i8 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLEU_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLEU_VV_MF2 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLEU_VV_MF2_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(uge), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i8 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vr = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M1_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i8 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLE_VV_M1_:%[0-9]+]]:vr = PseudoVMSLE_VV_M1 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_M1_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(sge), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i8 + ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i8 + ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(ugt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv32i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv32i8 + ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv32i8 + ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv64i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv64i8 + ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv64i8 + ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 3 /* e8 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(ule), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i16 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF4_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i16 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLE_VV_MF4_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF4 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF4_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(sle), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i16 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_MF2_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i16 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSNE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSNE_VV_MF2 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSNE_VV_MF2_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(ne), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i16 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i16 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(eq), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i16 + ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i16 + ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M2 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(ult), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv16i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i16 + ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i16 + ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M4 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(slt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv32i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv32i16 + ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv32i16 + ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M8 [[DEF]], [[DEF]], -1, 4 /* e16 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(uge), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv1i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i32 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF2_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i32 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLE_VV_MF2_:%[0-9]+]]:vr = PseudoVMSLE_VV_MF2 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLE_VV_MF2_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(sge), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv2i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i32 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vr = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M1_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i32 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSLTU_VV_M1_:%[0-9]+]]:vr = PseudoVMSLTU_VV_M1 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSLTU_VV_M1_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(ugt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv4i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i32 + ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i32 + ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLT_VV_M2 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(sgt), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv8i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i32 + ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i32 + ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLEU_VV_M4 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(ule), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv16i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv16i32 + ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv16i32 + ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLE_VV_M8 [[DEF]], [[DEF]], -1, 5 /* e32 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(sle), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv1i64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv1i64 + ; RV32I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV32I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]] + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv1i64 + ; RV64I: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: [[PseudoVMSEQ_VV_M1_:%[0-9]+]]:vr = PseudoVMSEQ_VV_M1 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV64I-NEXT: $v8 = COPY [[PseudoVMSEQ_VV_M1_]] + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(eq), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: icmp_nxv2i64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv2i64 + ; RV32I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv2i64 + ; RV64I: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSNE_VV_M2 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(ne), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv4i64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv4i64 + ; RV32I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv4i64 + ; RV64I: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M4 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(ult), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: icmp_nxv8i64 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + ; RV32I-LABEL: name: icmp_nxv8i64 + ; RV32I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: icmp_nxv8i64 + ; RV64I: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVMSLTU_VV_M8 [[DEF]], [[DEF]], -1, 6 /* e64 */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = G_IMPLICIT_DEF + %1:vrb() = G_ICMP intpred(ult), %0(), %0 + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
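The instruction-selection checks above map each G_ICMP predicate onto an RVV vector-vector mask compare pseudo (PseudoVMSEQ/VMSNE/VMSLT/VMSLTU/VMSLE/VMSLEU at the appropriate LMUL), with a VL operand of -1 (VLMAX) and the SEW encoded as its log2 (3 = e8, 4 = e16, 5 = e32, 6 = e64); for LMUL >= 2 sources the single-register mask result is marked early-clobber because it may overlap the wider source register group. The helper below is a hand-written summary of that correspondence, not an API from the patch (the function name is made up); the gt/ge forms are listed against the lt/le mnemonics because the vector-vector encodings have no gt/ge variant, so the selector typically swaps the operands instead.

// Illustrative helper only; rvvMaskCompareFor is not LLVM API. The table
// mirrors the pseudo opcodes checked in the tests above.
#include "llvm/IR/InstrTypes.h"
#include <iostream>
#include <string>

static std::string rvvMaskCompareFor(llvm::CmpInst::Predicate P) {
  using llvm::CmpInst;
  switch (P) {
  case CmpInst::ICMP_EQ:  return "vmseq.vv";
  case CmpInst::ICMP_NE:  return "vmsne.vv";
  case CmpInst::ICMP_ULT: return "vmsltu.vv";
  case CmpInst::ICMP_SLT: return "vmslt.vv";
  case CmpInst::ICMP_ULE: return "vmsleu.vv";
  case CmpInst::ICMP_SLE: return "vmsle.vv";
  case CmpInst::ICMP_UGT: return "vmsltu.vv"; // operands swapped by the selector
  case CmpInst::ICMP_SGT: return "vmslt.vv";  // operands swapped by the selector
  case CmpInst::ICMP_UGE: return "vmsleu.vv"; // operands swapped by the selector
  case CmpInst::ICMP_SGE: return "vmsle.vv";  // operands swapped by the selector
  default:                return "";          // FP predicates not handled here
  }
}

int main() {
  std::cout << rvvMaskCompareFor(llvm::CmpInst::ICMP_ULT) << "\n"; // vmsltu.vv
  return 0;
}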
-- cgit v1.1 From 188ca374ee601a50b6f5f6c1cf7e7dc3998e3a62 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Fri, 15 Mar 2024 07:35:55 -0700 Subject: [RISCV][GISEL] Regbankselect for G_ZEXT, G_SEXT, and G_ANYEXT with scalable vector type --- .../Target/RISCV/GISel/RISCVRegisterBankInfo.cpp | 12 +- .../RISCV/GlobalISel/regbankselect/rvv/anyext.mir | 820 +++++++++++++++++++++ .../RISCV/GlobalISel/regbankselect/rvv/sext.mir | 820 +++++++++++++++++++++ .../RISCV/GlobalISel/regbankselect/rvv/zext.mir | 820 +++++++++++++++++++++ 4 files changed, 2469 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/anyext.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/sext.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/zext.mir diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index d0f9f43..bab95c5 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -321,13 +321,19 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_PTRTOINT: case TargetOpcode::G_INTTOPTR: case TargetOpcode::G_TRUNC: - case TargetOpcode::G_ANYEXT: - case TargetOpcode::G_SEXT: - case TargetOpcode::G_ZEXT: case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: return getInstructionMapping(DefaultMappingID, /*Cost=*/1, GPRValueMapping, NumOperands); + case TargetOpcode::G_ANYEXT: + case TargetOpcode::G_SEXT: + case TargetOpcode::G_ZEXT: { + // Handle vector extends in the default case below. + if (MRI.getType(MI.getOperand(0).getReg()).isVector()) + break; + return getInstructionMapping(DefaultMappingID, /*Cost=*/1, GPRValueMapping, + NumOperands); + } case TargetOpcode::G_FADD: case TargetOpcode::G_FSUB: case TargetOpcode::G_FMUL: diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/anyext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/anyext.mir new file mode 100644 index 0000000..062179c --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/anyext.mir @@ -0,0 +1,820 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV64I %s + +--- +name: anyext_nxv1i16_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i16_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i16_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv1i32_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i32_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i32_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv1i64_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv2i16_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i16_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv2i16_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv2i32_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i32_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv2i32_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i64_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: anyext_nxv4i16_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i16_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv4i16_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv4i32_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i32_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv4i32_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: anyext_nxv4i64_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i16_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i16_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv8i16_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: anyext_nxv8i32_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i32_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv8i32_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: anyext_nxv8i64_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: anyext_nxv16i16_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv16i16_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv16i16_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8m2 + %1:_() = G_ANYEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv16i32_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv16i32_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv16i32_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8m4 + %1:_() = G_ANYEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: anyext_nxv32i16_nxv32i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv32i16_nxv32i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv32i16_nxv32i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8m4 + %1:_() = G_ANYEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: anyext_nxv1i32_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i32_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i32_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv1i64_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i32_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i32_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv2i32_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv2i64_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: anyext_nxv4i32_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i32_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv4i32_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: anyext_nxv4i64_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i32_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i32_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv8i32_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8m2 + %1:_() = G_ANYEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: anyext_nxv8i64_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8m2 + %1:_() = G_ANYEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: anyext_nxv16i32_nxv16i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv16i32_nxv16i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv16i32_nxv16i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8m4 + %1:_() = G_ANYEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: anyext_nxv1i64_nxv1i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i64_nxv2i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_ANYEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: anyext_nxv4i64_nxv4i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8m2 + %1:_() = G_ANYEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: anyext_nxv8i64_nxv8i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: [[ANYEXT:%[0-9]+]]:vrb() = G_ANYEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[ANYEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8m4 + %1:_() = G_ANYEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/sext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/sext.mir new file mode 100644 index 0000000..a754b8b --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/sext.mir @@ -0,0 +1,820 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV64I %s + +--- +name: sext_nxv1i16_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i16_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i16_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv1i32_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i32_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i32_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv1i64_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i64_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i64_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv2i16_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i16_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv2i16_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv2i32_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i32_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv2i32_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv2i64_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i64_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv2i64_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: sext_nxv4i16_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i16_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv4i16_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv4i32_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i32_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv4i32_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: sext_nxv4i64_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i64_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv4i64_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: sext_nxv8i16_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i16_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv8i16_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: sext_nxv8i32_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i32_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv8i32_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv8i64_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i64_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv8i64_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: sext_nxv16i16_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv16i16_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv16i16_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8m2 + %1:_() = G_SEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: sext_nxv16i32_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv16i32_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv16i32_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8m2 + %1:_() = G_SEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: sext_nxv32i16_nxv32i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv32i16_nxv32i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv32i16_nxv32i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8m4 + %1:_() = G_SEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... 
+--- +name: sext_nxv1i32_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i32_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i32_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv1i64_nxv1i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i64_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i64_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv2i32_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i32_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv2i32_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv2i64_nxv2i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i64_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv2i64_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: sext_nxv4i32_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i32_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv4i32_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: sext_nxv4i64_nxv4i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i64_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv4i64_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: sext_nxv8i32_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i32_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv8i32_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8m2 + %1:_() = G_SEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: sext_nxv8i64_nxv8i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i64_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv8i64_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8m2 + %1:_() = G_SEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... 
+--- +name: sext_nxv16i32_nxv16i16 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv16i32_nxv16i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv16i32_nxv16i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8m4 + %1:_() = G_SEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: sext_nxv1i64_nxv1i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i64_nxv1i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i64_nxv1i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv2i64_nxv2i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i64_nxv2i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv2i64_nxv2i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_SEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: sext_nxv4i64_nxv4i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i64_nxv4i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv4i64_nxv4i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8m2 + %1:_() = G_SEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv8i64_nxv8i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i64_nxv8i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv8i64_nxv8i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: [[SEXT:%[0-9]+]]:vrb() = G_SEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[SEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8m4 + %1:_() = G_SEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/zext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/zext.mir new file mode 100644 index 0000000..c3bc4a9 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/regbankselect/rvv/zext.mir @@ -0,0 +1,820 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+m,+v -run-pass=regbankselect \ +# RUN: -disable-gisel-legality-check -simplify-mir -verify-machineinstrs %s \ +# RUN: -o - | FileCheck -check-prefix=RV64I %s + +--- +name: zext_nxv1i16_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i16_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i16_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: zext_nxv1i32_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i32_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i32_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv1i64_nxv1i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i64_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i64_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: zext_nxv2i16_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i16_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv2i16_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: zext_nxv2i32_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i32_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv2i32_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: zext_nxv2i64_nxv2i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i64_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv2i64_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_ZEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: zext_nxv4i16_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i16_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv4i16_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:_() = COPY $v8 + %1:_() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: zext_nxv4i32_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i32_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv4i32_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_ZEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: zext_nxv4i64_nxv4i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i64_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv4i64_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8 + %1:_() = G_ZEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: zext_nxv8i16_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i16_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv8i16_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:_() = COPY $v8 + %1:_() = G_ZEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: zext_nxv8i32_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i32_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv8i32_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8 + %1:_() = G_ZEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: zext_nxv8i64_nxv8i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i64_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv8i64_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8 + %1:_() = G_ZEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: zext_nxv16i16_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv16i16_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv16i16_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8m2 + %1:_() = G_ZEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: zext_nxv16i32_nxv16i8 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv16i32_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv16i32_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8m2 + %1:_() = G_ZEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... 
+---
+name: zext_nxv32i16_nxv32i8
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: zext_nxv32i16_nxv32i8
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4
+    ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_ZEXT [[COPY]](<vscale x 32 x s8>)
+    ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 32 x s16>)
+    ; RV32I-NEXT: PseudoRET implicit $v8m8
+    ;
+    ; RV64I-LABEL: name: zext_nxv32i16_nxv32i8
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 32 x s8>) = COPY $v8m4
+    ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 32 x s16>) = G_ZEXT [[COPY]](<vscale x 32 x s8>)
+    ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 32 x s16>)
+    ; RV64I-NEXT: PseudoRET implicit $v8m8
+    %0:_(<vscale x 32 x s8>) = COPY $v8m4
+    %1:_(<vscale x 32 x s16>) = G_ZEXT %0(<vscale x 32 x s8>)
+    $v8m8 = COPY %1(<vscale x 32 x s16>)
+    PseudoRET implicit $v8m8
+
+...
+---
+name: zext_nxv1i32_nxv1i16
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: zext_nxv1i32_nxv1i16
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+    ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s16>)
+    ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>)
+    ; RV32I-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64I-LABEL: name: zext_nxv1i32_nxv1i16
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+    ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s32>) = G_ZEXT [[COPY]](<vscale x 1 x s16>)
+    ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s32>)
+    ; RV64I-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 1 x s16>) = COPY $v8
+    %1:_(<vscale x 1 x s32>) = G_ZEXT %0(<vscale x 1 x s16>)
+    $v8 = COPY %1(<vscale x 1 x s32>)
+    PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv1i64_nxv1i16
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: zext_nxv1i64_nxv1i16
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+    ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s16>)
+    ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+    ; RV32I-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64I-LABEL: name: zext_nxv1i64_nxv1i16
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s16>) = COPY $v8
+    ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s16>)
+    ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+    ; RV64I-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 1 x s16>) = COPY $v8
+    %1:_(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s16>)
+    $v8 = COPY %1(<vscale x 1 x s64>)
+    PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i32_nxv2i16
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: zext_nxv2i32_nxv2i16
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+    ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s16>)
+    ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>)
+    ; RV32I-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64I-LABEL: name: zext_nxv2i32_nxv2i16
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+    ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s32>) = G_ZEXT [[COPY]](<vscale x 2 x s16>)
+    ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 2 x s32>)
+    ; RV64I-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 2 x s16>) = COPY $v8
+    %1:_(<vscale x 2 x s32>) = G_ZEXT %0(<vscale x 2 x s16>)
+    $v8 = COPY %1(<vscale x 2 x s32>)
+    PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i64_nxv2i16
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: zext_nxv2i64_nxv2i16
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+    ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s16>)
+    ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+    ; RV32I-NEXT: PseudoRET implicit $v8m2
+    ;
+    ; RV64I-LABEL: name: zext_nxv2i64_nxv2i16
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s16>) = COPY $v8
+    ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s16>)
+    ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+    ; RV64I-NEXT: PseudoRET implicit $v8m2
+    %0:_(<vscale x 2 x s16>) = COPY $v8
+    %1:_(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s16>)
+    $v8m2 = COPY %1(<vscale x 2 x s64>)
+    PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv4i32_nxv4i16
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: zext_nxv4i32_nxv4i16
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+    ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s16>)
+    ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>)
+    ; RV32I-NEXT: PseudoRET implicit $v8m2
+    ;
+    ; RV64I-LABEL: name: zext_nxv4i32_nxv4i16
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+    ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s32>) = G_ZEXT [[COPY]](<vscale x 4 x s16>)
+    ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 4 x s32>)
+    ; RV64I-NEXT: PseudoRET implicit $v8m2
+    %0:_(<vscale x 4 x s16>) = COPY $v8
+    %1:_(<vscale x 4 x s32>) = G_ZEXT %0(<vscale x 4 x s16>)
+    $v8m2 = COPY %1(<vscale x 4 x s32>)
+    PseudoRET implicit $v8m2
+
+...
+---
+name: zext_nxv4i64_nxv4i16
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: zext_nxv4i64_nxv4i16
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+    ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s16>)
+    ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+    ; RV32I-NEXT: PseudoRET implicit $v8m4
+    ;
+    ; RV64I-LABEL: name: zext_nxv4i64_nxv4i16
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 4 x s16>) = COPY $v8
+    ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 4 x s64>) = G_ZEXT [[COPY]](<vscale x 4 x s16>)
+    ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 4 x s64>)
+    ; RV64I-NEXT: PseudoRET implicit $v8m4
+    %0:_(<vscale x 4 x s16>) = COPY $v8
+    %1:_(<vscale x 4 x s64>) = G_ZEXT %0(<vscale x 4 x s16>)
+    $v8m4 = COPY %1(<vscale x 4 x s64>)
+    PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv8i32_nxv8i16
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: zext_nxv8i32_nxv8i16
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+    ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s16>)
+    ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>)
+    ; RV32I-NEXT: PseudoRET implicit $v8m4
+    ;
+    ; RV64I-LABEL: name: zext_nxv8i32_nxv8i16
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m2
+    ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s32>) = G_ZEXT [[COPY]](<vscale x 8 x s16>)
+    ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]](<vscale x 8 x s32>)
+    ; RV64I-NEXT: PseudoRET implicit $v8m4
+    %0:_(<vscale x 8 x s16>) = COPY $v8m2
+    %1:_(<vscale x 8 x s32>) = G_ZEXT %0(<vscale x 8 x s16>)
+    $v8m4 = COPY %1(<vscale x 8 x s32>)
+    PseudoRET implicit $v8m4
+
+...
+---
+name: zext_nxv8i64_nxv8i16
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: zext_nxv8i64_nxv8i16
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m4
+    ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s16>)
+    ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+    ; RV32I-NEXT: PseudoRET implicit $v8m8
+    ;
+    ; RV64I-LABEL: name: zext_nxv8i64_nxv8i16
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 8 x s16>) = COPY $v8m4
+    ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 8 x s64>) = G_ZEXT [[COPY]](<vscale x 8 x s16>)
+    ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 8 x s64>)
+    ; RV64I-NEXT: PseudoRET implicit $v8m8
+    %0:_(<vscale x 8 x s16>) = COPY $v8m4
+    %1:_(<vscale x 8 x s64>) = G_ZEXT %0(<vscale x 8 x s16>)
+    $v8m8 = COPY %1(<vscale x 8 x s64>)
+    PseudoRET implicit $v8m8
+
+...
+---
+name: zext_nxv16i32_nxv16i16
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: zext_nxv16i32_nxv16i16
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4
+    ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s16>)
+    ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>)
+    ; RV32I-NEXT: PseudoRET implicit $v8m8
+    ;
+    ; RV64I-LABEL: name: zext_nxv16i32_nxv16i16
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 16 x s16>) = COPY $v8m4
+    ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 16 x s32>) = G_ZEXT [[COPY]](<vscale x 16 x s16>)
+    ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]](<vscale x 16 x s32>)
+    ; RV64I-NEXT: PseudoRET implicit $v8m8
+    %0:_(<vscale x 16 x s16>) = COPY $v8m4
+    %1:_(<vscale x 16 x s32>) = G_ZEXT %0(<vscale x 16 x s16>)
+    $v8m8 = COPY %1(<vscale x 16 x s32>)
+    PseudoRET implicit $v8m8
+
+...
+---
+name: zext_nxv1i64_nxv1i32
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: zext_nxv1i64_nxv1i32
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8
+    ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s32>)
+    ; RV32I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+    ; RV32I-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64I-LABEL: name: zext_nxv1i64_nxv1i32
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 1 x s32>) = COPY $v8
+    ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 1 x s64>) = G_ZEXT [[COPY]](<vscale x 1 x s32>)
+    ; RV64I-NEXT: $v8 = COPY [[ZEXT]](<vscale x 1 x s64>)
+    ; RV64I-NEXT: PseudoRET implicit $v8
+    %0:_(<vscale x 1 x s32>) = COPY $v8
+    %1:_(<vscale x 1 x s64>) = G_ZEXT %0(<vscale x 1 x s32>)
+    $v8 = COPY %1(<vscale x 1 x s64>)
+    PseudoRET implicit $v8
+
+...
+---
+name: zext_nxv2i64_nxv2i32
+legalized: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: zext_nxv2i64_nxv2i32
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8
+    ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s32>)
+    ; RV32I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+    ; RV32I-NEXT: PseudoRET implicit $v8m2
+    ;
+    ; RV64I-LABEL: name: zext_nxv2i64_nxv2i32
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb(<vscale x 2 x s32>) = COPY $v8
+    ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb(<vscale x 2 x s64>) = G_ZEXT [[COPY]](<vscale x 2 x s32>)
+    ; RV64I-NEXT: $v8m2 = COPY [[ZEXT]](<vscale x 2 x s64>)
+    ; RV64I-NEXT: PseudoRET implicit $v8m2
+    %0:_(<vscale x 2 x s32>) = COPY $v8
+    %1:_(<vscale x 2 x s64>) = G_ZEXT %0(<vscale x 2 x s32>)
+    $v8m2 = COPY %1(<vscale x 2 x s64>)
+    PseudoRET implicit $v8m2
+
+...
+--- +name: zext_nxv4i64_nxv4i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i64_nxv4i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv4i64_nxv4i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m2 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8m4 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:_() = COPY $v8m2 + %1:_() = G_ZEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: zext_nxv8i64_nxv8i32 +legalized: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i64_nxv8i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV32I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV32I-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv8i64_nxv8i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrb() = COPY $v8m4 + ; RV64I-NEXT: [[ZEXT:%[0-9]+]]:vrb() = G_ZEXT [[COPY]]() + ; RV64I-NEXT: $v8m8 = COPY [[ZEXT]]() + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:_() = COPY $v8m4 + %1:_() = G_ZEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... -- cgit v1.1 From 63c925ca808f216f805b76873743450456e350f2 Mon Sep 17 00:00:00 2001 From: Michael Maitland Date: Fri, 15 Mar 2024 07:52:41 -0700 Subject: [RISCV][GISEL] Instruction selection for G_ZEXT, G_SEXT, and G_ANYEXT with scalable vector type --- .../GlobalISel/instruction-select/rvv/anyext.mir | 902 +++++++++++++++++++++ .../GlobalISel/instruction-select/rvv/sext.mir | 900 ++++++++++++++++++++ .../GlobalISel/instruction-select/rvv/zext.mir | 900 ++++++++++++++++++++ 3 files changed, 2702 insertions(+) create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/anyext.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/sext.mir create mode 100644 llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/zext.mir diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/anyext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/anyext.mir new file mode 100644 index 0000000..eda1180 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/anyext.mir @@ -0,0 +1,902 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=instruction-select -simplify-mir \ +# RUN: -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=instruction-select -simplify-mir \ +# RUN: -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV64I %s + +--- +name: anyext_nxv1i16_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i16_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + 
; RV64I-LABEL: name: anyext_nxv1i16_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv1i32_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i32_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i32_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv1i64_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv2i16_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i16_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv2i16_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+---
+name: anyext_nxv2i32_nxv2i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: anyext_nxv2i32_nxv2i8
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+    ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+    ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64I-LABEL: name: anyext_nxv2i32_nxv2i8
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+    ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+    ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */
+    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: PseudoRET implicit $v8
+    %0:vrb(<vscale x 2 x s8>) = COPY $v8
+    %1:vrb(<vscale x 2 x s32>) = G_ANYEXT %0(<vscale x 2 x s8>)
+    $v8 = COPY %1(<vscale x 2 x s32>)
+    PseudoRET implicit $v8
+
+...
+---
+name: anyext_nxv2i64_nxv2i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i8
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+    ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+    ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+    ; RV32I-NEXT: $v8m2 = COPY %1
+    ; RV32I-NEXT: PseudoRET implicit $v8m2
+    ;
+    ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i8
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+    ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF
+    ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */
+    ; RV64I-NEXT: $v8m2 = COPY %1
+    ; RV64I-NEXT: PseudoRET implicit $v8m2
+    %0:vrb(<vscale x 2 x s8>) = COPY $v8
+    %1:vrb(<vscale x 2 x s64>) = G_ANYEXT %0(<vscale x 2 x s8>)
+    $v8m2 = COPY %1(<vscale x 2 x s64>)
+    PseudoRET implicit $v8m2
+
+...
+---
+name: anyext_nxv4i16_nxv4i8
+legalized: true
+regBankSelected: true
+tracksRegLiveness: true
+body: |
+  bb.0.entry:
+    liveins: $v8
+
+    ; RV32I-LABEL: name: anyext_nxv4i16_nxv4i8
+    ; RV32I: liveins: $v8
+    ; RV32I-NEXT: {{  $}}
+    ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+    ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+    ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+    ; RV32I-NEXT: $v8 = COPY %1
+    ; RV32I-NEXT: PseudoRET implicit $v8
+    ;
+    ; RV64I-LABEL: name: anyext_nxv4i16_nxv4i8
+    ; RV64I: liveins: $v8
+    ; RV64I-NEXT: {{  $}}
+    ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8
+    ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF
+    ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */
+    ; RV64I-NEXT: $v8 = COPY %1
+    ; RV64I-NEXT: PseudoRET implicit $v8
+    %0:vrb(<vscale x 4 x s8>) = COPY $v8
+    %1:vrb(<vscale x 4 x s16>) = G_ANYEXT %0(<vscale x 4 x s8>)
+    $v8 = COPY %1(<vscale x 4 x s16>)
+    PseudoRET implicit $v8
+
+...
+--- +name: anyext_nxv4i32_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i32_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv4i32_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: anyext_nxv4i64_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: anyext_nxv8i16_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i16_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv8i16_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv8i32_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i32_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv8i32_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: anyext_nxv8i64_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: anyext_nxv16i16_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv16i16_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv16i16_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_ANYEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv16i32_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv16i32_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv16i32_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m4 + %1:vrb() = G_ANYEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: anyext_nxv32i16_nxv32i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv32i16_nxv32i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv32i16_nxv32i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m4 + %1:vrb() = G_ANYEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: anyext_nxv1i32_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i32_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i32_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv1i64_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv2i32_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i32_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv2i32_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: anyext_nxv2i64_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: anyext_nxv4i32_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i32_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv4i32_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: anyext_nxv4i64_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: anyext_nxv8i32_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i32_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv8i32_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_ANYEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... 
+--- +name: anyext_nxv8i64_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_ANYEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: anyext_nxv16i32_nxv16i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv16i32_nxv16i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv16i32_nxv16i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m4 + %1:vrb() = G_ANYEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: anyext_nxv1i64_nxv1i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv1i64_nxv1i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: anyext_nxv1i64_nxv1i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: anyext_nxv2i64_nxv2i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv2i64_nxv2i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: anyext_nxv2i64_nxv2i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_ANYEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: anyext_nxv4i64_nxv4i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv4i64_nxv4i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: anyext_nxv4i64_nxv4i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_ANYEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: anyext_nxv8i64_nxv8i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: anyext_nxv8i64_nxv8i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: anyext_nxv8i64_nxv8i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m4 + %1:vrb() = G_ANYEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... 
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/sext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/sext.mir new file mode 100644 index 0000000..382166f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/sext.mir @@ -0,0 +1,900 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV64I %s + +--- +name: sext_nxv1i16_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i16_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i16_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv1i32_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i32_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i32_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv1i64_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i64_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i64_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv2i16_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i16_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv2i16_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv2i32_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i32_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv2i32_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv2i64_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i64_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv2i64_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: sext_nxv4i16_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i16_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv4i16_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv4i32_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i32_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv4i32_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: sext_nxv4i64_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i64_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv4i64_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: sext_nxv8i16_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i16_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv8i16_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: sext_nxv8i32_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i32_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv8i32_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... 
+--- +name: sext_nxv8i64_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i64_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv8i64_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: sext_nxv16i16_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv16i16_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv16i16_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_SEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: sext_nxv16i32_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv16i32_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv16i32_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_SEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... 
+--- +name: sext_nxv32i16_nxv32i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv32i16_nxv32i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv32i16_nxv32i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m4 + %1:vrb() = G_SEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: sext_nxv1i32_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i32_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i32_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv1i64_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i64_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i64_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: sext_nxv2i32_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i32_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv2i32_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv2i64_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i64_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv2i64_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: sext_nxv4i32_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i32_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv4i32_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: sext_nxv4i64_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i64_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv4i64_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: sext_nxv8i32_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i32_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv8i32_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_SEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: sext_nxv8i64_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i64_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv8i64_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_SEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... 
+--- +name: sext_nxv16i32_nxv16i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv16i32_nxv16i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv16i32_nxv16i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m4 + %1:vrb() = G_SEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: sext_nxv1i64_nxv1i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv1i64_nxv1i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: sext_nxv1i64_nxv1i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVSEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: sext_nxv2i64_nxv2i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv2i64_nxv2i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: sext_nxv2i64_nxv2i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVSEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_SEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: sext_nxv4i64_nxv4i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv4i64_nxv4i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: sext_nxv4i64_nxv4i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVSEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_SEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: sext_nxv8i64_nxv8i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: sext_nxv8i64_nxv8i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: sext_nxv8i64_nxv8i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVSEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m4 + %1:vrb() = G_SEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... 
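The sext tests above all follow one naming rule that the CHECK lines make visible: the selected pseudo is PseudoVSEXT_VF<f>_<lmul>, where <f> is the destination-to-source element-width ratio (2, 4 or 8) and <lmul> is the register group implied by the destination type, with the scalable MIR types sized at 64 bits per vscale unit. The sketch below only reconstructs that naming from the checks; vsextPseudoFor and its LMUL table are invented for illustration, not the selection code the backend actually runs.

```c++
#include <cassert>
#include <string>

// Illustrative reconstruction of the naming visible in the CHECK lines above;
// vsextPseudoFor and LMULName are invented here, not backend code.
std::string vsextPseudoFor(unsigned SrcEltBits, unsigned DstEltBits,
                           unsigned DstNumElts) {
  // The mnemonic's VF suffix is the widening factor: VF2, VF4 or VF8.
  unsigned VF = DstEltBits / SrcEltBits;
  assert((VF == 2 || VF == 4 || VF == 8) && "vsext/vzext widen by 2x, 4x or 8x");

  // The register-group suffix follows the destination type, with scalable MIR
  // types sized at 64 bits per vscale unit (so nxv2i32 is M1, nxv8i64 is M8).
  static const char *LMULName[] = {"MF8", "MF4", "MF2", "M1", "M2", "M4", "M8"};
  unsigned DstBitsPerVScale = DstEltBits * DstNumElts;
  int Idx = 3; // 64 bits per vscale unit maps to M1.
  for (unsigned Bits = 64; Bits < DstBitsPerVScale; Bits *= 2)
    ++Idx;
  for (unsigned Bits = 64; Bits > DstBitsPerVScale; Bits /= 2)
    --Idx;
  assert(Idx >= 0 && Idx <= 6 && "LMUL outside the MF8..M8 range");
  return "PseudoVSEXT_VF" + std::to_string(VF) + "_" + LMULName[Idx];
}

// vsextPseudoFor(16, 32, 2) == "PseudoVSEXT_VF2_M1"  (sext_nxv2i32_nxv2i16)
// vsextPseudoFor(16, 64, 8) == "PseudoVSEXT_VF4_M8"  (sext_nxv8i64_nxv8i16)
```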
diff --git a/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/zext.mir b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/zext.mir new file mode 100644 index 0000000..2fc9e05 --- /dev/null +++ b/llvm/test/CodeGen/RISCV/GlobalISel/instruction-select/rvv/zext.mir @@ -0,0 +1,900 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -mtriple=riscv32 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV32I %s +# RUN: llc -mtriple=riscv64 -mattr=+v -run-pass=instruction-select -simplify-mir -verify-machineinstrs %s -o - | FileCheck -check-prefix=RV64I %s + +--- +name: zext_nxv1i16_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i16_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i16_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: zext_nxv1i32_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i32_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i32_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv1i64_nxv1i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i64_nxv1i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i64_nxv1i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF8_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: zext_nxv2i16_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i16_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv2i16_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: zext_nxv2i32_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i32_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv2i32_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv2i64_nxv2i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i64_nxv2i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv2i64_nxv2i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF8_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: zext_nxv4i16_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i16_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv4i16_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: zext_nxv4i32_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i32_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv4i32_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: zext_nxv4i64_nxv4i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i64_nxv4i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv4i64_nxv4i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF8_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: zext_nxv8i16_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i16_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv8i16_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: zext_nxv8i32_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i32_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv8i32_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... 
+--- +name: zext_nxv8i64_nxv8i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i64_nxv8i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv8i64_nxv8i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF8_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: zext_nxv16i16_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv16i16_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv16i16_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_ZEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: zext_nxv16i32_nxv16i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv16i32_nxv16i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv16i32_nxv16i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_ZEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... 
+--- +name: zext_nxv32i16_nxv32i8 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv32i16_nxv32i8 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv32i16_nxv32i8 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 4 /* e16 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m4 + %1:vrb() = G_ZEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: zext_nxv1i32_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i32_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i32_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_MF2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: zext_nxv1i64_nxv1i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i64_nxv1i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i64_nxv1i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF4_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... 
+--- +name: zext_nxv2i32_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i32_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv2i32_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: zext_nxv2i64_nxv2i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i64_nxv2i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv2i64_nxv2i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF4_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... +--- +name: zext_nxv4i32_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i32_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv4i32_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
+--- +name: zext_nxv4i64_nxv4i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i64_nxv4i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv4i64_nxv4i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF4_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: zext_nxv8i32_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i32_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv8i32_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_ZEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: zext_nxv8i64_nxv8i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i64_nxv8i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv8i64_nxv8i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF4_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m4 + %1:vrb() = G_ZEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... 
+--- +name: zext_nxv16i32_nxv16i16 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv16i32_nxv16i16 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv16i32_nxv16i16 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 5 /* e32 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m4 + %1:vrb() = G_ZEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... +--- +name: zext_nxv1i64_nxv1i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv1i64_nxv1i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8 + ; + ; RV64I-LABEL: name: zext_nxv1i64_nxv1i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vr = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vr = PseudoVZEXT_VF2_M1 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8 = COPY %1() + PseudoRET implicit $v8 + +... +--- +name: zext_nxv2i64_nxv2i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv2i64_nxv2i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m2 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m2 + ; + ; RV64I-LABEL: name: zext_nxv2i64_nxv2i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vr = COPY $v8 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm2 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm2 = PseudoVZEXT_VF2_M2 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m2 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m2 + %0:vrb() = COPY $v8 + %1:vrb() = G_ZEXT %0() + $v8m2 = COPY %1() + PseudoRET implicit $v8m2 + +... 
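Every widening pseudo in these tests also carries the same three trailing operands after the source register, for example -1, 6 /* e64 */, 3 /* ta, ma */. Going by the comments the check generator prints, these are the AVL (-1 selects VLMAX), the destination SEW encoded as its log2, and a tail/mask policy bitmask. The decoder below is only a reader aid with invented names (VTypeOperands, describe) that mirrors those comments; it is not a backend helper.

```c++
#include <cstdint>
#include <string>

struct VTypeOperands {
  int64_t AVL;      // -1 is the VLMAX sentinel: cover the whole register group.
  unsigned SEWLog2; // log2(SEW): 3 = e8, 4 = e16, 5 = e32, 6 = e64.
  unsigned Policy;  // bit 0: tail agnostic (ta), bit 1: mask agnostic (ma).
};

std::string describe(const VTypeOperands &Ops) {
  std::string S = Ops.AVL == -1 ? "vl=VLMAX" : "vl=" + std::to_string(Ops.AVL);
  S += ", e" + std::to_string(1u << Ops.SEWLog2);
  S += Ops.Policy & 1 ? ", ta" : ", tu";
  S += Ops.Policy & 2 ? ", ma" : ", mu";
  return S;
}

// describe({-1, 5, 3}) == "vl=VLMAX, e32, ta, ma"   (the common case above)
```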
+--- +name: zext_nxv4i64_nxv4i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv4i64_nxv4i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m4 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m4 + ; + ; RV64I-LABEL: name: zext_nxv4i64_nxv4i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm2 = COPY $v8m2 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm4 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm4 = PseudoVZEXT_VF2_M4 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m4 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m4 + %0:vrb() = COPY $v8m2 + %1:vrb() = G_ZEXT %0() + $v8m4 = COPY %1() + PseudoRET implicit $v8m4 + +... +--- +name: zext_nxv8i64_nxv8i32 +legalized: true +regBankSelected: true +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $v8 + + ; RV32I-LABEL: name: zext_nxv8i64_nxv8i32 + ; RV32I: liveins: $v8 + ; RV32I-NEXT: {{ $}} + ; RV32I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV32I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV32I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV32I-NEXT: $v8m8 = COPY %1 + ; RV32I-NEXT: PseudoRET implicit $v8m8 + ; + ; RV64I-LABEL: name: zext_nxv8i64_nxv8i32 + ; RV64I: liveins: $v8 + ; RV64I-NEXT: {{ $}} + ; RV64I-NEXT: [[COPY:%[0-9]+]]:vrm4 = COPY $v8m4 + ; RV64I-NEXT: [[DEF:%[0-9]+]]:vrm8 = IMPLICIT_DEF + ; RV64I-NEXT: early-clobber %1:vrm8 = PseudoVZEXT_VF2_M8 [[DEF]], [[COPY]], -1, 6 /* e64 */, 3 /* ta, ma */ + ; RV64I-NEXT: $v8m8 = COPY %1 + ; RV64I-NEXT: PseudoRET implicit $v8m8 + %0:vrb() = COPY $v8m4 + %1:vrb() = G_ZEXT %0() + $v8m8 = COPY %1() + PseudoRET implicit $v8m8 + +... -- cgit v1.1 From be57c90feff81d067c83be1ab8927fb345c761cc Mon Sep 17 00:00:00 2001 From: Gulfem Savrun Yeniceri Date: Wed, 3 Apr 2024 23:14:31 +0000 Subject: Revert "dsymutil: Re-add missing -latomic (#85380)" This reverts commit 23616c65e7d632e750ddb67d55cc39098a69a8a6 because it breaks Fuchsia Clang toolchain builders. https://luci-milo.appspot.com/ui/p/fuchsia/builders/toolchain.ci/clang-linux-x64/b8751656876289840849/overview --- llvm/tools/dsymutil/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/dsymutil/CMakeLists.txt b/llvm/tools/dsymutil/CMakeLists.txt index 89225d4..efe28bd 100644 --- a/llvm/tools/dsymutil/CMakeLists.txt +++ b/llvm/tools/dsymutil/CMakeLists.txt @@ -44,4 +44,4 @@ if(APPLE AND NOT LLVM_TOOL_LLVM_DRIVER_BUILD) target_link_libraries(dsymutil PRIVATE "-framework CoreFoundation") endif(APPLE AND NOT LLVM_TOOL_LLVM_DRIVER_BUILD) -target_link_libraries(dsymutil PRIVATE ${LLVM_ATOMIC_LIB}) +# target_link_libraries(dsymutil PRIVATE ${LLVM_ATOMIC_LIB}) -- cgit v1.1 From c511c90680eecae2e4adb87f442f41d465feb0f2 Mon Sep 17 00:00:00 2001 From: Kojo Acquah Date: Wed, 3 Apr 2024 16:24:18 -0700 Subject: [mlir][ArmNeon] Updates LowerContractionToSMMLAPattern with vecmat unroll patterns (#86005) Updates smmla unrolling patterns to handle vecmat contracts where `dimM=1`. 
This includes explicit vecmats in the form: `<1x8xi8> x <8x8xi8> --> <1x8xi32>` or implied with the leading dim folded: `<8xi8> x <8x8xi8> --> <8xi32>` Since the smmla operates on two `<2x8xi8>` input vectors to produce `<2x2xi8>` accumulators, half of each 2x2 accumulator tile is dummy data not pertinent to the computation, resulting in half throughput. --- .../Transforms/LowerContractionToSMMLAPattern.cpp | 98 ++++++++++------ mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir | 124 +++++++++++++++++++++ 2 files changed, 191 insertions(+), 31 deletions(-) diff --git a/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp b/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp index 1f48d27..1374022 100644 --- a/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp +++ b/mlir/lib/Dialect/ArmNeon/Transforms/LowerContractionToSMMLAPattern.cpp @@ -40,8 +40,9 @@ static Type matchContainerType(Type element, Type container) { /// Lowering from a vector::contractOp arm neon smmla intrinsic. This will tile /// any vector.contract into multiple smmla instructions with unrolling so long -/// as [2,2,8] is a divisor of its shape. If no unrolling is necessary, a single -/// smmla instruction is emitted. +/// as [2,2,8] is a divisor of its shape. It can also process vecmats with dimM +/// = 1 (either explicitly or inferred if LHS has only dimK) If no unrolling is +/// necessary, a single smmla instruction is emitted. class LowerContractionToSMMLAPattern : public OpRewritePattern { public: @@ -49,32 +50,35 @@ public: LogicalResult matchAndRewrite(vector::ContractionOp op, PatternRewriter &rewriter) const override { Location loc = op.getLoc(); - // Check index maps that represent M N K in contract. - auto indexingMaps = op.getIndexingMapsArray(); - if (llvm::any_of(indexingMaps, [](mlir::AffineMap affineMap) { - return affineMap.isPermutation() || affineMap.getNumDims() != 3 || - affineMap.getNumResults() != 2; - })) { - return failure(); - } - // Check iterator types for contract. - auto iteratorTypes = op.getIteratorTypesArray(); - if (iteratorTypes.size() != 3 || - iteratorTypes[0] != vector::IteratorType::parallel || - iteratorTypes[1] != vector::IteratorType::parallel || - iteratorTypes[2] != vector::IteratorType::reduction) { - return failure(); - } - // Infer tile sizes from operands; Note: RHS is not transposed. + // Infer tile sizes from operands. For vecmat, LHS may only have 1 dim. + // Note: RHS is not transposed. mlir::VectorType lhsType = op.getLhsType(); mlir::VectorType rhsType = op.getRhsType(); - auto dimM = lhsType.getDimSize(0); + auto dimM = lhsType.getRank() == 1 ? 1 : lhsType.getDimSize(0); auto dimN = rhsType.getDimSize(0); - auto dimK = lhsType.getDimSize(1); - + auto dimK = rhsType.getDimSize(1); + bool isVecmat = dimM == 1 ? true : false; + if (lhsType.getDimSize(lhsType.getRank() - 1) != + rhsType.getDimSize(rhsType.getRank() - 1)) { + return failure(); // dimK mismatch + } // Unrolling patterns can handle any [2, 2, 8] shaped multiple of inputs for // tiling. - if (dimM % 2 != 0 || dimN % 2 != 0 || dimK % 8 != 0) { + if ((dimM % 2 != 0 && !isVecmat) || dimN % 2 != 0 || dimK % 8 != 0) { + return failure(); + } + + // Check iterator types for contract. All iterators except inner-most + // dimension must be parallel. 
+ auto iteratorTypes = op.getIteratorTypesArray(); + if (iteratorTypes.size() > 3 || iteratorTypes[iteratorTypes.size() - 1] != + vector::IteratorType::reduction) { + return failure(); + } + if (llvm::any_of(ArrayRef(iteratorTypes).drop_back(1), + [](vector::IteratorType iteratorType) { + return iteratorType != vector::IteratorType::parallel; + })) { return failure(); } @@ -120,11 +124,14 @@ public: loc, op.getResultType(), rewriter.getZeroAttr(op.getResultType())); SmallVector unrolledSize = *op.getShapeForUnroll(); - SmallVector smmlaShape{2, 2, 8}; - SmallVector loopOrder{0, 1, 2}; + SmallVector smmlaShape{2, 8}; + SmallVector loopOrder{0, 1}; + if (unrolledSize.size() == 3) { + smmlaShape.insert(smmlaShape.begin(), isVecmat ? 1 : 2); + loopOrder.push_back(2); + } for (SmallVector offsets : StaticTileOffsetRange(unrolledSize, smmlaShape, loopOrder)) { - // Helper to compute the new shape of each operand and extract the slice. auto extractOperand = [&](Value operand, AffineMap permutationMap, ArrayRef operandOffsets) { @@ -150,16 +157,40 @@ public: Value tiledAcc = extractOperand(op.getAcc(), accPermutationMap, accOffsets); + auto inputElementType = + tiledLhs.getType().cast().getElementType(); + auto accElementType = + tiledAcc.getType().cast().getElementType(); + auto inputExpandedType = VectorType::get({2, 8}, inputElementType); + auto outputExpandedType = VectorType::get({2, 2}, accElementType); + + // With vecmat, tiled LHS and ACC will contain only one of 2 necessary + // rows along dimM. Expand their shapes to match the smmla op. + if (isVecmat) { + auto expandForSMMLA = [&](Value tiledOperand, + VectorType expandedTypeType) { + auto emptyOperand = rewriter.create( + loc, expandedTypeType, rewriter.getZeroAttr(expandedTypeType)); + SmallVector offsets( + emptyOperand.getType().cast().getRank(), 0); + SmallVector strides( + tiledOperand.getType().cast().getRank(), 1); + return rewriter.createOrFold( + loc, tiledOperand, emptyOperand, offsets, strides); + }; + tiledLhs = expandForSMMLA(tiledLhs, inputExpandedType); + tiledAcc = expandForSMMLA(tiledAcc, outputExpandedType); + } + // Collapse tiled operands to 1D vectors required by smmla intrinsic - auto collapsedInputType = VectorType::get( - tiledLhs.getType().cast().getNumElements(), - tiledLhs.getType().cast().getElementType()); - auto collapsedOutputType = VectorType::get( - {4}, tiledAcc.getType().cast().getElementType()); + auto collapsedInputType = + VectorType::get(inputExpandedType.getNumElements(), inputElementType); auto collapsedLhs = rewriter.createOrFold( tiledLhs.getLoc(), collapsedInputType, tiledLhs); auto collapsedRhs = rewriter.createOrFold( tiledRhs.getLoc(), collapsedInputType, tiledRhs); + auto collapsedOutputType = + VectorType::get(outputExpandedType.getNumElements(), accElementType); auto collapsedRes = rewriter.createOrFold( tiledAcc.getLoc(), collapsedOutputType, tiledAcc); @@ -172,6 +203,11 @@ public: Value tiledRes = rewriter.createOrFold( smmlaOp.getLoc(), tiledAcc.getType(), smmlaOp); + // With vecmat, only one row of tiled ACC can be inserted inot file result + if (isVecmat) { + tiledRes = rewriter.createOrFold(loc, tiledRes, 0); + } + // Insert the tiled result back into the non tiled result of the // contract op. 
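The expandForSMMLA and extract-at-offset-0 steps above, and the insert_strided_slice that follows, are easier to read next to a scalar model of what one smmla tile computes. The model assumes the 2x8 by 2x8 into 2x2 i32 tile shape described in the commit message; smmlaTileModel is an invented name and the loop is only the intrinsic's arithmetic, not its implementation. For the vecmat case the second LHS row is the zero padding, so row 1 of the accumulator is computed and then discarded, which is the half-throughput cost the message mentions.

```c++
#include <cstdint>

// Acc[m][n] accumulates the dot product of LHS row m with RHS row n; the RHS
// rows play the role of columns of the logical matrix, which is why the
// lowering keeps RHS untransposed.
void smmlaTileModel(const int8_t Lhs[2][8], const int8_t Rhs[2][8],
                    int32_t Acc[2][2]) {
  for (int M = 0; M < 2; ++M)     // With vecmat, row 1 of Lhs/Acc is padding.
    for (int N = 0; N < 2; ++N)
      for (int K = 0; K < 8; ++K) // Shared reduction dimension of length 8.
        Acc[M][N] += int32_t(Lhs[M][K]) * int32_t(Rhs[N][K]);
}
```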
SmallVector strides( diff --git a/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir b/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir index e2be8745..46c4026 100644 --- a/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir +++ b/mlir/test/Dialect/ArmNeon/lower-to-arm-neon.mlir @@ -134,3 +134,127 @@ func.func @test_lower_vector_arm_neon_unroll_incompatible_shape(%lhs: vector<4x1 %res = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_extsi, %rhs_extsi, %acc : vector<4x12xi32>, vector<4x12xi32> into vector<4x4xi32> return %res : vector<4x4xi32> } + +// ----- + +// CHECK-LABEL: func.func @test_lower_vector_arm_neon_vecmat_unroll( +// CHECK-SAME: %[[VAL_0:.*]]: vector<8xi8>, +// CHECK-SAME: %[[VAL_1:.*]]: vector<8x8xi8>, +// CHECK-SAME: %[[VAL_2:.*]]: vector<8xi32>) -> vector<8xi32> { +// CHECK: %[[VAL_3:.*]] = arith.constant dense<0> : vector<2x2xi32> +// CHECK: %[[VAL_4:.*]] = arith.constant dense<0> : vector<2x8xi8> +// CHECK: %[[VAL_5:.*]] = arith.constant dense<0> : vector<8xi32> +// CHECK: %[[VAL_6:.*]] = vector.extract_strided_slice %[[VAL_1]] {offsets = [0, 0], sizes = [2, 8], strides = [1, 1]} : vector<8x8xi8> to vector<2x8xi8> +// CHECK: %[[VAL_7:.*]] = vector.extract_strided_slice %[[VAL_2]] {offsets = [0], sizes = [2], strides = [1]} : vector<8xi32> to vector<2xi32> +// CHECK: %[[VAL_8:.*]] = vector.insert_strided_slice %[[VAL_0]], %[[VAL_4]] {offsets = [0, 0], strides = [1]} : vector<8xi8> into vector<2x8xi8> +// CHECK: %[[VAL_9:.*]] = vector.insert_strided_slice %[[VAL_7]], %[[VAL_3]] {offsets = [0, 0], strides = [1]} : vector<2xi32> into vector<2x2xi32> +// CHECK: %[[VAL_10:.*]] = vector.shape_cast %[[VAL_8]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_11:.*]] = vector.shape_cast %[[VAL_6]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_12:.*]] = vector.shape_cast %[[VAL_9]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[VAL_13:.*]] = arm_neon.intr.smmla %[[VAL_12]], %[[VAL_10]], %[[VAL_11]] : vector<16xi8> to vector<4xi32> +// CHECK: %[[VAL_14:.*]] = vector.shape_cast %[[VAL_13]] : vector<4xi32> to vector<2x2xi32> +// CHECK: %[[VAL_15:.*]] = vector.extract %[[VAL_14]][0] : vector<2xi32> from vector<2x2xi32> +// CHECK: %[[VAL_16:.*]] = vector.insert_strided_slice %[[VAL_15]], %[[VAL_5]] {offsets = [0], strides = [1]} : vector<2xi32> into vector<8xi32> +// CHECK: %[[VAL_17:.*]] = vector.extract_strided_slice %[[VAL_1]] {offsets = [2, 0], sizes = [2, 8], strides = [1, 1]} : vector<8x8xi8> to vector<2x8xi8> +// CHECK: %[[VAL_18:.*]] = vector.extract_strided_slice %[[VAL_2]] {offsets = [2], sizes = [2], strides = [1]} : vector<8xi32> to vector<2xi32> +// CHECK: %[[VAL_19:.*]] = vector.insert_strided_slice %[[VAL_0]], %[[VAL_4]] {offsets = [0, 0], strides = [1]} : vector<8xi8> into vector<2x8xi8> +// CHECK: %[[VAL_20:.*]] = vector.insert_strided_slice %[[VAL_18]], %[[VAL_3]] {offsets = [0, 0], strides = [1]} : vector<2xi32> into vector<2x2xi32> +// CHECK: %[[VAL_21:.*]] = vector.shape_cast %[[VAL_19]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_22:.*]] = vector.shape_cast %[[VAL_17]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_23:.*]] = vector.shape_cast %[[VAL_20]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[VAL_24:.*]] = arm_neon.intr.smmla %[[VAL_23]], %[[VAL_21]], %[[VAL_22]] : vector<16xi8> to vector<4xi32> +// CHECK: %[[VAL_25:.*]] = vector.shape_cast %[[VAL_24]] : 
vector<4xi32> to vector<2x2xi32> +// CHECK: %[[VAL_26:.*]] = vector.extract %[[VAL_25]][0] : vector<2xi32> from vector<2x2xi32> +// CHECK: %[[VAL_27:.*]] = vector.insert_strided_slice %[[VAL_26]], %[[VAL_16]] {offsets = [2], strides = [1]} : vector<2xi32> into vector<8xi32> +// CHECK: %[[VAL_28:.*]] = vector.extract_strided_slice %[[VAL_1]] {offsets = [4, 0], sizes = [2, 8], strides = [1, 1]} : vector<8x8xi8> to vector<2x8xi8> +// CHECK: %[[VAL_29:.*]] = vector.extract_strided_slice %[[VAL_2]] {offsets = [4], sizes = [2], strides = [1]} : vector<8xi32> to vector<2xi32> +// CHECK: %[[VAL_30:.*]] = vector.insert_strided_slice %[[VAL_0]], %[[VAL_4]] {offsets = [0, 0], strides = [1]} : vector<8xi8> into vector<2x8xi8> +// CHECK: %[[VAL_31:.*]] = vector.insert_strided_slice %[[VAL_29]], %[[VAL_3]] {offsets = [0, 0], strides = [1]} : vector<2xi32> into vector<2x2xi32> +// CHECK: %[[VAL_32:.*]] = vector.shape_cast %[[VAL_30]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_33:.*]] = vector.shape_cast %[[VAL_28]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_34:.*]] = vector.shape_cast %[[VAL_31]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[VAL_35:.*]] = arm_neon.intr.smmla %[[VAL_34]], %[[VAL_32]], %[[VAL_33]] : vector<16xi8> to vector<4xi32> +// CHECK: %[[VAL_36:.*]] = vector.shape_cast %[[VAL_35]] : vector<4xi32> to vector<2x2xi32> +// CHECK: %[[VAL_37:.*]] = vector.extract %[[VAL_36]][0] : vector<2xi32> from vector<2x2xi32> +// CHECK: %[[VAL_38:.*]] = vector.insert_strided_slice %[[VAL_37]], %[[VAL_27]] {offsets = [4], strides = [1]} : vector<2xi32> into vector<8xi32> +// CHECK: %[[VAL_39:.*]] = vector.extract_strided_slice %[[VAL_1]] {offsets = [6, 0], sizes = [2, 8], strides = [1, 1]} : vector<8x8xi8> to vector<2x8xi8> +// CHECK: %[[VAL_40:.*]] = vector.extract_strided_slice %[[VAL_2]] {offsets = [6], sizes = [2], strides = [1]} : vector<8xi32> to vector<2xi32> +// CHECK: %[[VAL_41:.*]] = vector.insert_strided_slice %[[VAL_0]], %[[VAL_4]] {offsets = [0, 0], strides = [1]} : vector<8xi8> into vector<2x8xi8> +// CHECK: %[[VAL_42:.*]] = vector.insert_strided_slice %[[VAL_40]], %[[VAL_3]] {offsets = [0, 0], strides = [1]} : vector<2xi32> into vector<2x2xi32> +// CHECK: %[[VAL_43:.*]] = vector.shape_cast %[[VAL_41]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_44:.*]] = vector.shape_cast %[[VAL_39]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_45:.*]] = vector.shape_cast %[[VAL_42]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[VAL_46:.*]] = arm_neon.intr.smmla %[[VAL_45]], %[[VAL_43]], %[[VAL_44]] : vector<16xi8> to vector<4xi32> +// CHECK: %[[VAL_47:.*]] = vector.shape_cast %[[VAL_46]] : vector<4xi32> to vector<2x2xi32> +// CHECK: %[[VAL_48:.*]] = vector.extract %[[VAL_47]][0] : vector<2xi32> from vector<2x2xi32> +// CHECK: %[[VAL_49:.*]] = vector.insert_strided_slice %[[VAL_48]], %[[VAL_38]] {offsets = [6], strides = [1]} : vector<2xi32> into vector<8xi32> +// CHECK: return %[[VAL_49]] : vector<8xi32> +// CHECK: } +func.func @test_lower_vector_arm_neon_vecmat_unroll(%lhs: vector<8xi8>, %rhs: vector<8x8xi8>, %acc : vector<8xi32>) -> vector<8xi32> { + %lhs_extsi= arith.extsi %lhs : vector<8xi8> to vector<8xi32> + %rhs_extsi = arith.extsi %rhs : vector<8x8xi8> to vector<8x8xi32> + %res = vector.contract {indexing_maps = [affine_map<(d0, d1) -> (d1)>, affine_map<(d0, d1) -> (d0, d1)>, affine_map<(d0, d1) -> (d0)>], iterator_types = ["parallel", "reduction"], kind = #vector.kind} %lhs_extsi, %rhs_extsi, %acc : vector<8xi32>, vector<8x8xi32> into vector<8xi32> + 
return %res : vector<8xi32> +} + +// ----- + +// CHECK-LABEL: func.func @test_lower_vector_arm_neon_vecmat_unroll_leading_dim( +// CHECK-SAME: %[[VAL_0:.*]]: vector<1x8xi8>, +// CHECK-SAME: %[[VAL_1:.*]]: vector<8x8xi8>, +// CHECK-SAME: %[[VAL_2:.*]]: vector<1x8xi32>) -> vector<1x8xi32> { +// CHECK: %[[VAL_3:.*]] = arith.constant dense<0> : vector<2x2xi32> +// CHECK: %[[VAL_4:.*]] = arith.constant dense<0> : vector<2x8xi8> +// CHECK: %[[VAL_5:.*]] = arith.constant dense<0> : vector<1x8xi32> +// CHECK: %[[VAL_6:.*]] = vector.extract_strided_slice %[[VAL_1]] {offsets = [0, 0], sizes = [2, 8], strides = [1, 1]} : vector<8x8xi8> to vector<2x8xi8> +// CHECK: %[[VAL_7:.*]] = vector.extract_strided_slice %[[VAL_2]] {offsets = [0, 0], sizes = [1, 2], strides = [1, 1]} : vector<1x8xi32> to vector<1x2xi32> +// CHECK: %[[VAL_8:.*]] = vector.insert_strided_slice %[[VAL_0]], %[[VAL_4]] {offsets = [0, 0], strides = [1, 1]} : vector<1x8xi8> into vector<2x8xi8> +// CHECK: %[[VAL_9:.*]] = vector.insert_strided_slice %[[VAL_7]], %[[VAL_3]] {offsets = [0, 0], strides = [1, 1]} : vector<1x2xi32> into vector<2x2xi32> +// CHECK: %[[VAL_10:.*]] = vector.shape_cast %[[VAL_8]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_11:.*]] = vector.shape_cast %[[VAL_6]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_12:.*]] = vector.shape_cast %[[VAL_9]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[VAL_13:.*]] = arm_neon.intr.smmla %[[VAL_12]], %[[VAL_10]], %[[VAL_11]] : vector<16xi8> to vector<4xi32> +// CHECK: %[[VAL_14:.*]] = vector.shape_cast %[[VAL_13]] : vector<4xi32> to vector<2x2xi32> +// CHECK: %[[VAL_15:.*]] = vector.extract %[[VAL_14]][0] : vector<2xi32> from vector<2x2xi32> +// CHECK: %[[VAL_16:.*]] = vector.insert_strided_slice %[[VAL_15]], %[[VAL_5]] {offsets = [0, 0], strides = [1]} : vector<2xi32> into vector<1x8xi32> +// CHECK: %[[VAL_17:.*]] = vector.extract_strided_slice %[[VAL_1]] {offsets = [2, 0], sizes = [2, 8], strides = [1, 1]} : vector<8x8xi8> to vector<2x8xi8> +// CHECK: %[[VAL_18:.*]] = vector.extract_strided_slice %[[VAL_2]] {offsets = [0, 2], sizes = [1, 2], strides = [1, 1]} : vector<1x8xi32> to vector<1x2xi32> +// CHECK: %[[VAL_19:.*]] = vector.insert_strided_slice %[[VAL_0]], %[[VAL_4]] {offsets = [0, 0], strides = [1, 1]} : vector<1x8xi8> into vector<2x8xi8> +// CHECK: %[[VAL_20:.*]] = vector.insert_strided_slice %[[VAL_18]], %[[VAL_3]] {offsets = [0, 0], strides = [1, 1]} : vector<1x2xi32> into vector<2x2xi32> +// CHECK: %[[VAL_21:.*]] = vector.shape_cast %[[VAL_19]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_22:.*]] = vector.shape_cast %[[VAL_17]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_23:.*]] = vector.shape_cast %[[VAL_20]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[VAL_24:.*]] = arm_neon.intr.smmla %[[VAL_23]], %[[VAL_21]], %[[VAL_22]] : vector<16xi8> to vector<4xi32> +// CHECK: %[[VAL_25:.*]] = vector.shape_cast %[[VAL_24]] : vector<4xi32> to vector<2x2xi32> +// CHECK: %[[VAL_26:.*]] = vector.extract %[[VAL_25]][0] : vector<2xi32> from vector<2x2xi32> +// CHECK: %[[VAL_27:.*]] = vector.insert_strided_slice %[[VAL_26]], %[[VAL_16]] {offsets = [0, 2], strides = [1]} : vector<2xi32> into vector<1x8xi32> +// CHECK: %[[VAL_28:.*]] = vector.extract_strided_slice %[[VAL_1]] {offsets = [4, 0], sizes = [2, 8], strides = [1, 1]} : vector<8x8xi8> to vector<2x8xi8> +// CHECK: %[[VAL_29:.*]] = vector.extract_strided_slice %[[VAL_2]] {offsets = [0, 4], sizes = [1, 2], strides = [1, 1]} : vector<1x8xi32> to vector<1x2xi32> +// CHECK: %[[VAL_30:.*]] = 
vector.insert_strided_slice %[[VAL_0]], %[[VAL_4]] {offsets = [0, 0], strides = [1, 1]} : vector<1x8xi8> into vector<2x8xi8> +// CHECK: %[[VAL_31:.*]] = vector.insert_strided_slice %[[VAL_29]], %[[VAL_3]] {offsets = [0, 0], strides = [1, 1]} : vector<1x2xi32> into vector<2x2xi32> +// CHECK: %[[VAL_32:.*]] = vector.shape_cast %[[VAL_30]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_33:.*]] = vector.shape_cast %[[VAL_28]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_34:.*]] = vector.shape_cast %[[VAL_31]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[VAL_35:.*]] = arm_neon.intr.smmla %[[VAL_34]], %[[VAL_32]], %[[VAL_33]] : vector<16xi8> to vector<4xi32> +// CHECK: %[[VAL_36:.*]] = vector.shape_cast %[[VAL_35]] : vector<4xi32> to vector<2x2xi32> +// CHECK: %[[VAL_37:.*]] = vector.extract %[[VAL_36]][0] : vector<2xi32> from vector<2x2xi32> +// CHECK: %[[VAL_38:.*]] = vector.insert_strided_slice %[[VAL_37]], %[[VAL_27]] {offsets = [0, 4], strides = [1]} : vector<2xi32> into vector<1x8xi32> +// CHECK: %[[VAL_39:.*]] = vector.extract_strided_slice %[[VAL_1]] {offsets = [6, 0], sizes = [2, 8], strides = [1, 1]} : vector<8x8xi8> to vector<2x8xi8> +// CHECK: %[[VAL_40:.*]] = vector.extract_strided_slice %[[VAL_2]] {offsets = [0, 6], sizes = [1, 2], strides = [1, 1]} : vector<1x8xi32> to vector<1x2xi32> +// CHECK: %[[VAL_41:.*]] = vector.insert_strided_slice %[[VAL_0]], %[[VAL_4]] {offsets = [0, 0], strides = [1, 1]} : vector<1x8xi8> into vector<2x8xi8> +// CHECK: %[[VAL_42:.*]] = vector.insert_strided_slice %[[VAL_40]], %[[VAL_3]] {offsets = [0, 0], strides = [1, 1]} : vector<1x2xi32> into vector<2x2xi32> +// CHECK: %[[VAL_43:.*]] = vector.shape_cast %[[VAL_41]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_44:.*]] = vector.shape_cast %[[VAL_39]] : vector<2x8xi8> to vector<16xi8> +// CHECK: %[[VAL_45:.*]] = vector.shape_cast %[[VAL_42]] : vector<2x2xi32> to vector<4xi32> +// CHECK: %[[VAL_46:.*]] = arm_neon.intr.smmla %[[VAL_45]], %[[VAL_43]], %[[VAL_44]] : vector<16xi8> to vector<4xi32> +// CHECK: %[[VAL_47:.*]] = vector.shape_cast %[[VAL_46]] : vector<4xi32> to vector<2x2xi32> +// CHECK: %[[VAL_48:.*]] = vector.extract %[[VAL_47]][0] : vector<2xi32> from vector<2x2xi32> +// CHECK: %[[VAL_49:.*]] = vector.insert_strided_slice %[[VAL_48]], %[[VAL_38]] {offsets = [0, 6], strides = [1]} : vector<2xi32> into vector<1x8xi32> +// CHECK: return %[[VAL_49]] : vector<1x8xi32> +// CHECK: } +func.func @test_lower_vector_arm_neon_vecmat_unroll_leading_dim(%lhs: vector<1x8xi8>, %rhs: vector<8x8xi8>, %acc : vector<1x8xi32>) -> vector<1x8xi32> { + %lhs_extsi= arith.extsi %lhs : vector<1x8xi8> to vector<1x8xi32> + %rhs_extsi = arith.extsi %rhs : vector<8x8xi8> to vector<8x8xi32> + %res = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d1, d2)>, affine_map<(d0, d1, d2) -> (d0, d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %lhs_extsi, %rhs_extsi, %acc : vector<1x8xi32>, vector<8x8xi32> into vector<1x8xi32> + return %res : vector<1x8xi32> +} -- cgit v1.1 From 66fed33db014bd705433e4b4f1ea766a8d71cadf Mon Sep 17 00:00:00 2001 From: Kojo Acquah Date: Wed, 3 Apr 2024 16:27:01 -0700 Subject: [mlir][vector] Update `castAwayContractionLeadingOneDim` to omit transposes solely on leading unit dims. (#85694) Updates `castAwayContractionLeadingOneDim` to check for leading unit dimensions before inserting `vector.transpose` ops. 
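The check this patch adds hinges on one observation: a vector.transpose whose permutation only moves unit-sized dimensions never reorders elements, so materializing it buys nothing and only blocks later folding. A minimal standalone sketch of that predicate, using an invented name (transposeIsLayoutNoOp) and plain containers instead of the MLIR types the real code below operates on:

```c++
#include <cstdint>
#include <vector>

// Perm[i] is the source dimension placed at result position i.
bool transposeIsLayoutNoOp(const std::vector<int64_t> &Perm,
                           const std::vector<int64_t> &Shape) {
  for (size_t I = 0; I < Perm.size(); ++I)
    if (Perm[I] != static_cast<int64_t>(I) && Shape[I] != 1)
      return false; // A non-unit dimension actually changes position.
  return true;
}

// transposeIsLayoutNoOp({1, 0, 2}, {1, 1, 8}) -> true  (the motivating case)
// transposeIsLayoutNoOp({1, 0, 2}, {4, 1, 8}) -> false (real data movement)
```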
Currently `castAwayContractionLeadingOneDim` removes all leading unit dims based on the accumulator and transpose any subsequent operands to match the accumulator indexing. This does not take into account if the transpose is strictly necessary, for instance when given this vector-matrix contract: ```mlir %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"], kind = #vector.kind} %lhs, %rhs, %acc : vector<1x1x8xi32>, vector<1x8x8xi32> into vector<1x8xi32> ``` Passing this through `castAwayContractionLeadingOneDim` pattern produces the following: ```mlir %0 = vector.transpose %arg0, [1, 0, 2] : vector<1x1x8xi32> to vector<1x1x8xi32> %1 = vector.extract %0[0] : vector<1x8xi32> from vector<1x1x8xi32> %2 = vector.extract %arg2[0] : vector<8xi32> from vector<1x8xi32> %3 = vector.contract {indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d0, d1, d2)>, affine_map<(d0, d1, d2) -> (d1)>], iterator_types = ["parallel", "parallel", "reduction"], kind = #vector.kind} %1, %arg1, %2 : vector<1x8xi32>, vector<1x8x8xi32> into vector<8xi32> %4 = vector.broadcast %3 : vector<8xi32> to vector<1x8xi32> ``` The `vector.transpose` introduced does not affect the underlying data layout (effectively a no op), but it cannot be folded automatically. This change avoids inserting transposes when only leading unit dimensions are involved. Fixes #85691 --- .../Vector/Transforms/VectorDropLeadUnitDim.cpp | 21 +++++++++++++++++++-- .../Vector/vector-dropleadunitdim-transforms.mlir | 12 +++++++++++- 2 files changed, 30 insertions(+), 3 deletions(-) diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp index 593c1e5..8d733c5 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorDropLeadUnitDim.cpp @@ -398,13 +398,30 @@ mlir::vector::castAwayContractionLeadingOneDim(vector::ContractionOp contractOp, transposeResults.push_back(targetExpr); } } + + // Checks if only the outer, unit dimensions (of size 1) are permuted. + // Such transposes do not materially effect the underlying vector and can + // be omitted. EG: perm [1, 0, 2] applied to vector<1x1x8xi32> + bool transposeNonOuterUnitDims = false; + auto operandShape = operands[it.index()].getType().cast(); + for (auto [index, dim] : + llvm::enumerate(ArrayRef(perm).drop_back(1))) { + if (dim != static_cast(index) && + operandShape.getDimSize(index) != 1) { + transposeNonOuterUnitDims = true; + break; + } + } + // Do the tranpose now if needed so that we can drop the // correct dim using extract later. 
if (tranposeNeeded) { map = AffineMap::get(map.getNumDims(), 0, transposeResults, contractOp.getContext()); - operands[it.index()] = rewriter.create( - loc, operands[it.index()], perm); + if (transposeNonOuterUnitDims) { + operands[it.index()] = rewriter.createOrFold( + loc, operands[it.index()], perm); + } } } // We have taken care to have the dim to be dropped be diff --git a/mlir/test/Dialect/Vector/vector-dropleadunitdim-transforms.mlir b/mlir/test/Dialect/Vector/vector-dropleadunitdim-transforms.mlir index 3a120a5..252aeb0 100644 --- a/mlir/test/Dialect/Vector/vector-dropleadunitdim-transforms.mlir +++ b/mlir/test/Dialect/Vector/vector-dropleadunitdim-transforms.mlir @@ -238,6 +238,17 @@ func.func @cast_away_contraction_leading_one_dims_nonleadingunitdim_rank4_acctra return %0: vector<1x1x2x16xf32> } +// ----- + +// CHECK-LABEL: func.func @cast_away_contraction_does_not_transpose_leading_unit_dims +// CHECK-NOT vector.transpose +// CHECK: vector.contract +func.func @cast_away_contraction_does_not_transpose_leading_unit_dims(%lhs: vector<1x1x8xi32>, + %rhs: vector<1x8x8xi32>, + %acc: vector<1x8xi32>) -> vector<1x8xi32> { + %result = vector.contract {indexing_maps = [affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>, affine_map<(d0, d1, d2, d3) -> (d0, d2, d3)>, affine_map<(d0, d1, d2, d3) -> (d1, d2)>], iterator_types = ["parallel", "parallel", "parallel", "reduction"], kind = #vector.kind} %lhs, %rhs, %acc : vector<1x1x8xi32>, vector<1x8x8xi32> into vector<1x8xi32> + return %result : vector<1x8xi32> +} // ----- // CHECK-LABEL: func @cast_away_extract_strided_slice_leading_one_dims @@ -663,4 +674,3 @@ func.func @drop_unit_dims_scalar_cond_select(%cond: i1, %arg0: vector<1x16xi1>, %sel = arith.select %cond, %arg0, %arg1 : vector<1x16xi1> return %sel : vector<1x16xi1> } - -- cgit v1.1 From 5e3da75c80db749b3000c4a9e930da4784bcfc6f Mon Sep 17 00:00:00 2001 From: Dan Liew Date: Wed, 3 Apr 2024 16:28:54 -0700 Subject: [Bounds-Safety][NFC] Clean up leading space emission for CountAttributedType (#87582) Previously the leading space was added in each string constant. This patch moves the leading space out of the string constants and is instead explicitly added to add clarity to the code. --- clang/lib/AST/TypePrinter.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index 9d551ff..d0771eb 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1746,14 +1746,15 @@ void TypePrinter::printPackExpansionAfter(const PackExpansionType *T, static void printCountAttributedImpl(const CountAttributedType *T, raw_ostream &OS, const PrintingPolicy &Policy) { + OS << ' '; if (T->isCountInBytes() && T->isOrNull()) - OS << " __sized_by_or_null("; + OS << "__sized_by_or_null("; else if (T->isCountInBytes()) - OS << " __sized_by("; + OS << "__sized_by("; else if (T->isOrNull()) - OS << " __counted_by_or_null("; + OS << "__counted_by_or_null("; else - OS << " __counted_by("; + OS << "__counted_by("; if (T->getCountExpr()) T->getCountExpr()->printPretty(OS, nullptr, Policy); OS << ')'; -- cgit v1.1 From 20433e9b2483d64843310e97062541dd73f54436 Mon Sep 17 00:00:00 2001 From: Chelsea Cassanova Date: Wed, 3 Apr 2024 16:34:03 -0700 Subject: Revert "DebugInfoD issues, take 2" (#87583) Reverts llvm/llvm-project#86812. 
This commit caused a regression on the x86_64 MacOS buildbot: https://green.lab.llvm.org/job/llvm.org/view/LLDB/job/lldb-cmake/784/ --- .../Python/lldbsuite/test/make/Makefile.rules | 26 +-- .../Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp | 38 ++--- lldb/source/Plugins/SymbolLocator/CMakeLists.txt | 7 +- .../Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp | 29 +--- lldb/test/API/debuginfod/Normal/Makefile | 19 --- lldb/test/API/debuginfod/Normal/TestDebuginfod.py | 179 -------------------- lldb/test/API/debuginfod/Normal/main.c | 7 - lldb/test/API/debuginfod/SplitDWARF/Makefile | 23 --- .../API/debuginfod/SplitDWARF/TestDebuginfodDWP.py | 188 --------------------- lldb/test/API/debuginfod/SplitDWARF/main.c | 7 - 10 files changed, 17 insertions(+), 506 deletions(-) delete mode 100644 lldb/test/API/debuginfod/Normal/Makefile delete mode 100644 lldb/test/API/debuginfod/Normal/TestDebuginfod.py delete mode 100644 lldb/test/API/debuginfod/Normal/main.c delete mode 100644 lldb/test/API/debuginfod/SplitDWARF/Makefile delete mode 100644 lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py delete mode 100644 lldb/test/API/debuginfod/SplitDWARF/main.c diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules index ee8793f..bfd249c 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules @@ -51,7 +51,7 @@ LLDB_BASE_DIR := $(THIS_FILE_DIR)/../../../../../ # # GNUWin32 uname gives "windows32" or "server version windows32" while # some versions of MSYS uname return "MSYS_NT*", but most environments -# standardize on "Windows_NT", so we'll make it consistent here. +# standardize on "Windows_NT", so we'll make it consistent here. # When running tests from Visual Studio, the environment variable isn't # inherited all the way down to the process spawned for make. #---------------------------------------------------------------------- @@ -210,12 +210,6 @@ else ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES" DSYM = $(EXE).debug endif - - ifeq "$(MAKE_DWP)" "YES" - MAKE_DWO := YES - DWP_NAME = $(EXE).dwp - DYLIB_DWP_NAME = $(DYLIB_NAME).dwp - endif endif LIMIT_DEBUG_INFO_FLAGS = @@ -363,7 +357,6 @@ ifneq "$(OS)" "Darwin" OBJCOPY ?= $(call replace_cc_with,objcopy) ARCHIVER ?= $(call replace_cc_with,ar) - DWP ?= $(call replace_cc_with,dwp) override AR = $(ARCHIVER) endif @@ -534,10 +527,6 @@ ifneq "$(CXX)" "" endif endif -ifeq "$(GEN_GNU_BUILD_ID)" "YES" - LDFLAGS += -Wl,--build-id -endif - #---------------------------------------------------------------------- # DYLIB_ONLY variable can be used to skip the building of a.out. 
# See the sections below regarding dSYM file as well as the building of @@ -576,17 +565,10 @@ else endif else ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES" -ifeq "$(SAVE_FULL_DEBUG_BINARY)" "YES" - cp "$(EXE)" "$(EXE).unstripped" -endif $(OBJCOPY) --only-keep-debug "$(EXE)" "$(DSYM)" $(OBJCOPY) --strip-debug --add-gnu-debuglink="$(DSYM)" "$(EXE)" "$(EXE)" endif -ifeq "$(MAKE_DWP)" "YES" - $(DWP) -o "$(DWP_NAME)" $(DWOS) endif -endif - #---------------------------------------------------------------------- # Make the dylib @@ -628,15 +610,9 @@ endif else $(LD) $(DYLIB_OBJECTS) $(LDFLAGS) -shared -o "$(DYLIB_FILENAME)" ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES" - ifeq "$(SAVE_FULL_DEBUG_BINARY)" "YES" - cp "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME).unstripped" - endif $(OBJCOPY) --only-keep-debug "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME).debug" $(OBJCOPY) --strip-debug --add-gnu-debuglink="$(DYLIB_FILENAME).debug" "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME)" endif -ifeq "$(MAKE_DWP)" "YES" - $(DWP) -o $(DYLIB_DWP_FILE) $(DYLIB_DWOS) -endif endif #---------------------------------------------------------------------- diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index dafdf24..49f13d2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -4378,38 +4378,26 @@ const std::shared_ptr &SymbolFileDWARF::GetDwpSymbolFile() { FileSpecList search_paths = Target::GetDefaultDebugFileSearchPaths(); ModuleSpec module_spec; module_spec.GetFileSpec() = m_objfile_sp->GetFileSpec(); - FileSpec dwp_filespec; for (const auto &symfile : symfiles.files()) { module_spec.GetSymbolFileSpec() = FileSpec(symfile.GetPath() + ".dwp", symfile.GetPathStyle()); LLDB_LOG(log, "Searching for DWP using: \"{0}\"", module_spec.GetSymbolFileSpec()); - dwp_filespec = + FileSpec dwp_filespec = PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); if (FileSystem::Instance().Exists(dwp_filespec)) { - break; - } - } - if (!FileSystem::Instance().Exists(dwp_filespec)) { - LLDB_LOG(log, "No DWP file found locally"); - // Fill in the UUID for the module we're trying to match for, so we can - // find the correct DWP file, as the Debuginfod plugin uses *only* this - // data to correctly match the DWP file with the binary. 
- module_spec.GetUUID() = m_objfile_sp->GetUUID(); - dwp_filespec = - PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); - } - if (FileSystem::Instance().Exists(dwp_filespec)) { - LLDB_LOG(log, "Found DWP file: \"{0}\"", dwp_filespec); - DataBufferSP dwp_file_data_sp; - lldb::offset_t dwp_file_data_offset = 0; - ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( - GetObjectFile()->GetModule(), &dwp_filespec, 0, - FileSystem::Instance().GetByteSize(dwp_filespec), dwp_file_data_sp, - dwp_file_data_offset); - if (dwp_obj_file) { - m_dwp_symfile = std::make_shared( - *this, dwp_obj_file, DIERef::k_file_index_mask); + LLDB_LOG(log, "Found DWP file: \"{0}\"", dwp_filespec); + DataBufferSP dwp_file_data_sp; + lldb::offset_t dwp_file_data_offset = 0; + ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( + GetObjectFile()->GetModule(), &dwp_filespec, 0, + FileSystem::Instance().GetByteSize(dwp_filespec), dwp_file_data_sp, + dwp_file_data_offset); + if (dwp_obj_file) { + m_dwp_symfile = std::make_shared( + *this, dwp_obj_file, DIERef::k_file_index_mask); + break; + } } } if (!m_dwp_symfile) { diff --git a/lldb/source/Plugins/SymbolLocator/CMakeLists.txt b/lldb/source/Plugins/SymbolLocator/CMakeLists.txt index 3367022..ca96962 100644 --- a/lldb/source/Plugins/SymbolLocator/CMakeLists.txt +++ b/lldb/source/Plugins/SymbolLocator/CMakeLists.txt @@ -1,10 +1,5 @@ -# Order matters here: the first symbol locator prevents further searching. -# For DWARF binaries that are both stripped and split, the Default plugin -# will return the stripped binary when asked for the ObjectFile, which then -# prevents an unstripped binary from being requested from the Debuginfod -# provider. -add_subdirectory(Debuginfod) add_subdirectory(Default) if (CMAKE_SYSTEM_NAME MATCHES "Darwin") add_subdirectory(DebugSymbols) endif() +add_subdirectory(Debuginfod) diff --git a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp index f296e65..b5fe35d 100644 --- a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp +++ b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp @@ -44,24 +44,6 @@ llvm::StringRef SymbolVendorELF::GetPluginDescriptionStatic() { "executables."; } -// If this is needed elsewhere, it can be exported/moved. -static bool IsDwpSymbolFile(const lldb::ModuleSP &module_sp, - const FileSpec &file_spec) { - DataBufferSP dwp_file_data_sp; - lldb::offset_t dwp_file_data_offset = 0; - // Try to create an ObjectFile from the file_spec. - ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( - module_sp, &file_spec, 0, FileSystem::Instance().GetByteSize(file_spec), - dwp_file_data_sp, dwp_file_data_offset); - // The presence of a debug_cu_index section is the key identifying feature of - // a DWP file. Make sure we don't fill in the section list on dwp_obj_file - // (by calling GetSectionList(false)) as this function could be called before - // we may have all the symbol files collected and available. 
- return dwp_obj_file && ObjectFileELF::classof(dwp_obj_file.get()) && - dwp_obj_file->GetSectionList(false)->FindSectionByType( - eSectionTypeDWARFDebugCuIndex, false); -} - // CreateInstance // // Platforms can register a callback to use when creating symbol vendors to @@ -105,15 +87,8 @@ SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp, FileSpecList search_paths = Target::GetDefaultDebugFileSearchPaths(); FileSpec dsym_fspec = PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); - if (!dsym_fspec || IsDwpSymbolFile(module_sp, dsym_fspec)) { - // If we have a stripped binary or if we got a DWP file, we should prefer - // symbols in the executable acquired through a plugin. - ModuleSpec unstripped_spec = - PluginManager::LocateExecutableObjectFile(module_spec); - if (!unstripped_spec) - return nullptr; - dsym_fspec = unstripped_spec.GetFileSpec(); - } + if (!dsym_fspec) + return nullptr; DataBufferSP dsym_file_data_sp; lldb::offset_t dsym_file_data_offset = 0; diff --git a/lldb/test/API/debuginfod/Normal/Makefile b/lldb/test/API/debuginfod/Normal/Makefile deleted file mode 100644 index 54bd7ad..0000000 --- a/lldb/test/API/debuginfod/Normal/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -C_SOURCES := main.c - -# For normal (non DWP) Debuginfod tests, we need: - -# * The full binary: a.out.unstripped -# Produced by Makefile.rules with SAVE_FULL_DEBUG_BINARY set to YES and -# SPLIT_DEBUG_SYMBOLS set to YES - -# * The stripped binary (a.out) -# Produced by Makefile.rules with SPLIT_DEBUG_SYMBOLS set to YES - -# * The 'only-keep-debug' binary (a.out.debug) -# Produced below - -SPLIT_DEBUG_SYMBOLS := YES -SAVE_FULL_DEBUG_BINARY := YES -GEN_GNU_BUILD_ID := YES - -include Makefile.rules diff --git a/lldb/test/API/debuginfod/Normal/TestDebuginfod.py b/lldb/test/API/debuginfod/Normal/TestDebuginfod.py deleted file mode 100644 index 2e87228..0000000 --- a/lldb/test/API/debuginfod/Normal/TestDebuginfod.py +++ /dev/null @@ -1,179 +0,0 @@ -import os -import shutil -import tempfile - -import lldb -from lldbsuite.test.decorators import * -import lldbsuite.test.lldbutil as lldbutil -from lldbsuite.test.lldbtest import * - - -""" -Test support for the DebugInfoD network symbol acquisition protocol. -This one is for simple / no split-dwarf scenarios. - -For no-split-dwarf scenarios, there are 2 variations: -1 - A stripped binary with it's corresponding unstripped binary: -2 - A stripped binary with a corresponding --only-keep-debug symbols file -""" - - -# It looks like Linux-AArch64 doesn't support build-id's on the LLDB builtbots -@skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) -class DebugInfodTests(TestBase): - # No need to try every flavor of debug inf. - NO_DEBUG_INFO_TESTCASE = True - - def test_normal_no_symbols(self): - """ - Validate behavior with no symbols or symbol locator. - ('baseline negative' behavior) - """ - test_root = self.config_test(["a.out"]) - self.try_breakpoint(False) - - def test_normal_default(self): - """ - Validate behavior with symbols, but no symbol locator. - ('baseline positive' behavior) - """ - test_root = self.config_test(["a.out", "a.out.debug"]) - self.try_breakpoint(True) - - def test_debuginfod_symbols(self): - """ - Test behavior with the full binary available from Debuginfod as - 'debuginfo' from the plug-in. 
- """ - test_root = self.config_test(["a.out"], "a.out.unstripped") - self.try_breakpoint(True) - - def test_debuginfod_executable(self): - """ - Test behavior with the full binary available from Debuginfod as - 'executable' from the plug-in. - """ - test_root = self.config_test(["a.out"], None, "a.out.unstripped") - self.try_breakpoint(True) - - def test_debuginfod_okd_symbols(self): - """ - Test behavior with the 'only-keep-debug' symbols available from Debuginfod. - """ - test_root = self.config_test(["a.out"], "a.out.debug") - self.try_breakpoint(True) - - def try_breakpoint(self, should_have_loc): - """ - This function creates a target from self.aout, sets a function-name - breakpoint, and checks to see if we have a file/line location, - as a way to validate that the symbols have been loaded. - should_have_loc specifies if we're testing that symbols have or - haven't been loaded. - """ - target = self.dbg.CreateTarget(self.aout) - self.assertTrue(target and target.IsValid(), "Target is valid") - - bp = target.BreakpointCreateByName("func") - self.assertTrue(bp and bp.IsValid(), "Breakpoint is valid") - self.assertEqual(bp.GetNumLocations(), 1) - - loc = bp.GetLocationAtIndex(0) - self.assertTrue(loc and loc.IsValid(), "Location is valid") - addr = loc.GetAddress() - self.assertTrue(addr and addr.IsValid(), "Loc address is valid") - line_entry = addr.GetLineEntry() - self.assertEqual( - should_have_loc, - line_entry != None and line_entry.IsValid(), - "Loc line entry is valid", - ) - if should_have_loc: - self.assertEqual(line_entry.GetLine(), 4) - self.assertEqual( - line_entry.GetFileSpec().GetFilename(), - self.main_source_file.GetFilename(), - ) - self.dbg.DeleteTarget(target) - shutil.rmtree(self.tmp_dir) - - def config_test(self, local_files, debuginfo=None, executable=None): - """ - Set up a test with local_files[] copied to a different location - so that we control which files are, or are not, found in the file system. - Also, create a stand-alone file-system 'hosted' debuginfod server with the - provided debuginfo and executable files (if they exist) - - Make the filesystem look like: - - /tmp//test/[local_files] - - /tmp//cache (for lldb to use as a temp cache) - - /tmp//buildid//executable -> - /tmp//buildid//debuginfo -> - Returns the /tmp/ path - """ - - self.build() - - uuid = self.getUUID("a.out") - if not uuid: - self.fail("Could not get UUID for a.out") - return - self.main_source_file = lldb.SBFileSpec("main.c") - self.tmp_dir = tempfile.mkdtemp() - test_dir = os.path.join(self.tmp_dir, "test") - os.makedirs(test_dir) - - self.aout = "" - # Copy the files used by the test: - for f in local_files: - shutil.copy(self.getBuildArtifact(f), test_dir) - # The first item is the binary to be used for the test - if self.aout == "": - self.aout = os.path.join(test_dir, f) - - use_debuginfod = debuginfo != None or executable != None - - # Populated the 'file://... 
mocked' Debuginfod server: - if use_debuginfod: - os.makedirs(os.path.join(self.tmp_dir, "cache")) - uuid_dir = os.path.join(self.tmp_dir, "buildid", uuid) - os.makedirs(uuid_dir) - if debuginfo: - shutil.copy( - self.getBuildArtifact(debuginfo), - os.path.join(uuid_dir, "debuginfo"), - ) - if executable: - shutil.copy( - self.getBuildArtifact(executable), - os.path.join(uuid_dir, "executable"), - ) - - # Configure LLDB for the test: - self.runCmd( - "settings set symbols.enable-external-lookup %s" - % str(use_debuginfod).lower() - ) - self.runCmd("settings clear plugin.symbol-locator.debuginfod.server-urls") - if use_debuginfod: - self.runCmd( - "settings set plugin.symbol-locator.debuginfod.cache-path %s/cache" - % self.tmp_dir - ) - self.runCmd( - "settings insert-before plugin.symbol-locator.debuginfod.server-urls 0 file://%s" - % self.tmp_dir - ) - - def getUUID(self, filename): - try: - target = self.dbg.CreateTarget(self.getBuildArtifact(filename)) - module = target.GetModuleAtIndex(0) - uuid = module.GetUUIDString().replace("-", "").lower() - self.dbg.DeleteTarget(target) - return uuid if len(uuid) == 40 else None - except: - return None diff --git a/lldb/test/API/debuginfod/Normal/main.c b/lldb/test/API/debuginfod/Normal/main.c deleted file mode 100644 index 4c71846..0000000 --- a/lldb/test/API/debuginfod/Normal/main.c +++ /dev/null @@ -1,7 +0,0 @@ -// This is a dump little pair of test files - -int func(int argc, const char *argv[]) { - return (argc + 1) * (argv[argc][0] + 2); -} - -int main(int argc, const char *argv[]) { return func(0, argv); } diff --git a/lldb/test/API/debuginfod/SplitDWARF/Makefile b/lldb/test/API/debuginfod/SplitDWARF/Makefile deleted file mode 100644 index 3ab9a96..0000000 --- a/lldb/test/API/debuginfod/SplitDWARF/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -C_SOURCES := main.c - -# For split-dwarf Debuginfod tests, we need: - -# * A .DWP file (a.out.dwp) -# Produced by Makefile.rules with MAKE_DWP set to YES - -# * The "full" binary (missing things that live in .dwo's) (a.out.unstripped) -# Produced by Makefile.rules with SAVE_FULL_DEBUG_BINARY set to YES and -# SPLIT_DEBUG_SYMBOLS set to YES - -# * The stripped binary (a.out) -# Produced by Makefile.rules - -# * The 'only-keep-debug' binary (a.out.debug) -# Produced below - -MAKE_DWP := YES -SPLIT_DEBUG_SYMBOLS := YES -SAVE_FULL_DEBUG_BINARY := YES -GEN_GNU_BUILD_ID := YES - -include Makefile.rules diff --git a/lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py b/lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py deleted file mode 100644 index 90db352..0000000 --- a/lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py +++ /dev/null @@ -1,188 +0,0 @@ -""" -Test support for the DebugInfoD network symbol acquisition protocol. -""" -import os -import shutil -import tempfile - -import lldb -from lldbsuite.test.decorators import * -import lldbsuite.test.lldbutil as lldbutil -from lldbsuite.test.lldbtest import * - - -""" -Test support for the DebugInfoD network symbol acquisition protocol. -This file is for split-dwarf (dwp) scenarios. 
- -1 - A split binary target with it's corresponding DWP file -2 - A stripped, split binary target with an unstripped binary and a DWP file -3 - A stripped, split binary target with an --only-keep-debug symbols file and a DWP file -""" - - -# It looks like Linux-AArch64 doesn't support build-id's on the LLDB builtbots -@skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) -class DebugInfodDWPTests(TestBase): - # No need to try every flavor of debug inf. - NO_DEBUG_INFO_TESTCASE = True - - def test_normal_stripped(self): - """ - Validate behavior with a stripped binary, no symbols or symbol locator. - """ - self.config_test(["a.out"]) - self.try_breakpoint(False) - - def test_normal_stripped_split_with_dwp(self): - """ - Validate behavior with symbols, but no symbol locator. - """ - self.config_test(["a.out", "a.out.debug", "a.out.dwp"]) - self.try_breakpoint(True) - - def test_normal_stripped_only_dwp(self): - """ - Validate behavior *with* dwp symbols only, but missing other symbols, - but no symbol locator. This shouldn't work: without the other symbols - DWO's appear mostly useless. - """ - self.config_test(["a.out", "a.out.dwp"]) - self.try_breakpoint(False) - - def test_debuginfod_dwp_from_service(self): - """ - Test behavior with the unstripped binary, and DWP from the service. - """ - self.config_test(["a.out.debug"], "a.out.dwp") - self.try_breakpoint(True) - - def test_debuginfod_both_symfiles_from_service(self): - """ - Test behavior with a stripped binary, with the unstripped binary and - dwp symbols from Debuginfod. - """ - self.config_test(["a.out"], "a.out.dwp", "a.out.unstripped") - self.try_breakpoint(True) - - def test_debuginfod_both_okd_symfiles_from_service(self): - """ - Test behavior with both the only-keep-debug symbols and the dwp symbols - from Debuginfod. - """ - self.config_test(["a.out"], "a.out.dwp", "a.out.debug") - self.try_breakpoint(True) - - def try_breakpoint(self, should_have_loc): - """ - This function creates a target from self.aout, sets a function-name - breakpoint, and checks to see if we have a file/line location, - as a way to validate that the symbols have been loaded. - should_have_loc specifies if we're testing that symbols have or - haven't been loaded. - """ - target = self.dbg.CreateTarget(self.aout) - self.assertTrue(target and target.IsValid(), "Target is valid") - - bp = target.BreakpointCreateByName("func") - self.assertTrue(bp and bp.IsValid(), "Breakpoint is valid") - self.assertEqual(bp.GetNumLocations(), 1) - - loc = bp.GetLocationAtIndex(0) - self.assertTrue(loc and loc.IsValid(), "Location is valid") - addr = loc.GetAddress() - self.assertTrue(addr and addr.IsValid(), "Loc address is valid") - line_entry = addr.GetLineEntry() - self.assertEqual( - should_have_loc, - line_entry != None and line_entry.IsValid(), - "Loc line entry is valid", - ) - if should_have_loc: - self.assertEqual(line_entry.GetLine(), 4) - self.assertEqual( - line_entry.GetFileSpec().GetFilename(), - self.main_source_file.GetFilename(), - ) - self.dbg.DeleteTarget(target) - shutil.rmtree(self.tmp_dir) - - def config_test(self, local_files, debuginfo=None, executable=None): - """ - Set up a test with local_files[] copied to a different location - so that we control which files are, or are not, found in the file system. 
- Also, create a stand-alone file-system 'hosted' debuginfod server with the - provided debuginfo and executable files (if they exist) - - Make the filesystem look like: - - /tmp//test/[local_files] - - /tmp//cache (for lldb to use as a temp cache) - - /tmp//buildid//executable -> - /tmp//buildid//debuginfo -> - Returns the /tmp/ path - """ - - self.build() - - uuid = self.getUUID("a.out") - if not uuid: - self.fail("Could not get UUID for a.out") - return - self.main_source_file = lldb.SBFileSpec("main.c") - self.tmp_dir = tempfile.mkdtemp() - self.test_dir = os.path.join(self.tmp_dir, "test") - os.makedirs(self.test_dir) - - self.aout = "" - # Copy the files used by the test: - for f in local_files: - shutil.copy(self.getBuildArtifact(f), self.test_dir) - if self.aout == "": - self.aout = os.path.join(self.test_dir, f) - - use_debuginfod = debuginfo != None or executable != None - - # Populated the 'file://... mocked' Debuginfod server: - if use_debuginfod: - os.makedirs(os.path.join(self.tmp_dir, "cache")) - uuid_dir = os.path.join(self.tmp_dir, "buildid", uuid) - os.makedirs(uuid_dir) - if debuginfo: - shutil.copy( - self.getBuildArtifact(debuginfo), - os.path.join(uuid_dir, "debuginfo"), - ) - if executable: - shutil.copy( - self.getBuildArtifact(executable), - os.path.join(uuid_dir, "executable"), - ) - os.remove(self.getBuildArtifact("main.dwo")) - # Configure LLDB for the test: - self.runCmd( - "settings set symbols.enable-external-lookup %s" - % str(use_debuginfod).lower() - ) - self.runCmd("settings clear plugin.symbol-locator.debuginfod.server-urls") - if use_debuginfod: - self.runCmd( - "settings set plugin.symbol-locator.debuginfod.cache-path %s/cache" - % self.tmp_dir - ) - self.runCmd( - "settings insert-before plugin.symbol-locator.debuginfod.server-urls 0 file://%s" - % self.tmp_dir - ) - - def getUUID(self, filename): - try: - target = self.dbg.CreateTarget(self.getBuildArtifact(filename)) - module = target.GetModuleAtIndex(0) - uuid = module.GetUUIDString().replace("-", "").lower() - self.dbg.DeleteTarget(target) - return uuid if len(uuid) == 40 else None - except: - return None diff --git a/lldb/test/API/debuginfod/SplitDWARF/main.c b/lldb/test/API/debuginfod/SplitDWARF/main.c deleted file mode 100644 index 4c71846..0000000 --- a/lldb/test/API/debuginfod/SplitDWARF/main.c +++ /dev/null @@ -1,7 +0,0 @@ -// This is a dump little pair of test files - -int func(int argc, const char *argv[]) { - return (argc + 1) * (argv[argc][0] + 2); -} - -int main(int argc, const char *argv[]) { return func(0, argv); } -- cgit v1.1 From 750843875254b1d493f4d7c3d3921c1bced55b7c Mon Sep 17 00:00:00 2001 From: Yeoul Na Date: Wed, 3 Apr 2024 16:35:11 -0700 Subject: [BoundsSafety] Minor fixes on counted_by (#87559) DeclRef to field must be marked as LValue to be consistent with how the field decl will be evaluated. T->desugar() is unnecessary to call ->isArrayType(). 
--- clang/lib/AST/TypePrinter.cpp | 4 ++-- clang/lib/Sema/SemaExpr.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/AST/TypePrinter.cpp b/clang/lib/AST/TypePrinter.cpp index d0771eb..075c8aba 100644 --- a/clang/lib/AST/TypePrinter.cpp +++ b/clang/lib/AST/TypePrinter.cpp @@ -1763,14 +1763,14 @@ static void printCountAttributedImpl(const CountAttributedType *T, void TypePrinter::printCountAttributedBefore(const CountAttributedType *T, raw_ostream &OS) { printBefore(T->desugar(), OS); - if (!T->desugar()->isArrayType()) + if (!T->isArrayType()) printCountAttributedImpl(T, OS, Policy); } void TypePrinter::printCountAttributedAfter(const CountAttributedType *T, raw_ostream &OS) { printAfter(T->desugar(), OS); - if (T->desugar()->isArrayType()) + if (T->isArrayType()) printCountAttributedImpl(T, OS, Policy); } diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 80b4257..6b2eb24 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -2751,7 +2751,7 @@ Sema::ActOnIdExpression(Scope *S, CXXScopeSpec &SS, QualType type = VD->getType().getNonReferenceType(); // This will eventually be translated into MemberExpr upon // the use of instantiated struct fields. - return BuildDeclRefExpr(VD, type, VK_PRValue, NameLoc); + return BuildDeclRefExpr(VD, type, VK_LValue, NameLoc); } } } -- cgit v1.1 From 622851a9059694487811a7f6078312fc2cce5486 Mon Sep 17 00:00:00 2001 From: Jason Molenda Date: Wed, 3 Apr 2024 16:40:34 -0700 Subject: [lldb] Set static Module's load addresses via ObjectFile (#87439) This is a followup to https://github.com/llvm/llvm-project/pull/86359 "[lldb] [ObjectFileMachO] LLVM_COV is not mapped into firmware memory (#86359)" where I treat LLVM_COV segments in a Mach-O binary as non-loadable. There is another codepath in `DynamicLoaderStatic::LoadAllImagesAtFileAddresses` which is called to set the load addresses for a Module to the file addresses. It has no logic to detect a segment that is not loaded in virtual memory (ObjectFileMachO::SectionIsLoadable), so it would set the load address for this LLVM_COV segment to the file address and shadow actual code, breaking lldb behavior. This method currently sets the load address for any section that doesn't have a load address set already. This presumes that a Module was added to the Target, some mechanism set the correct load address for SOME segments, and then this method is going to set the other segments to a no-slide value, assuming they were forgotten. ObjectFile base class doesn't, today, vend a SectionIsLoadable method, but we do have ObjectFile::SetLoadAddress and at a higher level, Module::SetLoadAddress, when we're setting the same slide to all segments. That's the behavior we want in this method. If any section has a load address, we don't touch this Module. Otherwise we set all sections to have a load address that is the same as the file address. I also audited the other parts of lldb that are calling SectionList::SectionLoadAddress and looked if they should be more correctly using Module::SetLoadAddress for the entire binary. But in most cases, we have the potential for different slides for different sections so this section-by-section approach must be taken. 
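For illustration only, a condensed sketch of that per-module decision, written against the same LLDB calls that appear in the diff below (the helper name `AnySectionHasLoadAddress` is hypothetical; the actual change keeps this loop inline in `LoadAllImagesAtFileAddresses`):

```cpp
// Sketch, not the patch: assumes the Target/SectionList/SectionSP types and
// calls used in DynamicLoaderStatic.cpp below.
static bool AnySectionHasLoadAddress(Target &target, SectionList &sections) {
  const size_t num_sections = sections.GetSize();
  for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) {
    SectionSP section_sp(sections.GetSectionAtIndex(sect_idx));
    // A section that already has a load address means some other mechanism
    // placed this module; don't overwrite what it did.
    if (section_sp &&
        target.GetSectionLoadList().GetSectionLoadAddress(section_sp) !=
            LLDB_INVALID_ADDRESS)
      return true;
  }
  return false;
}

// Caller shape: slide the whole module by 0 (i.e. use the file addresses)
// only when nothing has been loaded yet.
//   if (!AnySectionHasLoadAddress(target, *section_list))
//     module_sp->SetLoadAddress(target, /*load_offset=*/0,
//                               /*value_is_offset=*/true, changed);
```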
rdar://125800290 --- .../DynamicLoader/Static/DynamicLoaderStatic.cpp | 40 +++++++++------------- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp b/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp index a39aa228..5459981 100644 --- a/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp +++ b/lldb/source/Plugins/DynamicLoader/Static/DynamicLoaderStatic.cpp @@ -84,51 +84,43 @@ void DynamicLoaderStatic::LoadAllImagesAtFileAddresses() { // Disable JIT for static dynamic loader targets m_process->SetCanJIT(false); + Target &target = m_process->GetTarget(); for (ModuleSP module_sp : module_list.Modules()) { if (module_sp) { bool changed = false; + bool no_load_addresses = true; + // If this module has a section with a load address set in + // the target, assume all necessary work is already done. There + // may be sections without a load address set intentionally + // and we don't want to mutate that. + // For a module with no load addresses set, set the load addresses + // to slide == 0, the same as the file addresses, in the target. ObjectFile *image_object_file = module_sp->GetObjectFile(); if (image_object_file) { SectionList *section_list = image_object_file->GetSectionList(); if (section_list) { - // All sections listed in the dyld image info structure will all - // either be fixed up already, or they will all be off by a single - // slide amount that is determined by finding the first segment that - // is at file offset zero which also has bytes (a file size that is - // greater than zero) in the object file. - - // Determine the slide amount (if any) const size_t num_sections = section_list->GetSize(); - size_t sect_idx = 0; - for (sect_idx = 0; sect_idx < num_sections; ++sect_idx) { - // Iterate through the object file sections to find the first - // section that starts of file offset zero and that has bytes in - // the file... + for (size_t sect_idx = 0; sect_idx < num_sections; ++sect_idx) { SectionSP section_sp(section_list->GetSectionAtIndex(sect_idx)); if (section_sp) { - // If this section already has a load address set in the target, - // don't re-set it to the file address. Something may have - // set it to a more correct value already. - if (m_process->GetTarget() - .GetSectionLoadList() - .GetSectionLoadAddress(section_sp) != - LLDB_INVALID_ADDRESS) { - continue; + if (target.GetSectionLoadList().GetSectionLoadAddress( + section_sp) != LLDB_INVALID_ADDRESS) { + no_load_addresses = false; + break; } - if (m_process->GetTarget().SetSectionLoadAddress( - section_sp, section_sp->GetFileAddress())) - changed = true; } } } } + if (no_load_addresses) + module_sp->SetLoadAddress(target, 0, true /*value_is_offset*/, changed); if (changed) loaded_module_list.AppendIfNeeded(module_sp); } } - m_process->GetTarget().ModulesDidLoad(loaded_module_list); + target.ModulesDidLoad(loaded_module_list); } ThreadPlanSP -- cgit v1.1 From ef5a7109116c1615a9c99c8dba6577853beb6c73 Mon Sep 17 00:00:00 2001 From: Han-Chung Wang Date: Wed, 3 Apr 2024 17:00:56 -0700 Subject: [mlir][vector] Skip 0D vectors in vector linearization. 
(#87577) --- mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp | 3 +++ mlir/test/Dialect/Vector/linearize.mlir | 10 ++++++++++ 2 files changed, 13 insertions(+) diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp index 4fa5b8a..b59e906 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorLinearize.cpp @@ -26,6 +26,9 @@ static bool isLessThanTargetBitWidth(Operation *op, unsigned targetBitWidth) { // Reject index since getElementTypeBitWidth will abort for Index types. if (!vecType || vecType.getElementType().isIndex()) return false; + // There are no dimension to fold if it is a 0-D vector. + if (vecType.getRank() == 0) + return false; unsigned trailingVecDimBitWidth = vecType.getShape().back() * vecType.getElementTypeBitWidth(); if (trailingVecDimBitWidth >= targetBitWidth) diff --git a/mlir/test/Dialect/Vector/linearize.mlir b/mlir/test/Dialect/Vector/linearize.mlir index f0e9b3a..212541c 100644 --- a/mlir/test/Dialect/Vector/linearize.mlir +++ b/mlir/test/Dialect/Vector/linearize.mlir @@ -146,6 +146,16 @@ func.func @test_scalable_no_linearize(%arg0: vector<[2]x[2]xf32>) -> vector<[2]x // ----- +// ALL-LABEL: func.func @test_0d_vector +func.func @test_0d_vector() -> vector { + // ALL: %[[CST:.+]] = arith.constant dense<0.000000e+00> : vector + %0 = arith.constant dense<0.0> : vector + // ALL: return %[[CST]] + return %0 : vector +} + +// ----- + func.func @test_scalable_no_linearize(%arg0: vector<2x[2]xf32>) -> vector<2x[2]xf32> { // expected-error@+1 {{failed to legalize operation 'arith.constant' that was explicitly marked illegal}} %0 = arith.constant dense<[[1., 1.], [3., 3.]]> : vector<2x[2]xf32> -- cgit v1.1 From 7e2a1d6f23cb604203324b47237f8e463704a497 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 3 Apr 2024 17:11:45 -0700 Subject: [RISCV] Remove G_TRUNC/ZEXT/SEXT/ANYEXT from the first switch in RISCVRegisterBankInfo::getInstrMapping. This removes the special case for vectors. The default case in the second switch can handle GPR in addition to vectors. We just won't use the static ValueMapping entry. --- llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index bab95c5..4f34514 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -320,20 +320,10 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_PTR_ADD: case TargetOpcode::G_PTRTOINT: case TargetOpcode::G_INTTOPTR: - case TargetOpcode::G_TRUNC: case TargetOpcode::G_SEXTLOAD: case TargetOpcode::G_ZEXTLOAD: return getInstructionMapping(DefaultMappingID, /*Cost=*/1, GPRValueMapping, NumOperands); - case TargetOpcode::G_ANYEXT: - case TargetOpcode::G_SEXT: - case TargetOpcode::G_ZEXT: { - // Handle vector extends in the default case below. 
- if (MRI.getType(MI.getOperand(0).getReg()).isVector()) - break; - return getInstructionMapping(DefaultMappingID, /*Cost=*/1, GPRValueMapping, - NumOperands); - } case TargetOpcode::G_FADD: case TargetOpcode::G_FSUB: case TargetOpcode::G_FMUL: -- cgit v1.1 From 97523e5321be2542e8e117443ddea10b3f572ae7 Mon Sep 17 00:00:00 2001 From: Shih-Po Hung Date: Thu, 4 Apr 2024 08:30:15 +0800 Subject: [RISCV][TTI] Scale the cost of intrinsic stepvector with LMUL (#87301) Use the return type to measure the LMUL size for latency/throughput cost --- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 9 +- llvm/test/Analysis/CostModel/RISCV/stepvector.ll | 143 ++++++++------------- 2 files changed, 61 insertions(+), 91 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 38304ff..27a4d78 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -861,9 +861,14 @@ RISCVTTIImpl::getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, } // TODO: add more intrinsic case Intrinsic::experimental_stepvector: { - unsigned Cost = 1; // vid auto LT = getTypeLegalizationCost(RetTy); - return Cost + (LT.first - 1); + // Legalisation of illegal types involves an `index' instruction plus + // (LT.first - 1) vector adds. + if (ST->hasVInstructions()) + return getRISCVInstructionCost(RISCV::VID_V, LT.second, CostKind) + + (LT.first - 1) * + getRISCVInstructionCost(RISCV::VADD_VX, LT.second, CostKind); + return 1 + (LT.first - 1); } case Intrinsic::vp_rint: { // RISC-V target uses at least 5 instructions to lower rounding intrinsics. diff --git a/llvm/test/Analysis/CostModel/RISCV/stepvector.ll b/llvm/test/Analysis/CostModel/RISCV/stepvector.ll index 7d29d2c..e599955 100644 --- a/llvm/test/Analysis/CostModel/RISCV/stepvector.ll +++ b/llvm/test/Analysis/CostModel/RISCV/stepvector.ll @@ -4,98 +4,60 @@ define void @stepvector() { ; CHECK-LABEL: 'stepvector' -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %zero = call @llvm.experimental.stepvector.nxv1i8() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call @llvm.experimental.stepvector.nxv2i8() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call @llvm.experimental.stepvector.nxv4i8() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call @llvm.experimental.stepvector.nxv8i8() +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %1 = call @llvm.experimental.stepvector.nxv1i8() +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %2 = call @llvm.experimental.stepvector.nxv2i8() +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %3 = call @llvm.experimental.stepvector.nxv4i8() ; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %4 = call @llvm.experimental.stepvector.nxv8i8() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %5 = call @llvm.experimental.stepvector.nxv8i8() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %6 = call @llvm.experimental.stepvector.nxv8i8() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %7 = call @llvm.experimental.stepvector.nxv8i8() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %8 = call @llvm.experimental.stepvector.nxv16i8() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = 
call @llvm.experimental.stepvector.nxv32i8() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.experimental.stepvector.nxv64i8() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call @llvm.experimental.stepvector.nxv1i16() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %12 = call @llvm.experimental.stepvector.nxv2i16() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %13 = call @llvm.experimental.stepvector.nxv4i16() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %14 = call @llvm.experimental.stepvector.nxv8i16() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %15 = call @llvm.experimental.stepvector.nxv16i16() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.experimental.stepvector.nxv16i16() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.experimental.stepvector.nxv16i16() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %18 = call @llvm.experimental.stepvector.nxv16i16() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %19 = call @llvm.experimental.stepvector.nxv16i16() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %20 = call @llvm.experimental.stepvector.nxv32i16() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %21 = call @llvm.experimental.stepvector.nxv1i32() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call @llvm.experimental.stepvector.nxv2i32() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %23 = call @llvm.experimental.stepvector.nxv4i32() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %24 = call @llvm.experimental.stepvector.nxv8i32() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %25 = call @llvm.experimental.stepvector.nxv16i32() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %26 = call @llvm.experimental.stepvector.nxv16i32() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %27 = call @llvm.experimental.stepvector.nxv16i32() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %28 = call @llvm.experimental.stepvector.nxv16i32() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %29 = call @llvm.experimental.stepvector.nxv16i32() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %30 = call @llvm.experimental.stepvector.nxv1i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %31 = call @llvm.experimental.stepvector.nxv2i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %32 = call @llvm.experimental.stepvector.nxv4i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %33 = call @llvm.experimental.stepvector.nxv8i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %34 = call @llvm.experimental.stepvector.nxv8i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %35 = call @llvm.experimental.stepvector.nxv8i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %36 = call @llvm.experimental.stepvector.nxv8i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %37 = call @llvm.experimental.stepvector.nxv8i64() -; CHECK-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %38 = call @llvm.experimental.stepvector.nxv8i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %39 = call @llvm.experimental.stepvector.nxv16i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %40 = call @llvm.experimental.stepvector.nxv16i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %41 = call @llvm.experimental.stepvector.nxv16i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %42 = call @llvm.experimental.stepvector.nxv16i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %43 = call @llvm.experimental.stepvector.nxv16i64() -; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %44 = call @llvm.experimental.stepvector.nxv16i64() +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %5 = call @llvm.experimental.stepvector.nxv16i8() +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %6 = call @llvm.experimental.stepvector.nxv32i8() +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %7 = call @llvm.experimental.stepvector.nxv64i8() +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %8 = call @llvm.experimental.stepvector.nxv128i8() +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %9 = call @llvm.experimental.stepvector.nxv1i16() +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %10 = call @llvm.experimental.stepvector.nxv2i16() +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %11 = call @llvm.experimental.stepvector.nxv4i16() +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %12 = call @llvm.experimental.stepvector.nxv8i16() +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %13 = call @llvm.experimental.stepvector.nxv16i16() +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %14 = call @llvm.experimental.stepvector.nxv32i16() +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %15 = call @llvm.experimental.stepvector.nxv64i16() +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %16 = call @llvm.experimental.stepvector.nxv1i32() +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %17 = call @llvm.experimental.stepvector.nxv2i32() +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %18 = call @llvm.experimental.stepvector.nxv4i32() +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %19 = call @llvm.experimental.stepvector.nxv8i32() +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %20 = call @llvm.experimental.stepvector.nxv16i32() +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %21 = call @llvm.experimental.stepvector.nxv32i32() +; CHECK-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %22 = call @llvm.experimental.stepvector.nxv1i64() +; CHECK-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %23 = call @llvm.experimental.stepvector.nxv2i64() +; CHECK-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %24 = call @llvm.experimental.stepvector.nxv4i64() +; CHECK-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %25 = call @llvm.experimental.stepvector.nxv8i64() +; CHECK-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %26 = call 
@llvm.experimental.stepvector.nxv16i64() ; CHECK-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void ; - %zero = call @llvm.experimental.stepvector.nxv1i8() - %1 = call @llvm.experimental.stepvector.nxv2i8() - %2 = call @llvm.experimental.stepvector.nxv4i8() - %3 = call @llvm.experimental.stepvector.nxv8i8() - %4 = call @llvm.experimental.stepvector.nxv8i8() - %5 = call @llvm.experimental.stepvector.nxv8i8() - %6 = call @llvm.experimental.stepvector.nxv8i8() - %7 = call @llvm.experimental.stepvector.nxv8i8() - %8 = call @llvm.experimental.stepvector.nxv16i8() - %9 = call @llvm.experimental.stepvector.nxv32i8() - %10 = call @llvm.experimental.stepvector.nxv64i8() - %11 = call @llvm.experimental.stepvector.nxv1i16() - %12 = call @llvm.experimental.stepvector.nxv2i16() - %13 = call @llvm.experimental.stepvector.nxv4i16() - %14 = call @llvm.experimental.stepvector.nxv8i16() - %15 = call @llvm.experimental.stepvector.nxv16i16() - %16 = call @llvm.experimental.stepvector.nxv16i16() - %17 = call @llvm.experimental.stepvector.nxv16i16() - %18 = call @llvm.experimental.stepvector.nxv16i16() - %19 = call @llvm.experimental.stepvector.nxv16i16() - %20 = call @llvm.experimental.stepvector.nxv32i16() - %21 = call @llvm.experimental.stepvector.nxv1i32() - %22 = call @llvm.experimental.stepvector.nxv2i32() - %23 = call @llvm.experimental.stepvector.nxv4i32() - %24 = call @llvm.experimental.stepvector.nxv8i32() - %25 = call @llvm.experimental.stepvector.nxv16i32() - %26 = call @llvm.experimental.stepvector.nxv16i32() - %27 = call @llvm.experimental.stepvector.nxv16i32() - %28 = call @llvm.experimental.stepvector.nxv16i32() - %29 = call @llvm.experimental.stepvector.nxv16i32() - %30 = call @llvm.experimental.stepvector.nxv1i64() - %31 = call @llvm.experimental.stepvector.nxv2i64() - %32 = call @llvm.experimental.stepvector.nxv4i64() - %33 = call @llvm.experimental.stepvector.nxv8i64() - %34 = call @llvm.experimental.stepvector.nxv8i64() - %35 = call @llvm.experimental.stepvector.nxv8i64() - %36 = call @llvm.experimental.stepvector.nxv8i64() - %37 = call @llvm.experimental.stepvector.nxv8i64() - %38 = call @llvm.experimental.stepvector.nxv8i64() - %39 = call @llvm.experimental.stepvector.nxv16i64() - %40 = call @llvm.experimental.stepvector.nxv16i64() - %41 = call @llvm.experimental.stepvector.nxv16i64() - %42 = call @llvm.experimental.stepvector.nxv16i64() - %43 = call @llvm.experimental.stepvector.nxv16i64() - %44 = call @llvm.experimental.stepvector.nxv16i64() + call @llvm.experimental.stepvector.nxv1i8() + call @llvm.experimental.stepvector.nxv2i8() + call @llvm.experimental.stepvector.nxv4i8() + call @llvm.experimental.stepvector.nxv8i8() + call @llvm.experimental.stepvector.nxv16i8() + call @llvm.experimental.stepvector.nxv32i8() + call @llvm.experimental.stepvector.nxv64i8() + call @llvm.experimental.stepvector.nxv128i8() + call @llvm.experimental.stepvector.nxv1i16() + call @llvm.experimental.stepvector.nxv2i16() + call @llvm.experimental.stepvector.nxv4i16() + call @llvm.experimental.stepvector.nxv8i16() + call @llvm.experimental.stepvector.nxv16i16() + call @llvm.experimental.stepvector.nxv32i16() + call @llvm.experimental.stepvector.nxv64i16() + call @llvm.experimental.stepvector.nxv1i32() + call @llvm.experimental.stepvector.nxv2i32() + call @llvm.experimental.stepvector.nxv4i32() + call @llvm.experimental.stepvector.nxv8i32() + call @llvm.experimental.stepvector.nxv16i32() + call @llvm.experimental.stepvector.nxv32i32() + call @llvm.experimental.stepvector.nxv1i64() + call 
@llvm.experimental.stepvector.nxv2i64() + call @llvm.experimental.stepvector.nxv4i64() + call @llvm.experimental.stepvector.nxv8i64() + call @llvm.experimental.stepvector.nxv16i64() ret void } @@ -107,17 +69,20 @@ declare @llvm.experimental.stepvector.nxv8i8() declare @llvm.experimental.stepvector.nxv16i8() declare @llvm.experimental.stepvector.nxv32i8() declare @llvm.experimental.stepvector.nxv64i8() +declare @llvm.experimental.stepvector.nxv128i8() declare @llvm.experimental.stepvector.nxv1i16() declare @llvm.experimental.stepvector.nxv2i16() declare @llvm.experimental.stepvector.nxv4i16() declare @llvm.experimental.stepvector.nxv8i16() declare @llvm.experimental.stepvector.nxv16i16() declare @llvm.experimental.stepvector.nxv32i16() +declare @llvm.experimental.stepvector.nxv64i16() declare @llvm.experimental.stepvector.nxv1i32() declare @llvm.experimental.stepvector.nxv2i32() declare @llvm.experimental.stepvector.nxv4i32() declare @llvm.experimental.stepvector.nxv8i32() declare @llvm.experimental.stepvector.nxv16i32() +declare @llvm.experimental.stepvector.nxv32i32() declare @llvm.experimental.stepvector.nxv1i64() declare @llvm.experimental.stepvector.nxv2i64() declare @llvm.experimental.stepvector.nxv4i64() -- cgit v1.1 From abd05eb4a53e6c7760496620da417733f52d4bf9 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Wed, 3 Apr 2024 17:34:09 -0700 Subject: [clang] Init fields added by #87357 --- clang/include/clang/Frontend/FrontendOptions.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/include/clang/Frontend/FrontendOptions.h b/clang/include/clang/Frontend/FrontendOptions.h index 864af66..5ee4d47 100644 --- a/clang/include/clang/Frontend/FrontendOptions.h +++ b/clang/include/clang/Frontend/FrontendOptions.h @@ -580,7 +580,9 @@ public: BuildingImplicitModuleUsesLock(true), ModulesEmbedAllFiles(false), IncludeTimestamps(true), UseTemporary(true), AllowPCMWithCompilerErrors(false), ModulesShareFileManager(true), - TimeTraceGranularity(500) {} + EmitSymbolGraph(false), EmitExtensionSymbolGraphs(false), + EmitSymbolGraphSymbolLabelsForTesting(false), + EmitPrettySymbolGraphs(false), TimeTraceGranularity(500) {} /// getInputKindForExtension - Return the appropriate input kind for a file /// extension. For example, "c" would return Language::C. -- cgit v1.1 From 1f01c580444ea2daef67f95ffc5fde2de5a37cec Mon Sep 17 00:00:00 2001 From: darkbuck Date: Wed, 3 Apr 2024 20:52:21 -0400 Subject: [GlobalISel] Fix the infinite loop issue in `commute_int_constant_to_rhs` - When both operands are constant, the matcher runs into an infinite loop as the commutation should be applied only when LHS is a constant and RHS is not. 
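For illustration, a minimal self-contained sketch of the corrected decision; `shouldCommuteConstantToRHS` is a hypothetical helper taking booleans in place of the real `MRI`/opcode queries shown in the diff below:

```cpp
#include <cassert>

// Commute only when the LHS is a constant (or a G_CONSTANT_FOLD_BARRIER) and
// the RHS is neither; with two constant operands the combiner would otherwise
// keep swapping the operands and never terminate.
static bool shouldCommuteConstantToRHS(bool LHSIsConst, bool LHSIsFoldBarrier,
                                       bool RHSIsConst, bool RHSIsFoldBarrier) {
  if (!LHSIsConst && !LHSIsFoldBarrier)
    return false; // nothing to move to the RHS
  return !RHSIsConst && !RHSIsFoldBarrier; // never commute onto a constant RHS
}

int main() {
  assert(shouldCommuteConstantToRHS(true, false, false, false));   // c + x  -> commute
  assert(!shouldCommuteConstantToRHS(true, false, true, false));   // c0 + c1 -> leave for folding
  assert(!shouldCommuteConstantToRHS(false, false, false, false)); // x + y   -> nothing to do
  return 0;
}
```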
Reviewers: arsenm Reviewed By: arsenm Pull Request: https://github.com/llvm/llvm-project/pull/87426 --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 17 ++++++------- .../GlobalISel/combine-commute-int-const-lhs.mir | 28 ++++++++++++++++++++++ 2 files changed, 37 insertions(+), 8 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 5cf7a33..062132c 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6276,14 +6276,15 @@ bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) { bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) { Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); - auto *LHSDef = MRI.getVRegDef(LHS); - if (getIConstantVRegVal(LHS, MRI).has_value()) - return true; - - // LHS may be a G_CONSTANT_FOLD_BARRIER. If so we commute - // as long as we don't already have a constant on the RHS. - if (LHSDef->getOpcode() != TargetOpcode::G_CONSTANT_FOLD_BARRIER) - return false; + if (!getIConstantVRegVal(LHS, MRI)) { + // Skip commuting if LHS is not a constant. But, LHS may be a + // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already + // have a constant on the RHS. + if (MRI.getVRegDef(LHS)->getOpcode() != + TargetOpcode::G_CONSTANT_FOLD_BARRIER) + return false; + } + // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER. return MRI.getVRegDef(RHS)->getOpcode() != TargetOpcode::G_CONSTANT_FOLD_BARRIER && !getIConstantVRegVal(RHS, MRI); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir new file mode 100644 index 0000000..b145a6d --- /dev/null +++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir @@ -0,0 +1,28 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 +# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner %s -o - \ +# RUN: --aarch64prelegalizercombiner-disable-rule=constant_fold_binop | FileCheck %s + +# `constant_fold_binop` is disabled to trigger the infinite loop in `commute_int_constant_to_rhs`. + +--- +name: add +tracksRegLiveness: true +body: | + bb.0: + liveins: $s0 + + ; CHECK-LABEL: name: add + ; CHECK: liveins: $s0 + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: %c0:_(s32) = G_CONSTANT i32 1 + ; CHECK-NEXT: %c1:_(s32) = G_CONSTANT i32 2 + ; CHECK-NEXT: %add:_(s32) = G_ADD %c0, %c1 + ; CHECK-NEXT: $s0 = COPY %add(s32) + ; CHECK-NEXT: RET_ReallyLR + %c0:_(s32) = G_CONSTANT i32 1 + %c1:_(s32) = G_CONSTANT i32 2 + %add:_(s32) = G_ADD %c0, %c1 + $s0 = COPY %add + RET_ReallyLR + +... -- cgit v1.1 From a853d79963c6ac09154817690a8c317157345876 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Wed, 3 Apr 2024 18:48:30 -0700 Subject: [RISCV][GISel] Don't check for FP uses of of IMPLICIT_DEF if the type is vector. NFC If the type is vector, we can immediately know to use vector mapping. Previously we searched for FP uses, but then replaced it if the type was vector. 
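Illustrative sketch of the reordered check (hypothetical helper `pickImplicitDefBank`; the real code in the diff below works on `LLT` and the static value mappings):

```cpp
#include <string>

// Test the decisive vector condition first so the FP-use scan only runs for
// scalar G_IMPLICIT_DEFs, instead of scanning the uses and then overriding
// the result for vectors.
static std::string pickImplicitDefBank(bool TypeIsVector, bool OnlyFPUses) {
  if (TypeIsVector)
    return "VRB"; // vector types always map to the vector register bank
  if (OnlyFPUses)
    return "FPR"; // scalar consumed only by FP instructions
  return "GPR";
}
```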
--- llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index 4f34514..8534024 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -341,18 +341,17 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_IMPLICIT_DEF: { Register Dst = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(Dst); - uint64_t DstMinSize = DstTy.getSizeInBits().getKnownMinValue(); + unsigned DstMinSize = DstTy.getSizeInBits().getKnownMinValue(); auto Mapping = GPRValueMapping; // FIXME: May need to do a better job determining when to use FPRB. // For example, the look through COPY case: // %0:_(s32) = G_IMPLICIT_DEF // %1:_(s32) = COPY %0 // $f10_d = COPY %1(s32) - if (anyUseOnlyUseFP(Dst, MRI, TRI)) - Mapping = getFPValueMapping(DstMinSize); - if (DstTy.isVector()) Mapping = getVRBValueMapping(DstMinSize); + else if (anyUseOnlyUseFP(Dst, MRI, TRI)) + Mapping = getFPValueMapping(DstMinSize); return getInstructionMapping(DefaultMappingID, /*Cost=*/1, Mapping, NumOperands); -- cgit v1.1 From a4c470555b5c311770e6cb58494c573c4efe53d6 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 4 Apr 2024 11:17:59 +0900 Subject: [mlir][linalg] Fix builder API usage in `RegionBuilderHelper` (#87451) Operations must be created with the supplied builder. Otherwise, the dialect conversion / greedy pattern rewrite driver can break. This commit fixes a crash in the dialect conversion: ``` within split at llvm-project/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir:1 offset :8:8: error: failed to legalize operation 'tosa.add' %0 = tosa.add %1, %arg2 : (tensor<10x10xf32>, tensor<*xf32>) -> tensor<*xf32> ^ within split at llvm-project/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir:1 offset :8:8: note: see current operation: %9 = "tosa.add"(%8, %arg2) : (tensor<10x10xf32>, tensor<*xf32>) -> tensor<*xf32> mlir-opt: llvm-project/mlir/include/mlir/IR/UseDefLists.h:198: mlir::IRObjectWithUseList::~IRObjectWithUseList() [OperandType = mlir::OpOperand]: Assertion `use_empty() && "Cannot destroy a value that still has uses!"' failed. ``` This commit is the proper fix for #87297 (which was reverted). --- mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp | 36 +++++++++++----------- .../SparseTensor/Transforms/SparseTensorPasses.cpp | 2 +- .../TosaToLinalg/tosa-to-linalg-invalid.mlir | 12 ++++++++ .../mlir-linalg-ods-yaml-gen.cpp | 2 +- 4 files changed, 32 insertions(+), 20 deletions(-) diff --git a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp index 2d7219f..9c5c58f 100644 --- a/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp +++ b/mlir/lib/Dialect/Linalg/IR/LinalgOps.cpp @@ -373,14 +373,15 @@ namespace { class RegionBuilderHelper { public: - RegionBuilderHelper(MLIRContext *context, Block &block) - : context(context), block(block) {} + RegionBuilderHelper(OpBuilder &builder, Block &block) + : builder(builder), block(block) {} // Build the unary functions defined by OpDSL. 
Value buildUnaryFn(UnaryFn unaryFn, Value arg) { if (!isFloatingPoint(arg)) llvm_unreachable("unsupported non numeric type"); - OpBuilder builder = getBuilder(); + OpBuilder::InsertionGuard g(builder); + builder.setInsertionPointToEnd(&block); switch (unaryFn) { case UnaryFn::exp: return builder.create(arg.getLoc(), arg); @@ -407,7 +408,8 @@ public: arg1.getType().getIntOrFloatBitWidth() == 1; if (!allComplex && !allFloatingPoint && !allInteger) llvm_unreachable("unsupported non numeric type"); - OpBuilder builder = getBuilder(); + OpBuilder::InsertionGuard g(builder); + builder.setInsertionPointToEnd(&block); switch (binaryFn) { case BinaryFn::add: if (allComplex) @@ -481,29 +483,32 @@ public: } void yieldOutputs(ValueRange values) { - OpBuilder builder = getBuilder(); + OpBuilder::InsertionGuard g(builder); + builder.setInsertionPointToEnd(&block); Location loc = builder.getUnknownLoc(); builder.create(loc, values); } Value constant(const std::string &value) { - OpBuilder builder = getBuilder(); + OpBuilder::InsertionGuard g(builder); + builder.setInsertionPointToEnd(&block); Location loc = builder.getUnknownLoc(); Attribute valueAttr = parseAttribute(value, builder.getContext()); return builder.create(loc, ::cast(valueAttr)); } Value index(int64_t dim) { - OpBuilder builder = getBuilder(); + OpBuilder::InsertionGuard g(builder); + builder.setInsertionPointToEnd(&block); return builder.create(builder.getUnknownLoc(), dim); } Type getIntegerType(unsigned width) { - return IntegerType::get(context, width); + return IntegerType::get(builder.getContext(), width); } - Type getFloat32Type() { return Float32Type::get(context); } - Type getFloat64Type() { return Float64Type::get(context); } + Type getFloat32Type() { return Float32Type::get(builder.getContext()); } + Type getFloat64Type() { return Float64Type::get(builder.getContext()); } private: // Generates operations to cast the given operand to a specified type. @@ -511,7 +516,8 @@ private: // operand returned as-is (which will presumably yield a verification // issue downstream). Value cast(Type toType, Value operand, bool isUnsignedCast) { - OpBuilder builder = getBuilder(); + OpBuilder::InsertionGuard g(builder); + builder.setInsertionPointToEnd(&block); auto loc = operand.getLoc(); return convertScalarToDtype(builder, loc, operand, toType, isUnsignedCast); } @@ -526,13 +532,7 @@ private: return llvm::isa(value.getType()); } - OpBuilder getBuilder() { - OpBuilder builder(context); - builder.setInsertionPointToEnd(&block); - return builder; - } - - MLIRContext *context; + OpBuilder &builder; Block █ }; diff --git a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp index d4c1792..acea25f 100644 --- a/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp +++ b/mlir/lib/Dialect/SparseTensor/Transforms/SparseTensorPasses.cpp @@ -274,7 +274,7 @@ struct SparseTensorCodegenPass }); // The following operations and dialects may be introduced by the // codegen rules, and are therefore marked as legal. 
- target.addLegalOp(); + target.addLegalOp(); target.addLegalDialect< arith::ArithDialect, bufferization::BufferizationDialect, complex::ComplexDialect, memref::MemRefDialect, scf::SCFDialect>(); diff --git a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir index 17eec59..ad65410 100644 --- a/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir +++ b/mlir/test/Conversion/TosaToLinalg/tosa-to-linalg-invalid.mlir @@ -15,3 +15,15 @@ func.func @tensor_with_unknown_rank(%arg0: tensor<*xi8>) -> tensor<*xi8> { %0 = "tosa.abs"(%arg0) : (tensor<*xi8>) -> tensor<*xi8> return %0 : tensor<*xi8> } + +// ----- + +// CHECK-LABEL: @unranked_add +func.func @unranked_add(%arg0 : tensor<10x10xf32> , %arg1 : tensor<10x10xf32>, %arg2 : tensor<*xf32>) -> (tensor<10x10xf32>) { + // expected-error@+3 {{failed to legalize operation 'tosa.add'}} + %reduce = tosa.reduce_max %arg0 {axis = 1 : i32} : (tensor<10x10xf32>) -> tensor<10x1xf32> + %1 = tosa.add %reduce, %arg1 : (tensor<10x1xf32>, tensor<10x10xf32>) -> tensor<10x10xf32> + %0 = tosa.add %1, %arg2 : (tensor<10x10xf32>, tensor<*xf32>) -> tensor<*xf32> + %2 = tosa.reshape %0 {new_shape = array} : (tensor<*xf32>) -> tensor<10x10xf32> + return %2 : tensor<10x10xf32> +} diff --git a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp index f14e559..fe6ad15 100644 --- a/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp +++ b/mlir/tools/mlir-linalg-ods-gen/mlir-linalg-ods-yaml-gen.cpp @@ -1008,7 +1008,7 @@ void {0}::regionBuilder(ImplicitLocOpBuilder &b, Block &block, ArrayRef attrs) {{ assert({1} > 0 && block.getNumArguments() == {1} && "{0} regionBuilder expects {1} (>=0) args"); - RegionBuilderHelper helper(block.getArgument(0).getContext(), block); + RegionBuilderHelper helper(b, block); SmallVector yields; {2} {3} -- cgit v1.1 From fb635be0b8a3e14ca38f3f74e8224f9e0f716a10 Mon Sep 17 00:00:00 2001 From: Hristo Hristov Date: Thu, 4 Apr 2024 06:03:00 +0300 Subject: [libc++] P2867R1: Remove Deprecated `strstream`s From C++26 (#87107) Implements: https://wg21.link/P2867R2 --------- Co-authored-by: Hristo Hristov --- libcxx/docs/ReleaseNotes/19.rst | 3 +++ libcxx/docs/Status/Cxx2cPapers.csv | 2 +- libcxx/docs/UsingLibcxx.rst | 5 ++++- libcxx/include/strstream | 12 ++++++++---- libcxx/modules/std/strstream.inc | 2 ++ .../depr.istrstream/depr.istrstream.cons/ccp.pass.cpp | 2 +- .../depr.istrstream/depr.istrstream.cons/ccp_size.pass.cpp | 2 +- .../depr.istrstream/depr.istrstream.cons/cp.pass.cpp | 2 +- .../depr.istrstream/depr.istrstream.cons/cp_size.pass.cpp | 2 +- .../depr.istrstream/depr.istrstream.members/rdbuf.pass.cpp | 2 +- .../depr.istrstream/depr.istrstream.members/str.pass.cpp | 2 +- .../depr/depr.str.strstreams/depr.istrstream/depr.verify.cpp | 2 ++ .../depr/depr.str.strstreams/depr.istrstream/types.pass.cpp | 2 +- .../depr.ostrstream.cons/cp_size_mode.pass.cpp | 2 +- .../depr.ostrstream/depr.ostrstream.cons/default.pass.cpp | 2 +- .../depr.ostrstream/depr.ostrstream.members/freeze.pass.cpp | 2 +- .../depr.ostrstream/depr.ostrstream.members/pcount.pass.cpp | 2 +- .../depr.ostrstream/depr.ostrstream.members/rdbuf.pass.cpp | 2 +- .../depr.ostrstream/depr.ostrstream.members/str.pass.cpp | 2 +- .../depr/depr.str.strstreams/depr.ostrstream/depr.verify.cpp | 2 ++ .../depr/depr.str.strstreams/depr.ostrstream/types.pass.cpp | 2 +- .../depr.strstream/depr.strstream.cons/cp_size_mode.pass.cpp | 2 +- 
.../depr.strstream/depr.strstream.cons/default.pass.cpp | 2 +- .../depr.strstream/depr.strstream.dest/rdbuf.pass.cpp | 2 +- .../depr.strstream/depr.strstream.oper/freeze.pass.cpp | 2 +- .../depr.strstream/depr.strstream.oper/pcount.pass.cpp | 2 +- .../depr.strstream/depr.strstream.oper/str.pass.cpp | 2 +- .../depr/depr.str.strstreams/depr.strstream/depr.verify.cpp | 2 ++ .../depr/depr.str.strstreams/depr.strstream/types.pass.cpp | 2 +- .../depr.strstreambuf.cons/ccp_size.pass.cpp | 2 +- .../depr.strstreambuf.cons/cp_size_cp.pass.cpp | 2 +- .../depr.strstreambuf.cons/cscp_size.pass.cpp | 2 +- .../depr.strstreambuf.cons/cucp_size.pass.cpp | 2 +- .../depr.strstreambuf.cons/custom_alloc.pass.cpp | 2 +- .../depr.strstreambuf.cons/default.pass.cpp | 2 +- .../depr.strstreambuf.cons/scp_size_scp.pass.cpp | 2 +- .../depr.strstreambuf.cons/ucp_size_ucp.pass.cpp | 2 +- .../depr.strstreambuf.members/freeze.pass.cpp | 2 +- .../depr.strstreambuf.members/overflow.pass.cpp | 2 +- .../depr.strstreambuf.members/pcount.pass.cpp | 2 +- .../depr.strstreambuf/depr.strstreambuf.members/str.pass.cpp | 2 +- .../depr.strstreambuf.virtuals/overflow.pass.cpp | 2 +- .../depr.strstreambuf.virtuals/pbackfail.pass.cpp | 2 +- .../depr.strstreambuf.virtuals/seekoff.pass.cpp | 2 +- .../depr.strstreambuf.virtuals/seekpos.pass.cpp | 2 +- .../depr.strstreambuf.virtuals/setbuf.pass.cpp | 2 +- .../depr.strstreambuf.virtuals/underflow.pass.cpp | 2 +- .../depr.str.strstreams/depr.strstreambuf/depr.verify.cpp | 2 ++ .../depr.str.strstreams/depr.strstreambuf/types.pass.cpp | 2 +- 49 files changed, 66 insertions(+), 46 deletions(-) diff --git a/libcxx/docs/ReleaseNotes/19.rst b/libcxx/docs/ReleaseNotes/19.rst index dd39c1b..2da9df5 100644 --- a/libcxx/docs/ReleaseNotes/19.rst +++ b/libcxx/docs/ReleaseNotes/19.rst @@ -42,6 +42,7 @@ Implemented Papers - P2652R2 - Disallow User Specialization of ``allocator_traits`` - P2819R2 - Add ``tuple`` protocol to ``complex`` - P2495R3 - Interfacing ``stringstream``\s with ``string_view`` +- P2867R2 - Remove Deprecated ``strstream``\s From C++26 - P2302R4 - ``std::ranges::contains`` - P1659R3 - ``std::ranges::starts_with`` and ``std::ranges::ends_with`` @@ -54,6 +55,8 @@ Improvements and New Features - The ``std::mismatch`` algorithm has been optimized for integral types, which can lead up to 40x performance improvements. +- The ``_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM`` macro has been added to make the declarations in ```` available. 
+ Deprecations and Removals ------------------------- diff --git a/libcxx/docs/Status/Cxx2cPapers.csv b/libcxx/docs/Status/Cxx2cPapers.csv index 6e82086..a34dad5 100644 --- a/libcxx/docs/Status/Cxx2cPapers.csv +++ b/libcxx/docs/Status/Cxx2cPapers.csv @@ -47,7 +47,7 @@ "`P1673R13 `__","LWG","A free function linear algebra interface based on the BLAS","Kona November 2023","","","" "","","","","","","" "`P2875R4 `__","LWG","Undeprecate ``polymorphic_allocator::destroy`` for C++26","Tokyo March 2024","|Complete|","15.0","" -"`P2867R2 `__","LWG","Remove Deprecated ``strstreams`` From C++26","Tokyo March 2024","","","" +"`P2867R2 `__","LWG","Remove Deprecated ``strstreams`` From C++26","Tokyo March 2024","|Complete|","19.0","" "`P2869R4 `__","LWG","Remove Deprecated ``shared_ptr`` Atomic Access APIs from C++26","Tokyo March 2024","","","" "`P2872R3 `__","LWG","Remove ``wstring_convert`` From C++26","Tokyo March 2024","","","" "`P3107R5 `__","LWG","Permit an efficient implementation of ``std::print``","Tokyo March 2024","","","|format| |DR|" diff --git a/libcxx/docs/UsingLibcxx.rst b/libcxx/docs/UsingLibcxx.rst index ac12b0b..8a1c747 100644 --- a/libcxx/docs/UsingLibcxx.rst +++ b/libcxx/docs/UsingLibcxx.rst @@ -272,7 +272,10 @@ C++26 Specific Configuration Macros ``std::basic_string<...>::reserve()``. **_LIBCPP_ENABLE_CXX26_REMOVED_ALLOCATOR_MEMBERS**: - This macro is used to re-enable redundant member of ``allocator::is_always_equal`` + This macro is used to re-enable redundant member of ``allocator::is_always_equal``. + +**_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM**: + This macro is used to re-enable all named declarations in ````. Libc++ Extensions ================= diff --git a/libcxx/include/strstream b/libcxx/include/strstream index e9f5336..c8df6eb 100644 --- a/libcxx/include/strstream +++ b/libcxx/include/strstream @@ -13,7 +13,7 @@ /* strstream synopsis -class strstreambuf +class strstreambuf // Removed in C++26 : public basic_streambuf { public: @@ -63,7 +63,7 @@ private: void (*pfree)(void*); // exposition only }; -class istrstream +class istrstream // Removed in C++26 : public basic_istream { public: @@ -81,7 +81,7 @@ private: strstreambuf sb; // exposition only }; -class ostrstream +class ostrstream // Removed in C++26 : public basic_ostream { public: @@ -99,7 +99,7 @@ private: strstreambuf sb; // exposition only }; -class strstream +class strstream // Removed in C++26 : public basic_iostream { public: @@ -138,6 +138,8 @@ private: # pragma GCC system_header #endif +#if _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM) || defined(_LIBCPP_BUILDING_LIBRARY) + _LIBCPP_PUSH_MACROS #include <__undef_macros> @@ -344,4 +346,6 @@ _LIBCPP_END_NAMESPACE_STD _LIBCPP_POP_MACROS +#endif // _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM) || defined(_LIBCPP_BUILDING_LIBRARY) + #endif // _LIBCPP_STRSTREAM diff --git a/libcxx/modules/std/strstream.inc b/libcxx/modules/std/strstream.inc index a33c514..8087967 100644 --- a/libcxx/modules/std/strstream.inc +++ b/libcxx/modules/std/strstream.inc @@ -9,9 +9,11 @@ export namespace std { #ifndef _LIBCPP_HAS_NO_LOCALIZATION +# if _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM) using std::istrstream; using std::ostrstream; using std::strstream; using std::strstreambuf; +# endif // _LIBCPP_STD_VER < 26 || defined(_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM) #endif // _LIBCPP_HAS_NO_LOCALIZATION } // namespace std diff --git 
a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp.pass.cpp index b5ee0bf..701e6df 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp_size.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp_size.pass.cpp index 4d0d673..fd2ad66 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp_size.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/ccp_size.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp.pass.cpp index 5898094..c5fe349 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp_size.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp_size.pass.cpp index e13e20e..7d9c7d6 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp_size.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.cons/cp_size.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/rdbuf.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/rdbuf.pass.cpp index 449114a..f5ef29b 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/rdbuf.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/rdbuf.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM 
// diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/str.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/str.pass.cpp index e7c0637..63d3800 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/str.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.istrstream.members/str.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.verify.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.verify.cpp index 2ab252e..d02f12d 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.verify.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/depr.verify.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM + // // check that istrstream is marked deprecated diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/types.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/types.pass.cpp index be1a9e1..526c4dc 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/types.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.istrstream/types.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/cp_size_mode.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/cp_size_mode.pass.cpp index 8698983..b9db1bf 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/cp_size_mode.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/cp_size_mode.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/default.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/default.pass.cpp index abbf6af..b67f0ab 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/default.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.cons/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/freeze.pass.cpp 
b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/freeze.pass.cpp index 854e68b..e087c06 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/freeze.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/freeze.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/pcount.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/pcount.pass.cpp index 9830aeb..73f2033 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/pcount.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/pcount.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/rdbuf.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/rdbuf.pass.cpp index f9a859d..d8b55d9 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/rdbuf.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/rdbuf.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/str.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/str.pass.cpp index 72f665a..2867031 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/str.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.ostrstream.members/str.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.verify.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.verify.cpp index e0c805f..9ec4650 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.verify.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/depr.verify.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM + // // check that ostrstream is marked deprecated diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/types.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/types.pass.cpp index 6a71c44..e321444 100644 --- 
a/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/types.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.ostrstream/types.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/cp_size_mode.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/cp_size_mode.pass.cpp index a85e132..54e09d3 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/cp_size_mode.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/cp_size_mode.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/default.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/default.pass.cpp index 390162e..1e4c120 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/default.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.cons/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.dest/rdbuf.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.dest/rdbuf.pass.cpp index 3fe277a..99d58cc 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.dest/rdbuf.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.dest/rdbuf.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/freeze.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/freeze.pass.cpp index 263fdde..6cc26bb 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/freeze.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/freeze.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/pcount.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/pcount.pass.cpp index b053cf1..efe2c32 100644 --- 
a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/pcount.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/pcount.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/str.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/str.pass.cpp index 3d251d9..e1bc40c 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/str.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.strstream.oper/str.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.verify.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.verify.cpp index 0365522..ab88d6d 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.verify.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/depr.verify.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM + // // check that strstream is marked deprecated diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/types.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/types.pass.cpp index fb54384..7609430 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/types.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstream/types.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ccp_size.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ccp_size.pass.cpp index 8f81707..3d7f2ff 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ccp_size.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ccp_size.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cp_size_cp.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cp_size_cp.pass.cpp index 25a9617..0c4bf62 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cp_size_cp.pass.cpp +++ 
b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cp_size_cp.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cscp_size.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cscp_size.pass.cpp index fc3386f..e0928835 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cscp_size.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cscp_size.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cucp_size.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cucp_size.pass.cpp index a74c504..7f5504d 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cucp_size.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/cucp_size.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/custom_alloc.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/custom_alloc.pass.cpp index 756427d..0aa7e1a 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/custom_alloc.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/custom_alloc.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp index 81924c9..803be42 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/default.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/scp_size_scp.pass.cpp 
b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/scp_size_scp.pass.cpp index b8991a8..35c1512 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/scp_size_scp.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/scp_size_scp.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ucp_size_ucp.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ucp_size_ucp.pass.cpp index 1d3463f..e71fa031 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ucp_size_ucp.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.cons/ucp_size_ucp.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/freeze.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/freeze.pass.cpp index 93eec8d..1276187 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/freeze.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/freeze.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/overflow.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/overflow.pass.cpp index 5b973cf..fc79e78 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/overflow.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/overflow.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/pcount.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/pcount.pass.cpp index b64c9dc..b62c339 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/pcount.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/pcount.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: 
-D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/str.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/str.pass.cpp index d6c8b8e..68be8dd 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/str.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.members/str.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/overflow.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/overflow.pass.cpp index 37109c7..6a932dc 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/overflow.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/overflow.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/pbackfail.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/pbackfail.pass.cpp index 698953f..484a726 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/pbackfail.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/pbackfail.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekoff.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekoff.pass.cpp index d98e6f7..96900d8 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekoff.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekoff.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekpos.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekpos.pass.cpp index be88f5a..f3193d3 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekpos.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/seekpos.pass.cpp @@ 
-6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/setbuf.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/setbuf.pass.cpp index ce7612b..44e6704 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/setbuf.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/setbuf.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/underflow.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/underflow.pass.cpp index 4fc79b5..be916be 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/underflow.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.strstreambuf.virtuals/underflow.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.verify.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.verify.cpp index a598acb..471b287 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.verify.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/depr.verify.cpp @@ -6,6 +6,8 @@ // //===----------------------------------------------------------------------===// +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM + // // check that strstreambuf is marked deprecated diff --git a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/types.pass.cpp b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/types.pass.cpp index bc312cb..aee1260 100644 --- a/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/types.pass.cpp +++ b/libcxx/test/std/depr/depr.str.strstreams/depr.strstreambuf/types.pass.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS +// ADDITIONAL_COMPILE_FLAGS: -D_LIBCPP_DISABLE_DEPRECATION_WARNINGS -D_LIBCPP_ENABLE_CXX26_REMOVED_STRSTREAM // -- cgit v1.1 From 07d5f491867d390b9aee33035c187e27cf0746a0 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 4 Apr 2024 11:30:23 +0800 Subject: [RISCV] Add patterns for fixed vector vwsll (#87316) Fixed vectors have their sext/zext operands legalized to _VL nodes, so we need to handle them in the patterns. This adds a riscv_ext_vl_oneuse pattern since we don't care about the type of extension used for the shift amount, and extends Low8BitsSplatPat to handle other _VL nodes. 
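As a rough model of the Low8BitsSplatPat change (the struct and function below
are invented for illustration and are not the real SelectionDAG API), the idea
is to peel through a chain of single-use extends and truncates and accept the
splat only if no step in the chain works on elements narrower than 8 bits:

```
// Hypothetical sketch: walk through sign/zero extends and truncates (the
// generic and the _VL forms are treated the same here) and bail out if the
// low 8 bits of the splat value could have been dropped along the way.
enum class Kind { SExt, ZExt, Trunc, Leaf };

struct Node {
  Kind Op;
  unsigned ScalarBits; // element size in bits, not the whole vector size
  bool HasOneUse;
  const Node *Src;
};

static const Node *peelExtAndTrunc(const Node *N) {
  while (N->Op == Kind::SExt || N->Op == Kind::ZExt || N->Op == Kind::Trunc) {
    if (!N->HasOneUse || N->ScalarBits < 8)
      return nullptr; // cannot prove the low 8 bits survived
    N = N->Src;
  }
  return N;
}
```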
We don't actually need to check the mask or VL there since none of the _VL nodes have passthru operands. The remaining test cases that are widening from i8->i64 need to be handled by extending combineBinOp_VLToVWBinOp_VL. This also fixes Low8BitsSplatPat incorrectly checking the vector size instead of the element size to determine if the splat value might have been truncated below 8 bits. --- llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 30 ++-- llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td | 8 + llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td | 35 ++++ llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll | 183 ++++++++------------- 4 files changed, 130 insertions(+), 126 deletions(-) diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp index 55ba494..f99dc0b 100644 --- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp @@ -3287,24 +3287,24 @@ bool RISCVDAGToDAGISel::selectVSplatUimm(SDValue N, unsigned Bits, } bool RISCVDAGToDAGISel::selectLow8BitsVSplat(SDValue N, SDValue &SplatVal) { - // Truncates are custom lowered during legalization. - auto IsTrunc = [this](SDValue N) { - if (N->getOpcode() != RISCVISD::TRUNCATE_VECTOR_VL) + auto IsExtOrTrunc = [](SDValue N) { + switch (N->getOpcode()) { + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + // There's no passthru on these _VL nodes so any VL/mask is ok, since any + // inactive elements will be undef. + case RISCVISD::TRUNCATE_VECTOR_VL: + case RISCVISD::VSEXT_VL: + case RISCVISD::VZEXT_VL: + return true; + default: return false; - SDValue VL; - selectVLOp(N->getOperand(2), VL); - // Any vmset_vl is ok, since any bits past VL are undefined and we can - // assume they are set. - return N->getOperand(1).getOpcode() == RISCVISD::VMSET_VL && - isa(VL) && - cast(VL)->getSExtValue() == RISCV::VLMaxSentinel; + } }; - // We can have multiple nested truncates, so unravel them all if needed. - while (N->getOpcode() == ISD::SIGN_EXTEND || - N->getOpcode() == ISD::ZERO_EXTEND || IsTrunc(N)) { - if (!N.hasOneUse() || - N.getValueType().getSizeInBits().getKnownMinValue() < 8) + // We can have multiple nested nodes, so unravel them all if needed. 
+ while (IsExtOrTrunc(N)) { + if (!N.hasOneUse() || N.getScalarValueSizeInBits() < 8) return false; N = N->getOperand(0); } diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td index cc44092..73d52d5 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVVLPatterns.td @@ -387,6 +387,9 @@ def SDT_RISCVVEXTEND_VL : SDTypeProfile<1, 3, [SDTCisVec<0>, SDTCisVT<3, XLenVT>]>; def riscv_sext_vl : SDNode<"RISCVISD::VSEXT_VL", SDT_RISCVVEXTEND_VL>; def riscv_zext_vl : SDNode<"RISCVISD::VZEXT_VL", SDT_RISCVVEXTEND_VL>; +def riscv_ext_vl : PatFrags<(ops node:$A, node:$B, node:$C), + [(riscv_sext_vl node:$A, node:$B, node:$C), + (riscv_zext_vl node:$A, node:$B, node:$C)]>; def riscv_trunc_vector_vl : SDNode<"RISCVISD::TRUNCATE_VECTOR_VL", SDTypeProfile<1, 3, [SDTCisVec<0>, @@ -535,6 +538,11 @@ def riscv_zext_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C), return N->hasOneUse(); }]>; +def riscv_ext_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C), + (riscv_ext_vl node:$A, node:$B, node:$C), [{ + return N->hasOneUse(); +}]>; + def riscv_fpextend_vl_oneuse : PatFrag<(ops node:$A, node:$B, node:$C), (riscv_fpextend_vl node:$A, node:$B, node:$C), [{ return N->hasOneUse(); diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td index 51a7a0a1..c1facc79 100644 --- a/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td +++ b/llvm/lib/Target/RISCV/RISCVInstrInfoZvk.td @@ -630,6 +630,19 @@ foreach vtiToWti = AllWidenableIntVectors in { (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl + (wti.Vector (riscv_zext_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask V0), VLOpFrag)), + (wti.Vector (riscv_ext_vl_oneuse + (vti.Vector vti.RegClass:$rs1), + (vti.Mask V0), VLOpFrag)), + (wti.Vector wti.RegClass:$merge), + (vti.Mask V0), VLOpFrag), + (!cast("PseudoVWSLL_VV_"#vti.LMul.MX#"_MASK") + wti.RegClass:$merge, vti.RegClass:$rs2, vti.RegClass:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + + def : Pat<(riscv_shl_vl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))), (wti.Vector wti.RegClass:$merge), @@ -639,6 +652,17 @@ foreach vtiToWti = AllWidenableIntVectors in { (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; def : Pat<(riscv_shl_vl + (wti.Vector (riscv_zext_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask V0), VLOpFrag)), + (wti.Vector (Low8BitsSplatPat (XLenVT GPR:$rs1))), + (wti.Vector wti.RegClass:$merge), + (vti.Mask V0), VLOpFrag), + (!cast("PseudoVWSLL_VX_"#vti.LMul.MX#"_MASK") + wti.RegClass:$merge, vti.RegClass:$rs2, GPR:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + + def : Pat<(riscv_shl_vl (wti.Vector (zext_oneuse (vti.Vector vti.RegClass:$rs2))), (wti.Vector (SplatPat_uimm5 uimm5:$rs1)), (wti.Vector wti.RegClass:$merge), @@ -647,6 +671,17 @@ foreach vtiToWti = AllWidenableIntVectors in { wti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$rs1, (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_shl_vl + (wti.Vector (riscv_zext_vl_oneuse + (vti.Vector vti.RegClass:$rs2), + (vti.Mask V0), VLOpFrag)), + (wti.Vector (SplatPat_uimm5 uimm5:$rs1)), + (wti.Vector wti.RegClass:$merge), + (vti.Mask V0), VLOpFrag), + (!cast("PseudoVWSLL_VI_"#vti.LMul.MX#"_MASK") + wti.RegClass:$merge, vti.RegClass:$rs2, uimm5:$rs1, + (vti.Mask V0), GPR:$vl, vti.Log2SEW, TAIL_AGNOSTIC)>; + def : Pat<(riscv_vwsll_vl 
(vti.Vector vti.RegClass:$rs2), (vti.Vector vti.RegClass:$rs1), diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll index f5305a1..83d1d1b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-vwsll.ll @@ -19,10 +19,9 @@ define <4 x i64> @vwsll_vv_v4i64_sext(<4 x i32> %a, <4 x i32> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_sext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <4 x i32> %a to <4 x i64> %y = sext <4 x i32> %b to <4 x i64> @@ -41,10 +40,9 @@ define <4 x i64> @vwsll_vv_v4i64_zext(<4 x i32> %a, <4 x i32> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v4i64_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <4 x i32> %a to <4 x i64> %y = zext <4 x i32> %b to <4 x i64> @@ -62,9 +60,9 @@ define <4 x i64> @vwsll_vx_i64_v4i64(<4 x i32> %a, i64 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i64_v4i64: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i64> poison, i64 %b, i32 0 %splat = shufflevector <4 x i64> %head, <4 x i64> poison, <4 x i32> zeroinitializer @@ -88,10 +86,8 @@ define <4 x i64> @vwsll_vx_i32_v4i64_sext(<4 x i32> %a, i32 %b) { ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i32> poison, i32 %b, i32 0 %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer @@ -116,10 +112,8 @@ define <4 x i64> @vwsll_vx_i32_v4i64_zext(<4 x i32> %a, i32 %b) { ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma ; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i32> poison, i32 %b, i32 0 %splat = shufflevector <4 x i32> %head, <4 x i32> poison, <4 x i32> zeroinitializer @@ -142,12 +136,9 @@ define <4 x i64> @vwsll_vx_i16_v4i64_sext(<4 x i32> %a, i16 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_sext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; 
CHECK-ZVBB-NEXT: vsext.vf4 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i16> poison, i16 %b, i32 0 %splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer @@ -170,12 +161,9 @@ define <4 x i64> @vwsll_vx_i16_v4i64_zext(<4 x i32> %a, i16 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v4i64_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e16, mf2, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i16> poison, i16 %b, i32 0 %splat = shufflevector <4 x i16> %head, <4 x i16> poison, <4 x i32> zeroinitializer @@ -198,12 +186,9 @@ define <4 x i64> @vwsll_vx_i8_v4i64_sext(<4 x i32> %a, i8 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_sext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i8> poison, i8 %b, i32 0 %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer @@ -226,12 +211,9 @@ define <4 x i64> @vwsll_vx_i8_v4i64_zext(<4 x i32> %a, i8 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v4i64_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e8, mf4, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <4 x i8> poison, i8 %b, i32 0 %splat = shufflevector <4 x i8> %head, <4 x i8> poison, <4 x i32> zeroinitializer @@ -251,9 +233,9 @@ define <4 x i64> @vwsll_vi_v4i64(<4 x i32> %a) { ; ; CHECK-ZVBB-LABEL: vwsll_vi_v4i64: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 4, e64, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-NEXT: vsetivli zero, 4, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <4 x i32> %a to <4 x i64> %z = shl <4 x i64> %x, splat (i64 2) @@ -275,10 +257,9 @@ define <8 x i32> @vwsll_vv_v8i32_sext(<8 x i16> %a, <8 x i16> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v8i32_sext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <8 x i16> %a to <8 x i32> %y = sext <8 x i16> %b to <8 x i32> @@ -297,10 +278,9 @@ define 
<8 x i32> @vwsll_vv_v8i32_zext(<8 x i16> %a, <8 x i16> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v8i32_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <8 x i16> %a to <8 x i32> %y = zext <8 x i16> %b to <8 x i32> @@ -318,9 +298,9 @@ define <8 x i32> @vwsll_vx_i64_v8i32(<8 x i16> %a, i64 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i64_v8i32: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i64> poison, i64 %b, i32 0 %splat = shufflevector <8 x i64> %head, <8 x i64> poison, <8 x i32> zeroinitializer @@ -340,9 +320,9 @@ define <8 x i32> @vwsll_vx_i32_v8i32(<8 x i16> %a, i32 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v8i32: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i32> poison, i32 %b, i32 0 %splat = shufflevector <8 x i32> %head, <8 x i32> poison, <8 x i32> zeroinitializer @@ -366,10 +346,8 @@ define <8 x i32> @vwsll_vx_i16_v8i32_sext(<8 x i16> %a, i16 %b) { ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i16> poison, i16 %b, i32 0 %splat = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer @@ -394,10 +372,8 @@ define <8 x i32> @vwsll_vx_i16_v8i32_zext(<8 x i16> %a, i16 %b) { ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma ; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i16> poison, i16 %b, i32 0 %splat = shufflevector <8 x i16> %head, <8 x i16> poison, <8 x i32> zeroinitializer @@ -420,12 +396,9 @@ define <8 x i32> @vwsll_vx_i8_v8i32_sext(<8 x i16> %a, i8 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v8i32_sext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i8> poison, i8 %b, i32 0 %splat = shufflevector <8 x i8> %head, <8 x i8> 
poison, <8 x i32> zeroinitializer @@ -448,12 +421,9 @@ define <8 x i32> @vwsll_vx_i8_v8i32_zext(<8 x i16> %a, i8 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i8_v8i32_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e8, mf2, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <8 x i8> poison, i8 %b, i32 0 %splat = shufflevector <8 x i8> %head, <8 x i8> poison, <8 x i32> zeroinitializer @@ -473,9 +443,9 @@ define <8 x i32> @vwsll_vi_v8i32(<8 x i16> %a) { ; ; CHECK-ZVBB-LABEL: vwsll_vi_v8i32: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 8, e32, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-NEXT: vsetivli zero, 8, e16, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <8 x i16> %a to <8 x i32> %z = shl <8 x i32> %x, splat (i32 2) @@ -497,10 +467,9 @@ define <16 x i16> @vwsll_vv_v16i16_sext(<16 x i8> %a, <16 x i8> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v16i16_sext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <16 x i8> %a to <16 x i16> %y = sext <16 x i8> %b to <16 x i16> @@ -519,10 +488,9 @@ define <16 x i16> @vwsll_vv_v16i16_zext(<16 x i8> %a, <16 x i8> %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vv_v16i16_zext: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <16 x i8> %a to <16 x i16> %y = zext <16 x i8> %b to <16 x i16> @@ -552,12 +520,9 @@ define <16 x i16> @vwsll_vx_i32_v16i16(<16 x i8> %a, i32 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i32_v16i16: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 16, e32, m4, ta, ma -; CHECK-ZVBB-NEXT: vmv.v.x v12, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vnsrl.wi v8, v12, 0 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v8 +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <16 x i32> poison, i32 %b, i32 0 %splat = shufflevector <16 x i32> %head, <16 x i32> poison, <16 x i32> zeroinitializer @@ -577,9 +542,9 @@ define <16 x i16> @vwsll_vx_i16_v16i16(<16 x i8> %a, i16 %b) { ; ; CHECK-ZVBB-LABEL: vwsll_vx_i16_v16i16: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vx v10, v8, a0 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <16 x i16> poison, i16 %b, i32 0 %splat = 
shufflevector <16 x i16> %head, <16 x i16> poison, <16 x i32> zeroinitializer @@ -603,10 +568,8 @@ define <16 x i16> @vwsll_vx_i8_v16i16_sext(<16 x i8> %a, i8 %b) { ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <16 x i8> poison, i8 %b, i32 0 %splat = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer @@ -631,10 +594,8 @@ define <16 x i16> @vwsll_vx_i8_v16i16_zext(<16 x i8> %a, i8 %b) { ; CHECK-ZVBB: # %bb.0: ; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma ; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 -; CHECK-ZVBB-NEXT: vsetvli zero, zero, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 -; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: vwsll.vv v10, v8, v9 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %head = insertelement <16 x i8> poison, i8 %b, i32 0 %splat = shufflevector <16 x i8> %head, <16 x i8> poison, <16 x i32> zeroinitializer @@ -654,9 +615,9 @@ define <16 x i16> @vwsll_vi_v16i16(<16 x i8> %a) { ; ; CHECK-ZVBB-LABEL: vwsll_vi_v16i16: ; CHECK-ZVBB: # %bb.0: -; CHECK-ZVBB-NEXT: vsetivli zero, 16, e16, m2, ta, ma -; CHECK-ZVBB-NEXT: vzext.vf2 v10, v8 -; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-NEXT: vsetivli zero, 16, e8, m1, ta, ma +; CHECK-ZVBB-NEXT: vwsll.vi v10, v8, 2 +; CHECK-ZVBB-NEXT: vmv2r.v v8, v10 ; CHECK-ZVBB-NEXT: ret %x = zext <16 x i8> %a to <16 x i16> %z = shl <16 x i16> %x, splat (i16 2) -- cgit v1.1 From 698bf3dafcc0dfa15540ae7f1f9b72208a578bd2 Mon Sep 17 00:00:00 2001 From: Sourabh Singh Tomar Date: Thu, 4 Apr 2024 09:27:57 +0530 Subject: [flang][OpenMP] Fix for #86393 (#87452) --- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 6 +- .../test/Lower/OpenMP/parallel-reduction-array.f90 | 2 +- .../Lower/OpenMP/parallel-reduction-array2.f90 | 2 +- flang/test/Lower/OpenMP/parallel-reduction3.f90 | 125 +++++++++++++++++++++ flang/test/Lower/OpenMP/wsloop-reduction-array.f90 | 2 +- .../test/Lower/OpenMP/wsloop-reduction-array2.f90 | 2 +- 6 files changed, 134 insertions(+), 5 deletions(-) create mode 100644 flang/test/Lower/OpenMP/parallel-reduction3.f90 diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index 0d05ca5..6a8447a 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -13,6 +13,7 @@ #include "ReductionProcessor.h" #include "flang/Lower/AbstractConverter.h" +#include "flang/Lower/SymbolMap.h" #include "flang/Optimizer/Builder/HLFIRTools.h" #include "flang/Optimizer/Builder/Todo.h" #include "flang/Optimizer/Dialect/FIRType.h" @@ -527,7 +528,10 @@ void ReductionProcessor::addDeclareReduction( // all arrays must be boxed so that we have convenient access to all the // information needed to iterate over the array if (mlir::isa(redType.getEleTy())) { - hlfir::Entity entity{symVal}; + // For Host associated symbols, use `SymbolBox` instead + Fortran::lower::SymbolBox symBox = + converter.lookupOneLevelUpSymbol(*symbol); + hlfir::Entity entity{symBox.getAddr()}; entity = genVariableBox(currentLocation, builder, entity); mlir::Value box = entity.getBase(); diff --git 
a/flang/test/Lower/OpenMP/parallel-reduction-array.f90 b/flang/test/Lower/OpenMP/parallel-reduction-array.f90 index 735a998..56dcabb 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction-array.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction-array.f90 @@ -50,7 +50,7 @@ end program ! CHECK: %[[VAL_1:.*]] = arith.constant 3 : index ! CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_2]]) {uniq_name = "_QFEi"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#1(%[[VAL_2]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#0(%[[VAL_2]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.box> ! CHECK: fir.store %[[VAL_4]] to %[[VAL_5]] : !fir.ref>> ! CHECK: omp.parallel byref reduction(@add_reduction_byref_box_3xi32 %[[VAL_5]] -> %[[VAL_6:.*]] : !fir.ref>>) { diff --git a/flang/test/Lower/OpenMP/parallel-reduction-array2.f90 b/flang/test/Lower/OpenMP/parallel-reduction-array2.f90 index 4834047..94bff41 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction-array2.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction-array2.f90 @@ -50,7 +50,7 @@ end program ! CHECK: %[[VAL_1:.*]] = arith.constant 3 : index ! CHECK: %[[VAL_2:.*]] = fir.shape %[[VAL_1]] : (index) -> !fir.shape<1> ! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_2]]) {uniq_name = "_QFEi"} : (!fir.ref>, !fir.shape<1>) -> (!fir.ref>, !fir.ref>) -! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#1(%[[VAL_2]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[VAL_4:.*]] = fir.embox %[[VAL_3]]#0(%[[VAL_2]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> ! CHECK: %[[VAL_5:.*]] = fir.alloca !fir.box> ! CHECK: fir.store %[[VAL_4]] to %[[VAL_5]] : !fir.ref>> ! CHECK: omp.parallel byref reduction(@add_reduction_byref_box_3xi32 %[[VAL_5]] -> %[[VAL_6:.*]] : !fir.ref>>) { diff --git a/flang/test/Lower/OpenMP/parallel-reduction3.f90 b/flang/test/Lower/OpenMP/parallel-reduction3.f90 new file mode 100644 index 0000000..b257597 --- /dev/null +++ b/flang/test/Lower/OpenMP/parallel-reduction3.f90 @@ -0,0 +1,125 @@ +! NOTE: Assertions have been autogenerated by utils/generate-test-checks.py + +! The script is designed to make adding checks to +! a test case fast, it is *not* designed to be authoritative +! about what constitutes a good test! The CHECK should be +! minimized and named to reflect the test intent. + +! RUN: bbc -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s 2>&1 | FileCheck %s + + + +! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_Uxi32 : !fir.ref>> init { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): +! CHECK: %[[VAL_1:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1> +! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.array, %[[VAL_4]]#1 {bindc_name = ".tmp"} +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! CHECK: hlfir.assign %[[VAL_1]] to %[[VAL_7]]#0 : i32, !fir.box> +! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box> +! CHECK: fir.store %[[VAL_7]]#0 to %[[VAL_8]] : !fir.ref>> +! 
CHECK: omp.yield(%[[VAL_8]] : !fir.ref>>) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>, %[[VAL_1:.*]]: !fir.ref>>): +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref>> +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1> +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : index +! CHECK: fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered { +! CHECK: %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box>, !fir.shapeshift<1>, index) -> !fir.ref +! CHECK: %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box>, !fir.shapeshift<1>, index) -> !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref +! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref +! CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_11]], %[[VAL_12]] : i32 +! CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref +! CHECK: } +! CHECK: omp.yield(%[[VAL_0]] : !fir.ref>>) +! CHECK: } + +! CHECK-LABEL: func.func @_QPs( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref {fir.bindc_name = "x"}) { +! CHECK: %[[VAL_1:.*]]:2 = hlfir.declare %[[VAL_0]] {uniq_name = "_QFsEx"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFsEi"} +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_2]] {uniq_name = "_QFsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_4:.*]] = fir.load %[[VAL_1]]#0 : !fir.ref +! CHECK: %[[VAL_5:.*]] = fir.convert %[[VAL_4]] : (i32) -> i64 +! CHECK: %[[VAL_6:.*]] = fir.convert %[[VAL_5]] : (i64) -> index +! CHECK: %[[VAL_7:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_8:.*]] = arith.cmpi sgt, %[[VAL_6]], %[[VAL_7]] : index +! CHECK: %[[VAL_9:.*]] = arith.select %[[VAL_8]], %[[VAL_6]], %[[VAL_7]] : index +! CHECK: %[[VAL_10:.*]] = fir.alloca !fir.array, %[[VAL_9]] {bindc_name = "c", uniq_name = "_QFsEc"} +! CHECK: %[[VAL_11:.*]] = fir.shape %[[VAL_9]] : (index) -> !fir.shape<1> +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]](%[[VAL_11]]) {uniq_name = "_QFsEc"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! CHECK: %[[VAL_13:.*]] = arith.constant 0 : i32 +! CHECK: hlfir.assign %[[VAL_13]] to %[[VAL_12]]#0 : i32, !fir.box> +! CHECK: omp.parallel { +! CHECK: %[[VAL_14:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[VAL_14]] {uniq_name = "_QFsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_16:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_17:.*]] = arith.constant 100 : i32 +! CHECK: %[[VAL_18:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_19:.*]] = fir.alloca !fir.box> +! CHECK: fir.store %[[VAL_12]]#0 to %[[VAL_19]] : !fir.ref>> +! CHECK: omp.wsloop byref reduction(@add_reduction_byref_box_Uxi32 %[[VAL_19]] -> %[[VAL_20:.*]] : !fir.ref>>) for (%[[VAL_21:.*]]) : i32 = (%[[VAL_16]]) to (%[[VAL_17]]) inclusive step (%[[VAL_18]]) { +! CHECK: fir.store %[[VAL_21]] to %[[VAL_15]]#1 : !fir.ref +! CHECK: %[[VAL_22:.*]]:2 = hlfir.declare %[[VAL_20]] {uniq_name = "_QFsEc"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_23:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref>> +! CHECK: %[[VAL_24:.*]] = fir.load %[[VAL_15]]#0 : !fir.ref +! CHECK: %[[VAL_25:.*]] = arith.constant 0 : index +! 
CHECK: %[[VAL_26:.*]]:3 = fir.box_dims %[[VAL_23]], %[[VAL_25]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_27:.*]] = fir.shape %[[VAL_26]]#1 : (index) -> !fir.shape<1> +! CHECK: %[[VAL_28:.*]] = hlfir.elemental %[[VAL_27]] unordered : (!fir.shape<1>) -> !hlfir.expr { +! CHECK: ^bb0(%[[VAL_29:.*]]: index): +! CHECK: %[[VAL_30:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_31:.*]]:3 = fir.box_dims %[[VAL_23]], %[[VAL_30]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_32:.*]] = arith.constant 1 : index +! CHECK: %[[VAL_33:.*]] = arith.subi %[[VAL_31]]#0, %[[VAL_32]] : index +! CHECK: %[[VAL_34:.*]] = arith.addi %[[VAL_29]], %[[VAL_33]] : index +! CHECK: %[[VAL_35:.*]] = hlfir.designate %[[VAL_23]] (%[[VAL_34]]) : (!fir.box>, index) -> !fir.ref +! CHECK: %[[VAL_36:.*]] = fir.load %[[VAL_35]] : !fir.ref +! CHECK: %[[VAL_37:.*]] = arith.addi %[[VAL_36]], %[[VAL_24]] : i32 +! CHECK: hlfir.yield_element %[[VAL_37]] : i32 +! CHECK: } +! CHECK: %[[VAL_38:.*]] = fir.load %[[VAL_22]]#0 : !fir.ref>> +! CHECK: hlfir.assign %[[VAL_28]] to %[[VAL_38]] : !hlfir.expr, !fir.box> +! CHECK: hlfir.destroy %[[VAL_28]] : !hlfir.expr +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: %[[VAL_39:.*]] = arith.constant 1 : index +! CHECK: %[[VAL_40:.*]] = hlfir.designate %[[VAL_12]]#0 (%[[VAL_39]]) : (!fir.box>, index) -> !fir.ref +! CHECK: %[[VAL_41:.*]] = fir.load %[[VAL_40]] : !fir.ref +! CHECK: %[[VAL_42:.*]] = arith.constant 5050 : i32 +! CHECK: %[[VAL_43:.*]] = arith.cmpi ne, %[[VAL_41]], %[[VAL_42]] : i32 +! CHECK: cf.cond_br %[[VAL_43]], ^bb1, ^bb2 +! CHECK: ^bb1: +! CHECK: %[[VAL_44:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_45:.*]] = arith.constant false +! CHECK: %[[VAL_46:.*]] = arith.constant false +! CHECK: %[[VAL_47:.*]] = fir.call @_FortranAStopStatement(%[[VAL_44]], %[[VAL_45]], %[[VAL_46]]) fastmath : (i32, i1, i1) -> none +! CHECK: fir.unreachable +! CHECK: ^bb2: +! CHECK: return +! CHECK: } +! CHECK: func.func private @_FortranAStopStatement(i32, i1, i1) -> none attributes {fir.runtime} + +subroutine s(x) + integer :: x + integer :: c(x) + c = 0 + !$omp parallel do reduction(+:c) + do i = 1, 100 + c = c + i + end do + !$omp end parallel do + + if (c(1) /= 5050) stop 1 +end subroutine s \ No newline at end of file diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 index a20ed1c..a898204 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array.f90 @@ -60,7 +60,7 @@ end program ! CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#1(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> ! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.box> ! CHECK: fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref>> ! 
CHECK: omp.wsloop byref reduction(@add_reduction_byref_box_2xi32 %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref>>) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 index 6159987..f3745c8 100644 --- a/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array2.f90 @@ -60,7 +60,7 @@ end program ! CHECK: %[[VAL_8:.*]] = arith.constant 0 : i32 ! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 ! CHECK: %[[VAL_10:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#1(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> +! CHECK: %[[VAL_11:.*]] = fir.embox %[[VAL_5]]#0(%[[VAL_4]]) : (!fir.ref>, !fir.shape<1>) -> !fir.box> ! CHECK: %[[VAL_12:.*]] = fir.alloca !fir.box> ! CHECK: fir.store %[[VAL_11]] to %[[VAL_12]] : !fir.ref>> ! CHECK: omp.wsloop byref reduction(@add_reduction_byref_box_2xi32 %[[VAL_12]] -> %[[VAL_13:.*]] : !fir.ref>>) for (%[[VAL_14:.*]]) : i32 = (%[[VAL_8]]) to (%[[VAL_9]]) inclusive step (%[[VAL_10]]) { -- cgit v1.1 From e8aaa3eaedc3b5519747a68e772f0bc664b89154 Mon Sep 17 00:00:00 2001 From: Gulfem Savrun Yeniceri Date: Thu, 4 Apr 2024 04:12:36 +0000 Subject: Revert "[libc] Added transitive bindings for OffsetType (#87397)" This reverts commit 3ee93f486293420852fb9ec95af9c5f54cecdb08 because it broke Fuchsia Clang toolchain builders: https://logs.chromium.org/logs/fuchsia/buildbucket/cr-buildbucket/8751633430491432833/+/u/clang/build/stdout --- libc/config/baremetal/api.td | 5 +---- libc/config/gpu/api.td | 6 +----- libc/config/linux/api.td | 12 ++---------- libc/include/CMakeLists.txt | 10 ++++------ libc/spec/posix.td | 7 ++----- libc/src/stdio/fseeko.h | 1 + libc/src/stdio/ftello.h | 1 + 7 files changed, 12 insertions(+), 30 deletions(-) diff --git a/libc/config/baremetal/api.td b/libc/config/baremetal/api.td index 690edbd..25aa06a 100644 --- a/libc/config/baremetal/api.td +++ b/libc/config/baremetal/api.td @@ -57,10 +57,7 @@ def MathAPI : PublicAPI<"math.h"> { } def StdIOAPI : PublicAPI<"stdio.h"> { - let Types = [ - "size_t", - "off_t", - ]; + let Types = ["size_t"]; } def StdlibAPI : PublicAPI<"stdlib.h"> { diff --git a/libc/config/gpu/api.td b/libc/config/gpu/api.td index 523ad49..adaf5bf 100644 --- a/libc/config/gpu/api.td +++ b/libc/config/gpu/api.td @@ -64,11 +64,7 @@ def StdIOAPI : PublicAPI<"stdio.h"> { SimpleMacroDef<"_IOLBF", "1">, SimpleMacroDef<"_IONBF", "2">, ]; - let Types = [ - "FILE", - "off_t", - "size_t", - ]; + let Types = ["size_t", "FILE"]; } def IntTypesAPI : PublicAPI<"inttypes.h"> { diff --git a/libc/config/linux/api.td b/libc/config/linux/api.td index 9964971..eb5ed80 100644 --- a/libc/config/linux/api.td +++ b/libc/config/linux/api.td @@ -49,10 +49,7 @@ def CTypeAPI : PublicAPI<"ctype.h"> { } def FCntlAPI : PublicAPI<"fcntl.h"> { - let Types = [ - "mode_t", - "off_t", - ]; + let Types = ["mode_t"]; } def IntTypesAPI : PublicAPI<"inttypes.h"> { @@ -80,12 +77,7 @@ def StdIOAPI : PublicAPI<"stdio.h"> { SimpleMacroDef<"_IOLBF", "1">, SimpleMacroDef<"_IONBF", "2">, ]; - let Types = [ - "FILE", - "cookie_io_functions_t", - "off_t", - "size_t", - ]; + let Types = ["size_t", "FILE", "cookie_io_functions_t"]; } def StdlibAPI : PublicAPI<"stdlib.h"> { diff --git a/libc/include/CMakeLists.txt b/libc/include/CMakeLists.txt index 02c7dc8..4203f0b 100644 --- a/libc/include/CMakeLists.txt +++ b/libc/include/CMakeLists.txt @@ -41,10 +41,9 @@ 
add_gen_header( DEF_FILE fcntl.h.def GEN_HDR fcntl.h DEPENDS + .llvm_libc_common_h .llvm-libc-macros.fcntl_macros .llvm-libc-types.mode_t - .llvm-libc-types.off_t - .llvm_libc_common_h ) add_gen_header( @@ -265,14 +264,13 @@ add_gen_header( DEF_FILE stdio.h.def GEN_HDR stdio.h DEPENDS + .llvm_libc_common_h .llvm-libc-macros.file_seek_macros .llvm-libc-macros.stdio_macros - .llvm-libc-types.FILE - .llvm-libc-types.cookie_io_functions_t - .llvm-libc-types.off_t .llvm-libc-types.size_t .llvm-libc-types.ssize_t - .llvm_libc_common_h + .llvm-libc-types.FILE + .llvm-libc-types.cookie_io_functions_t ) add_gen_header( diff --git a/libc/spec/posix.td b/libc/spec/posix.td index 45f7ecf..cfa8d3a 100644 --- a/libc/spec/posix.td +++ b/libc/spec/posix.td @@ -210,10 +210,7 @@ def POSIX : StandardSpec<"POSIX"> { HeaderSpec FCntl = HeaderSpec< "fcntl.h", [], // Macros - [ - ModeTType, - OffTType, - ], + [ModeTType], [], // Enumerations [ FunctionSpec< @@ -1183,7 +1180,7 @@ def POSIX : StandardSpec<"POSIX"> { HeaderSpec StdIO = HeaderSpec< "stdio.h", [], // Macros - [OffTType], // Types + [], // Types [], // Enumerations [ FunctionSpec< diff --git a/libc/src/stdio/fseeko.h b/libc/src/stdio/fseeko.h index 77fb412..3202ed2 100644 --- a/libc/src/stdio/fseeko.h +++ b/libc/src/stdio/fseeko.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_STDIO_FSEEKO_H #include +#include namespace LIBC_NAMESPACE { diff --git a/libc/src/stdio/ftello.h b/libc/src/stdio/ftello.h index 5ab17f9..0fdf13a 100644 --- a/libc/src/stdio/ftello.h +++ b/libc/src/stdio/ftello.h @@ -10,6 +10,7 @@ #define LLVM_LIBC_SRC_STDIO_FTELLO_H #include +#include namespace LIBC_NAMESPACE { -- cgit v1.1 From 3a7b5223a6639e497c856368da11b5d74ec9d6e8 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Thu, 4 Apr 2024 12:36:15 +0800 Subject: [DAGCombiner][RISCV] Handle truncating splats in isNeutralConstant (#87338) On RV64, we legalize zexts of i1s to (vselect m, (splat_vector i64 1), (splat_vector i64 0)), where the splat_vectors are implicitly truncating. When the vselect is used by a binop we want to pull the vselect out via foldSelectWithIdentityConstant. But because vectors with an element size < i64 will truncate, isNeutralConstant will return false. This patch handles truncating splats by getting the APInt value and truncating it. We almost don't need to do this since most of the neutral elements are either one/zero/all ones, but it will make a difference for smax and smin. I wasn't able to figure out a way to write the tests in terms of select, since we need the i1 zext legalization to create a truncating splat_vector. This supersedes #87236. Fixed vectors are unfortunately not handled by this patch (since they get legalized to _VL nodes), but they don't seem to appear in the wild.
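
As a side note on the smax/smin case mentioned above, here is a minimal standalone sketch (not part of this patch; it only assumes llvm/ADT/APInt.h, and the main() wrapper is purely for illustration) of why the constant has to be truncated to the element width before it is compared against the neutral value:

  #include "llvm/ADT/APInt.h"
  #include <cassert>

  int main() {
    using llvm::APInt;
    // Model a splat_vector of <n x i32> built from an i64 immediate: the
    // operand is implicitly truncated to the 32-bit element type.
    APInt Imm(/*numBits=*/64, /*val=*/0x7fffffffULL);
    // Checked at the operand width, this is not the i64 signed maximum, so
    // it would not be recognized as the neutral element for a 32-bit smin.
    assert(!Imm.isMaxSignedValue());
    // Truncated to the element width first, it is INT32_MAX, which is the
    // neutral element for a 32-bit smin.
    assert(Imm.trunc(32).isMaxSignedValue());
    // Zero, the neutral element for add/or/xor/umax, is unaffected either way.
    assert(APInt(64, 0).trunc(32).isZero());
    return 0;
  }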
--- llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 18 +- .../CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll | 21 +-- .../CodeGen/RISCV/rvv/fold-binop-into-select.ll | 60 ++++++ .../RISCV/rvv/vscale-vw-web-simplification.ll | 209 ++++++++++----------- 4 files changed, 181 insertions(+), 127 deletions(-) create mode 100644 llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 8c543ae..25b51d5 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -11545,30 +11545,32 @@ bool llvm::isNeutralConstant(unsigned Opcode, SDNodeFlags Flags, SDValue V, unsigned OperandNo) { // NOTE: The cases should match with IR's ConstantExpr::getBinOpIdentity(). // TODO: Target-specific opcodes could be added. - if (auto *Const = isConstOrConstSplat(V)) { + if (auto *ConstV = isConstOrConstSplat(V, /*AllowUndefs*/ false, + /*AllowTruncation*/ true)) { + APInt Const = ConstV->getAPIntValue().trunc(V.getScalarValueSizeInBits()); switch (Opcode) { case ISD::ADD: case ISD::OR: case ISD::XOR: case ISD::UMAX: - return Const->isZero(); + return Const.isZero(); case ISD::MUL: - return Const->isOne(); + return Const.isOne(); case ISD::AND: case ISD::UMIN: - return Const->isAllOnes(); + return Const.isAllOnes(); case ISD::SMAX: - return Const->isMinSignedValue(); + return Const.isMinSignedValue(); case ISD::SMIN: - return Const->isMaxSignedValue(); + return Const.isMaxSignedValue(); case ISD::SUB: case ISD::SHL: case ISD::SRA: case ISD::SRL: - return OperandNo == 1 && Const->isZero(); + return OperandNo == 1 && Const.isZero(); case ISD::UDIV: case ISD::SDIV: - return OperandNo == 1 && Const->isOne(); + return OperandNo == 1 && Const.isOne(); } } else if (auto *ConstFP = isConstOrConstSplatFP(V)) { switch (Opcode) { diff --git a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll index bafa92e..65d0768 100644 --- a/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll +++ b/llvm/test/CodeGen/RISCV/intrinsic-cttz-elts-vscale.ll @@ -18,14 +18,12 @@ define i32 @ctz_nxv4i32( %a) #0 { ; RV32-NEXT: vmsne.vi v0, v8, 0 ; RV32-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV32-NEXT: vmv.v.i v8, 0 -; RV32-NEXT: vmerge.vim v8, v8, -1, v0 -; RV32-NEXT: vand.vv v8, v11, v8 +; RV32-NEXT: vmerge.vvm v8, v8, v11, v0 ; RV32-NEXT: vredmaxu.vs v8, v8, v8 ; RV32-NEXT: vmv.x.s a1, v8 ; RV32-NEXT: sub a0, a0, a1 -; RV32-NEXT: lui a1, 16 -; RV32-NEXT: addi a1, a1, -1 -; RV32-NEXT: and a0, a0, a1 +; RV32-NEXT: slli a0, a0, 16 +; RV32-NEXT: srli a0, a0, 16 ; RV32-NEXT: ret ; ; RV64-LABEL: ctz_nxv4i32: @@ -41,14 +39,12 @@ define i32 @ctz_nxv4i32( %a) #0 { ; RV64-NEXT: vmsne.vi v0, v8, 0 ; RV64-NEXT: vsetvli zero, zero, e16, m1, ta, ma ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vim v8, v8, -1, v0 -; RV64-NEXT: vand.vv v8, v11, v8 +; RV64-NEXT: vmerge.vvm v8, v8, v11, v0 ; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a1, v8 -; RV64-NEXT: sub a0, a0, a1 -; RV64-NEXT: lui a1, 16 -; RV64-NEXT: addiw a1, a1, -1 -; RV64-NEXT: and a0, a0, a1 +; RV64-NEXT: subw a0, a0, a1 +; RV64-NEXT: slli a0, a0, 48 +; RV64-NEXT: srli a0, a0, 48 ; RV64-NEXT: ret %res = call i32 @llvm.experimental.cttz.elts.i32.nxv4i32( %a, i1 0) ret i32 %res @@ -158,8 +154,7 @@ define i32 @ctz_nxv16i1( %pg, %a) { ; RV64-NEXT: li a1, -1 ; RV64-NEXT: vmadd.vx v16, a1, v8 ; RV64-NEXT: vmv.v.i v8, 0 -; RV64-NEXT: vmerge.vim v8, v8, -1, v0 -; RV64-NEXT: 
vand.vv v8, v16, v8 +; RV64-NEXT: vmerge.vvm v8, v8, v16, v0 ; RV64-NEXT: vredmaxu.vs v8, v8, v8 ; RV64-NEXT: vmv.x.s a1, v8 ; RV64-NEXT: subw a0, a0, a1 diff --git a/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll b/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll new file mode 100644 index 0000000..3a8d08f --- /dev/null +++ b/llvm/test/CodeGen/RISCV/rvv/fold-binop-into-select.ll @@ -0,0 +1,60 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 +; RUN: llc -mtriple=riscv32 -mattr=+v -verify-machineinstrs < %s | FileCheck %s +; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s + +; The following binop x, (zext i1) tests will be vector-legalized into a vselect +; of two splat_vectors, but on RV64 the splat value will be implicitly +; truncated: +; +; t15: nxv2i32 = splat_vector Constant:i64<1> +; t13: nxv2i32 = splat_vector Constant:i64<0> +; t16: nxv2i32 = vselect t2, t15, t13 +; t7: nxv2i32 = add t4, t16 +; +; Make sure that foldSelectWithIdentityConstant in DAGCombiner.cpp handles the +; truncating splat, so we pull the vselect back and fold it into a mask. + +define @i1_zext_add( %a, %b) { +; CHECK-LABEL: i1_zext_add: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: ret + %zext = zext %a to + %add = add %b, %zext + ret %add +} + +define @i1_zext_add_commuted( %a, %b) { +; CHECK-LABEL: i1_zext_add_commuted: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vadd.vi v8, v8, 1, v0.t +; CHECK-NEXT: ret + %zext = zext %a to + %add = add %zext, %b + ret %add +} + +define @i1_zext_sub( %a, %b) { +; CHECK-LABEL: i1_zext_sub: +; CHECK: # %bb.0: +; CHECK-NEXT: li a0, 1 +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, mu +; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t +; CHECK-NEXT: ret + %zext = zext %a to + %sub = sub %b, %zext + ret %sub +} + +define @i1_zext_or( %a, %b) { +; CHECK-LABEL: i1_zext_or: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e32, m1, ta, mu +; CHECK-NEXT: vor.vi v8, v8, 1, v0.t +; CHECK-NEXT: ret + %zext = zext %a to + %or = or %b, %zext + ret %or +} diff --git a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll index e56dca0..a14ce71 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll @@ -149,49 +149,49 @@ define @vwop_vscale_sext_i32i64_multiple_users(ptr %x, ptr %y } define @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) { -; RV32-LABEL: vwop_vscale_sext_i1i32_multiple_users: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, mu -; RV32-NEXT: vlm.v v8, (a0) -; RV32-NEXT: vlm.v v9, (a1) -; RV32-NEXT: vlm.v v10, (a2) -; RV32-NEXT: vmv.v.i v11, 0 -; RV32-NEXT: vmv.v.v v0, v8 -; RV32-NEXT: vmerge.vim v12, v11, -1, v0 -; RV32-NEXT: vmv.v.v v0, v9 -; RV32-NEXT: vmerge.vim v9, v11, -1, v0 -; RV32-NEXT: vmv.v.v v0, v10 -; RV32-NEXT: vmerge.vim v10, v11, -1, v0 -; RV32-NEXT: vmul.vv v9, v12, v9 -; RV32-NEXT: li a0, 1 -; RV32-NEXT: vsub.vv v11, v12, v10 -; RV32-NEXT: vmv.v.v v0, v8 -; RV32-NEXT: vsub.vx v10, v10, a0, v0.t -; RV32-NEXT: vor.vv v8, v9, v10 -; RV32-NEXT: vor.vv v8, v8, v11 -; RV32-NEXT: ret +; NO_FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users: +; NO_FOLDING: # %bb.0: +; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu +; NO_FOLDING-NEXT: vlm.v v8, (a0) +; NO_FOLDING-NEXT: vlm.v v9, (a1) +; 
NO_FOLDING-NEXT: vlm.v v10, (a2) +; NO_FOLDING-NEXT: vmv.v.i v11, 0 +; NO_FOLDING-NEXT: vmv.v.v v0, v8 +; NO_FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 +; NO_FOLDING-NEXT: vmv.v.v v0, v9 +; NO_FOLDING-NEXT: vmerge.vim v9, v11, -1, v0 +; NO_FOLDING-NEXT: vmv.v.v v0, v10 +; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0 +; NO_FOLDING-NEXT: vmul.vv v9, v12, v9 +; NO_FOLDING-NEXT: li a0, 1 +; NO_FOLDING-NEXT: vsub.vv v11, v12, v10 +; NO_FOLDING-NEXT: vmv.v.v v0, v8 +; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; NO_FOLDING-NEXT: vor.vv v8, v9, v10 +; NO_FOLDING-NEXT: vor.vv v8, v8, v11 +; NO_FOLDING-NEXT: ret ; -; RV64-LABEL: vwop_vscale_sext_i1i32_multiple_users: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; RV64-NEXT: vlm.v v8, (a0) -; RV64-NEXT: vlm.v v9, (a1) -; RV64-NEXT: vlm.v v10, (a2) -; RV64-NEXT: vmv.v.i v11, 0 -; RV64-NEXT: vmv.v.v v0, v8 -; RV64-NEXT: vmerge.vim v12, v11, -1, v0 -; RV64-NEXT: vmv.v.v v0, v9 -; RV64-NEXT: vmerge.vim v9, v11, -1, v0 -; RV64-NEXT: vmv.v.v v0, v10 -; RV64-NEXT: vmerge.vim v10, v11, -1, v0 -; RV64-NEXT: vmul.vv v9, v12, v9 -; RV64-NEXT: vmv.v.v v0, v8 -; RV64-NEXT: vmerge.vim v8, v11, 1, v0 -; RV64-NEXT: vsub.vv v8, v10, v8 -; RV64-NEXT: vsub.vv v10, v12, v10 -; RV64-NEXT: vor.vv v8, v9, v8 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: ret +; FOLDING-LABEL: vwop_vscale_sext_i1i32_multiple_users: +; FOLDING: # %bb.0: +; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu +; FOLDING-NEXT: vlm.v v8, (a0) +; FOLDING-NEXT: vlm.v v9, (a1) +; FOLDING-NEXT: vlm.v v10, (a2) +; FOLDING-NEXT: vmv.v.i v11, 0 +; FOLDING-NEXT: vmv.v.v v0, v8 +; FOLDING-NEXT: vmerge.vim v12, v11, -1, v0 +; FOLDING-NEXT: vmv.v.v v0, v9 +; FOLDING-NEXT: vmerge.vim v9, v11, -1, v0 +; FOLDING-NEXT: vmv.v.v v0, v10 +; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0 +; FOLDING-NEXT: vmul.vv v9, v12, v9 +; FOLDING-NEXT: li a0, 1 +; FOLDING-NEXT: vsub.vv v11, v12, v10 +; FOLDING-NEXT: vmv.v.v v0, v8 +; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; FOLDING-NEXT: vor.vv v8, v9, v10 +; FOLDING-NEXT: vor.vv v8, v8, v11 +; FOLDING-NEXT: ret %a = load , ptr %x %b = load , ptr %y %b2 = load , ptr %z @@ -209,7 +209,7 @@ define @vwop_vscale_sext_i1i32_multiple_users(ptr %x, ptr %y, define @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) { ; NO_FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users: ; NO_FOLDING: # %bb.0: -; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; NO_FOLDING-NEXT: vlm.v v8, (a0) ; NO_FOLDING-NEXT: vlm.v v9, (a1) ; NO_FOLDING-NEXT: vlm.v v10, (a2) @@ -221,17 +221,17 @@ define @vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; NO_FOLDING-NEXT: vmv1r.v v0, v10 ; NO_FOLDING-NEXT: vmerge.vim v10, v11, -1, v0 ; NO_FOLDING-NEXT: vmul.vv v9, v12, v9 +; NO_FOLDING-NEXT: li a0, 1 +; NO_FOLDING-NEXT: vsub.vv v11, v12, v10 ; NO_FOLDING-NEXT: vmv1r.v v0, v8 -; NO_FOLDING-NEXT: vmerge.vim v8, v11, 1, v0 -; NO_FOLDING-NEXT: vsub.vv v8, v10, v8 -; NO_FOLDING-NEXT: vsub.vv v10, v12, v10 -; NO_FOLDING-NEXT: vor.vv v8, v9, v8 -; NO_FOLDING-NEXT: vor.vv v8, v8, v10 +; NO_FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; NO_FOLDING-NEXT: vor.vv v8, v9, v10 +; NO_FOLDING-NEXT: vor.vv v8, v8, v11 ; NO_FOLDING-NEXT: ret ; ; FOLDING-LABEL: vwop_vscale_sext_i1i8_multiple_users: ; FOLDING: # %bb.0: -; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; FOLDING-NEXT: vlm.v v8, (a0) ; FOLDING-NEXT: vlm.v v9, (a1) ; FOLDING-NEXT: vlm.v v10, (a2) @@ -243,12 +243,12 @@ define 
@vwop_vscale_sext_i1i8_multiple_users(ptr %x, ptr %y, p ; FOLDING-NEXT: vmv1r.v v0, v10 ; FOLDING-NEXT: vmerge.vim v10, v11, -1, v0 ; FOLDING-NEXT: vmul.vv v9, v12, v9 +; FOLDING-NEXT: li a0, 1 +; FOLDING-NEXT: vsub.vv v11, v12, v10 ; FOLDING-NEXT: vmv1r.v v0, v8 -; FOLDING-NEXT: vmerge.vim v8, v11, 1, v0 -; FOLDING-NEXT: vsub.vv v8, v10, v8 -; FOLDING-NEXT: vsub.vv v10, v12, v10 -; FOLDING-NEXT: vor.vv v8, v9, v8 -; FOLDING-NEXT: vor.vv v8, v8, v10 +; FOLDING-NEXT: vsub.vx v10, v10, a0, v0.t +; FOLDING-NEXT: vor.vv v8, v9, v10 +; FOLDING-NEXT: vor.vv v8, v8, v11 ; FOLDING-NEXT: ret %a = load , ptr %x %b = load , ptr %y @@ -444,41 +444,39 @@ define @vwop_vscale_zext_i32i64_multiple_users(ptr %x, ptr %y } define @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y, ptr %z) { -; RV32-LABEL: vwop_vscale_zext_i1i32_multiple_users: -; RV32: # %bb.0: -; RV32-NEXT: vsetvli a3, zero, e32, m1, ta, mu -; RV32-NEXT: vlm.v v0, (a0) -; RV32-NEXT: vlm.v v8, (a2) -; RV32-NEXT: vlm.v v9, (a1) -; RV32-NEXT: vmv.v.i v10, 0 -; RV32-NEXT: vmerge.vim v11, v10, 1, v0 -; RV32-NEXT: vmv.v.v v0, v8 -; RV32-NEXT: vmerge.vim v8, v10, 1, v0 -; RV32-NEXT: vadd.vv v10, v11, v8 -; RV32-NEXT: vsub.vv v8, v11, v8 -; RV32-NEXT: vmv.v.v v0, v9 -; RV32-NEXT: vor.vv v10, v10, v11, v0.t -; RV32-NEXT: vor.vv v8, v10, v8 -; RV32-NEXT: ret +; NO_FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users: +; NO_FOLDING: # %bb.0: +; NO_FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu +; NO_FOLDING-NEXT: vlm.v v0, (a0) +; NO_FOLDING-NEXT: vlm.v v8, (a2) +; NO_FOLDING-NEXT: vlm.v v9, (a1) +; NO_FOLDING-NEXT: vmv.v.i v10, 0 +; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 +; NO_FOLDING-NEXT: vmv.v.v v0, v8 +; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 +; NO_FOLDING-NEXT: vadd.vv v10, v11, v8 +; NO_FOLDING-NEXT: vsub.vv v8, v11, v8 +; NO_FOLDING-NEXT: vmv.v.v v0, v9 +; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t +; NO_FOLDING-NEXT: vor.vv v8, v10, v8 +; NO_FOLDING-NEXT: ret ; -; RV64-LABEL: vwop_vscale_zext_i1i32_multiple_users: -; RV64: # %bb.0: -; RV64-NEXT: vsetvli a3, zero, e32, m1, ta, ma -; RV64-NEXT: vlm.v v0, (a0) -; RV64-NEXT: vlm.v v8, (a1) -; RV64-NEXT: vlm.v v9, (a2) -; RV64-NEXT: vmv.v.i v10, 0 -; RV64-NEXT: vmerge.vim v11, v10, 1, v0 -; RV64-NEXT: vmv.v.v v0, v8 -; RV64-NEXT: vmerge.vim v8, v10, 1, v0 -; RV64-NEXT: vmv.v.v v0, v9 -; RV64-NEXT: vmerge.vim v9, v10, 1, v0 -; RV64-NEXT: vmul.vv v8, v11, v8 -; RV64-NEXT: vadd.vv v10, v11, v9 -; RV64-NEXT: vsub.vv v9, v11, v9 -; RV64-NEXT: vor.vv v8, v8, v10 -; RV64-NEXT: vor.vv v8, v8, v9 -; RV64-NEXT: ret +; FOLDING-LABEL: vwop_vscale_zext_i1i32_multiple_users: +; FOLDING: # %bb.0: +; FOLDING-NEXT: vsetvli a3, zero, e32, m1, ta, mu +; FOLDING-NEXT: vlm.v v0, (a0) +; FOLDING-NEXT: vlm.v v8, (a2) +; FOLDING-NEXT: vlm.v v9, (a1) +; FOLDING-NEXT: vmv.v.i v10, 0 +; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 +; FOLDING-NEXT: vmv.v.v v0, v8 +; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 +; FOLDING-NEXT: vadd.vv v10, v11, v8 +; FOLDING-NEXT: vsub.vv v8, v11, v8 +; FOLDING-NEXT: vmv.v.v v0, v9 +; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t +; FOLDING-NEXT: vor.vv v8, v10, v8 +; FOLDING-NEXT: ret %a = load , ptr %x %b = load , ptr %y %b2 = load , ptr %z @@ -496,40 +494,36 @@ define @vwop_vscale_zext_i1i32_multiple_users(ptr %x, ptr %y, define @vwop_vscale_zext_i1i8_multiple_users(ptr %x, ptr %y, ptr %z) { ; NO_FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users: ; NO_FOLDING: # %bb.0: -; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; NO_FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; 
NO_FOLDING-NEXT: vlm.v v0, (a0) -; NO_FOLDING-NEXT: vlm.v v8, (a1) -; NO_FOLDING-NEXT: vlm.v v9, (a2) +; NO_FOLDING-NEXT: vlm.v v8, (a2) +; NO_FOLDING-NEXT: vlm.v v9, (a1) ; NO_FOLDING-NEXT: vmv.v.i v10, 0 ; NO_FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 ; NO_FOLDING-NEXT: vmv1r.v v0, v8 ; NO_FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 +; NO_FOLDING-NEXT: vadd.vv v10, v11, v8 +; NO_FOLDING-NEXT: vsub.vv v8, v11, v8 ; NO_FOLDING-NEXT: vmv1r.v v0, v9 -; NO_FOLDING-NEXT: vmerge.vim v9, v10, 1, v0 -; NO_FOLDING-NEXT: vmul.vv v8, v11, v8 -; NO_FOLDING-NEXT: vadd.vv v10, v11, v9 -; NO_FOLDING-NEXT: vsub.vv v9, v11, v9 -; NO_FOLDING-NEXT: vor.vv v8, v8, v10 -; NO_FOLDING-NEXT: vor.vv v8, v8, v9 +; NO_FOLDING-NEXT: vor.vv v10, v10, v11, v0.t +; NO_FOLDING-NEXT: vor.vv v8, v10, v8 ; NO_FOLDING-NEXT: ret ; ; FOLDING-LABEL: vwop_vscale_zext_i1i8_multiple_users: ; FOLDING: # %bb.0: -; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, ma +; FOLDING-NEXT: vsetvli a3, zero, e8, mf4, ta, mu ; FOLDING-NEXT: vlm.v v0, (a0) -; FOLDING-NEXT: vlm.v v8, (a1) -; FOLDING-NEXT: vlm.v v9, (a2) +; FOLDING-NEXT: vlm.v v8, (a2) +; FOLDING-NEXT: vlm.v v9, (a1) ; FOLDING-NEXT: vmv.v.i v10, 0 ; FOLDING-NEXT: vmerge.vim v11, v10, 1, v0 ; FOLDING-NEXT: vmv1r.v v0, v8 ; FOLDING-NEXT: vmerge.vim v8, v10, 1, v0 +; FOLDING-NEXT: vadd.vv v10, v11, v8 +; FOLDING-NEXT: vsub.vv v8, v11, v8 ; FOLDING-NEXT: vmv1r.v v0, v9 -; FOLDING-NEXT: vmerge.vim v9, v10, 1, v0 -; FOLDING-NEXT: vmul.vv v8, v11, v8 -; FOLDING-NEXT: vadd.vv v10, v11, v9 -; FOLDING-NEXT: vsub.vv v9, v11, v9 -; FOLDING-NEXT: vor.vv v8, v8, v10 -; FOLDING-NEXT: vor.vv v8, v8, v9 +; FOLDING-NEXT: vor.vv v10, v10, v11, v0.t +; FOLDING-NEXT: vor.vv v8, v10, v8 ; FOLDING-NEXT: ret %a = load , ptr %x %b = load , ptr %y @@ -594,3 +588,6 @@ define @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y, +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; RV32: {{.*}} +; RV64: {{.*}} -- cgit v1.1 From d89914f30bc7c180fe349a5aa0f03438ae6c20a4 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Wed, 3 Apr 2024 21:48:38 -0700 Subject: [memprof] Add Version2 of IndexedMemProfRecord serialization (#87455) I'm currently developing a new version of the indexed memprof format where we deduplicate call stacks in IndexedAllocationInfo::CallStack and IndexedMemProfRecord::CallSites. We refer to call stacks with integer IDs, namely CallStackId, just as we refer to Frame with FrameId. The deduplication will cut down the profile file size by 80% in a large memprof file of mine. As a step toward the goal, this patch teaches IndexedMemProfRecord::{serialize,deserialize} to speak Version2. A subsequent patch will add Version2 support to llvm-profdata. The essense of the patch is to replace the serialization of a call stack, a vector of FrameIDs, with that of a CallStackId. That is: const IndexedAllocationInfo &N = ...; ... 
LE.write(N.CallStack.size()); for (const FrameId &Id : N.CallStack) LE.write(Id); becomes: LE.write(N.CSId); --- llvm/include/llvm/ProfileData/MemProf.h | 61 ++++------- llvm/lib/ProfileData/InstrProfReader.cpp | 2 +- llvm/lib/ProfileData/InstrProfWriter.cpp | 6 +- llvm/lib/ProfileData/MemProf.cpp | 170 +++++++++++++++++++++++++++-- llvm/unittests/ProfileData/MemProfTest.cpp | 41 ++++++- 5 files changed, 226 insertions(+), 54 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index ff00900..110e697 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -22,6 +22,8 @@ enum IndexedVersion : uint64_t { Version0 = 0, // Version 1: Added a version field to the header. Version1 = 1, + // Version 2: Added a call stack table. Under development. + Version2 = 2, }; constexpr uint64_t MinimumSupportedVersion = Version0; @@ -289,23 +291,14 @@ struct IndexedAllocationInfo { : CallStack(CS.begin(), CS.end()), CSId(CSId), Info(MB) {} // Returns the size in bytes when this allocation info struct is serialized. - size_t serializedSize() const { - return sizeof(uint64_t) + // The number of frames to serialize. - sizeof(FrameId) * CallStack.size() + // The callstack frame ids. - PortableMemInfoBlock::serializedSize(); // The size of the payload. - } + size_t serializedSize(IndexedVersion Version) const; bool operator==(const IndexedAllocationInfo &Other) const { if (Other.Info != Info) return false; - if (Other.CallStack.size() != CallStack.size()) + if (Other.CSId != CSId) return false; - - for (size_t J = 0; J < Other.CallStack.size(); J++) { - if (Other.CallStack[J] != CallStack[J]) - return false; - } return true; } @@ -357,6 +350,9 @@ struct IndexedMemProfRecord { // inline location list may include additional entries, users should pick // the last entry in the list with the same function GUID. llvm::SmallVector> CallSites; + // Conceptually the same as above. We are going to keep both CallSites and + // CallSiteIds while we are transitioning from CallSites to CallSiteIds. + llvm::SmallVector CallSiteIds; void clear() { AllocSites.clear(); @@ -370,47 +366,31 @@ struct IndexedMemProfRecord { CallSites.append(Other.CallSites); } - size_t serializedSize() const { - size_t Result = sizeof(GlobalValue::GUID); - for (const IndexedAllocationInfo &N : AllocSites) - Result += N.serializedSize(); - - // The number of callsites we have information for. - Result += sizeof(uint64_t); - for (const auto &Frames : CallSites) { - // The number of frame ids to serialize. - Result += sizeof(uint64_t); - Result += Frames.size() * sizeof(FrameId); - } - return Result; - } + size_t serializedSize(IndexedVersion Version) const; bool operator==(const IndexedMemProfRecord &Other) const { if (Other.AllocSites.size() != AllocSites.size()) return false; - if (Other.CallSites.size() != CallSites.size()) - return false; - for (size_t I = 0; I < AllocSites.size(); I++) { if (AllocSites[I] != Other.AllocSites[I]) return false; } - for (size_t I = 0; I < CallSites.size(); I++) { - if (CallSites[I] != Other.CallSites[I]) - return false; - } + if (Other.CallSiteIds != CallSiteIds) + return false; return true; } // Serializes the memprof records in \p Records to the ostream \p OS based // on the schema provided in \p Schema. - void serialize(const MemProfSchema &Schema, raw_ostream &OS); + void serialize(const MemProfSchema &Schema, raw_ostream &OS, + IndexedVersion Version); // Deserializes memprof records from the Buffer. 
static IndexedMemProfRecord deserialize(const MemProfSchema &Schema, - const unsigned char *Buffer); + const unsigned char *Buffer, + IndexedVersion Version); // Returns the GUID for the function name after canonicalization. For // memprof, we remove any .llvm suffix added by LTO. MemProfRecords are @@ -480,7 +460,8 @@ public: using offset_type = uint64_t; RecordLookupTrait() = delete; - RecordLookupTrait(const MemProfSchema &S) : Schema(S) {} + RecordLookupTrait(IndexedVersion V, const MemProfSchema &S) + : Version(V), Schema(S) {} static bool EqualKey(uint64_t A, uint64_t B) { return A == B; } static uint64_t GetInternalKey(uint64_t K) { return K; } @@ -507,11 +488,13 @@ public: data_type ReadData(uint64_t K, const unsigned char *D, offset_type /*Unused*/) { - Record = IndexedMemProfRecord::deserialize(Schema, D); + Record = IndexedMemProfRecord::deserialize(Schema, D, Version); return Record; } private: + // Holds the MemProf version. + IndexedVersion Version; // Holds the memprof schema used to deserialize records. MemProfSchema Schema; // Holds the records from one function deserialized from the indexed format. @@ -519,7 +502,7 @@ private: }; // Trait for writing IndexedMemProfRecord data to the on-disk hash table. -class RecordWriterTrait { +template class RecordWriterTrait { public: using key_type = uint64_t; using key_type_ref = uint64_t; @@ -546,7 +529,7 @@ public: endian::Writer LE(Out, llvm::endianness::little); offset_type N = sizeof(K); LE.write(N); - offset_type M = V.serializedSize(); + offset_type M = V.serializedSize(Version); LE.write(M); return std::make_pair(N, M); } @@ -560,7 +543,7 @@ public: void EmitData(raw_ostream &Out, key_type_ref /*Unused*/, data_type_ref V, offset_type /*Unused*/) { assert(Schema != nullptr && "MemProf schema is not initialized!"); - V.serialize(*Schema, Out); + V.serialize(*Schema, Out, Version); // Clear the IndexedMemProfRecord which results in clearing/freeing its // vectors of allocs and callsites. This is owned by the associated on-disk // hash table, but unused after this point. See also the comment added to diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp index 7ac5c56..884334e 100644 --- a/llvm/lib/ProfileData/InstrProfReader.cpp +++ b/llvm/lib/ProfileData/InstrProfReader.cpp @@ -1303,7 +1303,7 @@ Error IndexedInstrProfReader::readHeader() { MemProfRecordTable.reset(MemProfRecordHashTable::Create( /*Buckets=*/Start + RecordTableOffset, /*Payload=*/Ptr, - /*Base=*/Start, memprof::RecordLookupTrait(Schema))); + /*Base=*/Start, memprof::RecordLookupTrait(memprof::Version1, Schema))); // Initialize the frame table reader with the payload and bucket offsets. MemProfFrameTable.reset(MemProfFrameHashTable::Create( diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index c2c94ba..a1bc180 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -557,9 +557,11 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { OS.write(static_cast(Id)); } - auto RecordWriter = std::make_unique(); + auto RecordWriter = + std::make_unique>(); RecordWriter->Schema = &Schema; - OnDiskChainedHashTableGenerator + OnDiskChainedHashTableGenerator< + memprof::RecordWriterTrait> RecordTableGenerator; for (auto &I : MemProfRecordData) { // Insert the key (func hash) and value (memprof record). 
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp index 6c41981..ac0a870 100644 --- a/llvm/lib/ProfileData/MemProf.cpp +++ b/llvm/lib/ProfileData/MemProf.cpp @@ -10,15 +10,88 @@ namespace llvm { namespace memprof { +namespace { +size_t serializedSizeV0(const IndexedAllocationInfo &IAI) { + size_t Size = 0; + // The number of frames to serialize. + Size += sizeof(uint64_t); + // The callstack frame ids. + Size += sizeof(FrameId) * IAI.CallStack.size(); + // The size of the payload. + Size += PortableMemInfoBlock::serializedSize(); + return Size; +} -void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, - raw_ostream &OS) { +size_t serializedSizeV2(const IndexedAllocationInfo &IAI) { + size_t Size = 0; + // The CallStackId + Size += sizeof(CallStackId); + // The size of the payload. + Size += PortableMemInfoBlock::serializedSize(); + return Size; +} +} // namespace + +size_t IndexedAllocationInfo::serializedSize(IndexedVersion Version) const { + switch (Version) { + case Version0: + case Version1: + return serializedSizeV0(*this); + case Version2: + return serializedSizeV2(*this); + } + llvm_unreachable("unsupported MemProf version"); +} + +namespace { +size_t serializedSizeV0(const IndexedMemProfRecord &Record) { + size_t Result = sizeof(GlobalValue::GUID); + for (const IndexedAllocationInfo &N : Record.AllocSites) + Result += N.serializedSize(Version0); + + // The number of callsites we have information for. + Result += sizeof(uint64_t); + for (const auto &Frames : Record.CallSites) { + // The number of frame ids to serialize. + Result += sizeof(uint64_t); + Result += Frames.size() * sizeof(FrameId); + } + return Result; +} + +size_t serializedSizeV2(const IndexedMemProfRecord &Record) { + size_t Result = sizeof(GlobalValue::GUID); + for (const IndexedAllocationInfo &N : Record.AllocSites) + Result += N.serializedSize(Version2); + + // The number of callsites we have information for. + Result += sizeof(uint64_t); + // The CallStackId + Result += Record.CallSiteIds.size() * sizeof(CallStackId); + return Result; +} +} // namespace + +size_t IndexedMemProfRecord::serializedSize(IndexedVersion Version) const { + switch (Version) { + case Version0: + case Version1: + return serializedSizeV0(*this); + case Version2: + return serializedSizeV2(*this); + } + llvm_unreachable("unsupported MemProf version"); +} + +namespace { +void serializeV0(const IndexedMemProfRecord &Record, + const MemProfSchema &Schema, raw_ostream &OS) { using namespace support; endian::Writer LE(OS, llvm::endianness::little); - LE.write(AllocSites.size()); - for (const IndexedAllocationInfo &N : AllocSites) { + LE.write(Record.AllocSites.size()); + for (const IndexedAllocationInfo &N : Record.AllocSites) { LE.write(N.CallStack.size()); for (const FrameId &Id : N.CallStack) LE.write(Id); @@ -26,17 +99,50 @@ void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, } // Related contexts. 
- LE.write(CallSites.size()); - for (const auto &Frames : CallSites) { + LE.write(Record.CallSites.size()); + for (const auto &Frames : Record.CallSites) { LE.write(Frames.size()); for (const FrameId &Id : Frames) LE.write(Id); } } -IndexedMemProfRecord -IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, - const unsigned char *Ptr) { +void serializeV2(const IndexedMemProfRecord &Record, + const MemProfSchema &Schema, raw_ostream &OS) { + using namespace support; + + endian::Writer LE(OS, llvm::endianness::little); + + LE.write(Record.AllocSites.size()); + for (const IndexedAllocationInfo &N : Record.AllocSites) { + LE.write(N.CSId); + N.Info.serialize(Schema, OS); + } + + // Related contexts. + LE.write(Record.CallSiteIds.size()); + for (const auto &CSId : Record.CallSiteIds) + LE.write(CSId); +} +} // namespace + +void IndexedMemProfRecord::serialize(const MemProfSchema &Schema, + raw_ostream &OS, IndexedVersion Version) { + switch (Version) { + case Version0: + case Version1: + serializeV0(*this, Schema, OS); + return; + case Version2: + serializeV2(*this, Schema, OS); + return; + } + llvm_unreachable("unsupported MemProf version"); +} + +namespace { +IndexedMemProfRecord deserializeV0(const MemProfSchema &Schema, + const unsigned char *Ptr) { using namespace support; IndexedMemProfRecord Record; @@ -73,11 +179,57 @@ IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, Frames.push_back(Id); } Record.CallSites.push_back(Frames); + Record.CallSiteIds.push_back(hashCallStack(Frames)); } return Record; } +IndexedMemProfRecord deserializeV2(const MemProfSchema &Schema, + const unsigned char *Ptr) { + using namespace support; + + IndexedMemProfRecord Record; + + // Read the meminfo nodes. + const uint64_t NumNodes = + endian::readNext(Ptr); + for (uint64_t I = 0; I < NumNodes; I++) { + IndexedAllocationInfo Node; + Node.CSId = + endian::readNext(Ptr); + Node.Info.deserialize(Schema, Ptr); + Ptr += PortableMemInfoBlock::serializedSize(); + Record.AllocSites.push_back(Node); + } + + // Read the callsite information. + const uint64_t NumCtxs = + endian::readNext(Ptr); + for (uint64_t J = 0; J < NumCtxs; J++) { + CallStackId CSId = + endian::readNext(Ptr); + Record.CallSiteIds.push_back(CSId); + } + + return Record; +} +} // namespace + +IndexedMemProfRecord +IndexedMemProfRecord::deserialize(const MemProfSchema &Schema, + const unsigned char *Ptr, + IndexedVersion Version) { + switch (Version) { + case Version0: + case Version1: + return deserializeV0(Schema, Ptr); + case Version2: + return deserializeV2(Schema, Ptr); + } + llvm_unreachable("unsupported MemProf version"); +} + GlobalValue::GUID IndexedMemProfRecord::getGUID(const StringRef FunctionName) { // Canonicalize the function name to drop suffixes such as ".llvm.". Note // we do not drop any ".__uniq." suffixes, as getCanonicalFnName does not drop diff --git a/llvm/unittests/ProfileData/MemProfTest.cpp b/llvm/unittests/ProfileData/MemProfTest.cpp index 1cca44e..f1aa6f3 100644 --- a/llvm/unittests/ProfileData/MemProfTest.cpp +++ b/llvm/unittests/ProfileData/MemProfTest.cpp @@ -265,7 +265,9 @@ TEST(MemProf, PortableWrapper) { EXPECT_EQ(3UL, ReadBlock.getAllocCpuId()); } -TEST(MemProf, RecordSerializationRoundTrip) { +// Version0 and Version1 serialize IndexedMemProfRecord in the same format, so +// we share one test. 
+TEST(MemProf, RecordSerializationRoundTripVersion0And1) { const MemProfSchema Schema = getFullSchema(); MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, @@ -284,14 +286,47 @@ TEST(MemProf, RecordSerializationRoundTrip) { Info); } Record.CallSites.assign(CallSites); + for (const auto &CS : CallSites) + Record.CallSiteIds.push_back(llvm::memprof::hashCallStack(CS)); std::string Buffer; llvm::raw_string_ostream OS(Buffer); - Record.serialize(Schema, OS); + Record.serialize(Schema, OS, llvm::memprof::Version0); OS.flush(); const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize( - Schema, reinterpret_cast(Buffer.data())); + Schema, reinterpret_cast(Buffer.data()), + llvm::memprof::Version0); + + EXPECT_EQ(Record, GotRecord); +} + +TEST(MemProf, RecordSerializationRoundTripVerion2) { + const MemProfSchema Schema = getFullSchema(); + + MemInfoBlock Info(/*size=*/16, /*access_count=*/7, /*alloc_timestamp=*/1000, + /*dealloc_timestamp=*/2000, /*alloc_cpu=*/3, + /*dealloc_cpu=*/4); + + llvm::SmallVector CallStackIds = {0x123, 0x456}; + + llvm::SmallVector CallSiteIds = {0x333, 0x444}; + + IndexedMemProfRecord Record; + for (const auto &CSId : CallStackIds) { + // Use the same info block for both allocation sites. + Record.AllocSites.emplace_back(llvm::SmallVector(), CSId, Info); + } + Record.CallSiteIds.assign(CallSiteIds); + + std::string Buffer; + llvm::raw_string_ostream OS(Buffer); + Record.serialize(Schema, OS, llvm::memprof::Version2); + OS.flush(); + + const IndexedMemProfRecord GotRecord = IndexedMemProfRecord::deserialize( + Schema, reinterpret_cast(Buffer.data()), + llvm::memprof::Version2); EXPECT_EQ(Record, GotRecord); } -- cgit v1.1 From 4f19f15a601a5761b12c9c66d99d97dbc89ef90d Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Thu, 4 Apr 2024 08:20:13 +0300 Subject: [PAC][llvm-readobj][AArch64][ELF] Support `GNU_PROPERTY_AARCH64_FEATURE_PAUTH` (#87545) Reland #85231 after fixing build failure https://lab.llvm.org/buildbot/#/builders/186/builds/15631. Use `PRIx64` for format output of `uint64_t` as hex. Original PR description below. This adds support for `GNU_PROPERTY_AARCH64_FEATURE_PAUTH` feature (as defined in https://github.com/ARM-software/abi-aa/pull/240) handling in llvm-readobj and llvm-readelf. The following constants for supported platforms are also introduced: - `AARCH64_PAUTH_PLATFORM_INVALID = 0x0` - `AARCH64_PAUTH_PLATFORM_BAREMETAL = 0x1` - `AARCH64_PAUTH_PLATFORM_LLVM_LINUX = 0x10000002` For the llvm_linux platform, output of the tools contains descriptions of PAuth features which are enabled/disabled depending on the version value. Version value bits correspond to the following `LangOptions` defined in #85232: - bit 0: `PointerAuthIntrinsics`; - bit 1: `PointerAuthCalls`; - bit 2: `PointerAuthReturns`; - bit 3: `PointerAuthAuthTraps`; - bit 4: `PointerAuthVTPtrAddressDiscrimination`; - bit 5: `PointerAuthVTPtrTypeDiscrimination`; - bit 6: `PointerAuthInitFini`. Support for `.note.AARCH64-PAUTH-ABI-tag` is dropped since it's deleted from the spec in ARM-software/abi-aa#250. 
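To make the bit layout above concrete, the following self-contained C++ sketch decodes an AARCH64_PAUTH_PLATFORM_LLVM_LINUX version value into the flag list that the tools print. It is an editorial illustration only: it mirrors the logic added to ELFDumper.cpp further down, but the function and variable names used here (decodePAuthVersion, Flags, Desc) are invented for the example and are not part of the patch.

  #include <array>
  #include <cstddef>
  #include <cstdint>
  #include <iostream>
  #include <string>

  // Bit positions follow the list above (bit 0 through bit 6); a '!' prefix
  // marks a feature whose bit is clear, matching the llvm-readelf output.
  static std::string decodePAuthVersion(uint64_t Version) {
    static const std::array<const char *, 7> Flags = {
        "PointerAuthIntrinsics",
        "PointerAuthCalls",
        "PointerAuthReturns",
        "PointerAuthAuthTraps",
        "PointerAuthVTPtrAddressDiscrimination",
        "PointerAuthVTPtrTypeDiscrimination",
        "PointerAuthInitFini"};
    if (Version >= (uint64_t{1} << Flags.size()))
      return "unknown";
    std::string Desc;
    for (std::size_t I = 0; I < Flags.size(); ++I) {
      if (!(Version & (uint64_t{1} << I)))
        Desc += '!';
      Desc += Flags[I];
      if (I + 1 != Flags.size())
        Desc += ", ";
    }
    return Desc;
  }

  int main() {
    // 0x55 sets bits 0, 2, 4 and 6; compare with the gnu-0x10000002-85.s test below.
    std::cout << decodePAuthVersion(0x55) << "\n";
    // 0x80 has no known bit assigned, so it decodes to "unknown", matching the
    // gnu-0x10000002-128.s test below.
    std::cout << decodePAuthVersion(0x80) << "\n";
  }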
--- llvm/include/llvm/BinaryFormat/ELF.h | 26 +- .../ELF/AArch64/aarch64-feature-pauth.s | 298 ++++++++++++++------- .../ELF/AArch64/aarch64-note-gnu-property.s | 2 + llvm/tools/llvm-readobj/ELFDumper.cpp | 127 +++++---- 4 files changed, 296 insertions(+), 157 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/ELF.h b/llvm/include/llvm/BinaryFormat/ELF.h index 877f3f7..ed267c1 100644 --- a/llvm/include/llvm/BinaryFormat/ELF.h +++ b/llvm/include/llvm/BinaryFormat/ELF.h @@ -1712,11 +1712,6 @@ enum { NT_ANDROID_TYPE_MEMTAG = 4, }; -// ARM note types. -enum { - NT_ARM_TYPE_PAUTH_ABI_TAG = 1, -}; - // Memory tagging values used in NT_ANDROID_TYPE_MEMTAG notes. enum { // Enumeration to determine the tagging mode. In Android-land, 'SYNC' means @@ -1740,6 +1735,7 @@ enum : unsigned { GNU_PROPERTY_STACK_SIZE = 1, GNU_PROPERTY_NO_COPY_ON_PROTECTED = 2, GNU_PROPERTY_AARCH64_FEATURE_1_AND = 0xc0000000, + GNU_PROPERTY_AARCH64_FEATURE_PAUTH = 0xc0000001, GNU_PROPERTY_X86_FEATURE_1_AND = 0xc0000002, GNU_PROPERTY_X86_UINT32_OR_LO = 0xc0008000, @@ -1758,6 +1754,26 @@ enum : unsigned { GNU_PROPERTY_AARCH64_FEATURE_1_GCS = 1 << 2, }; +// aarch64 PAuth platforms. +enum : unsigned { + AARCH64_PAUTH_PLATFORM_INVALID = 0x0, + AARCH64_PAUTH_PLATFORM_BAREMETAL = 0x1, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX = 0x10000002, +}; + +// Bit positions of version flags for AARCH64_PAUTH_PLATFORM_LLVM_LINUX. +enum : unsigned { + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INTRINSICS = 0, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_CALLS = 1, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_RETURNS = 2, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_AUTHTRAPS = 3, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRADDRDISCR = 4, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRTYPEDISCR = 5, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI = 6, + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_LAST = + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI, +}; + // x86 processor feature bits. 
enum : unsigned { GNU_PROPERTY_X86_FEATURE_1_IBT = 1 << 0, diff --git a/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-feature-pauth.s b/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-feature-pauth.s index f28d92e..5125317 100644 --- a/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-feature-pauth.s +++ b/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-feature-pauth.s @@ -1,98 +1,204 @@ # RUN: rm -rf %t && split-file %s %t && cd %t -# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag.s -o tag.o -# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag-short.s -o tag-short.o -# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag-long.s -o tag-long.o - -# RUN: llvm-readelf --notes tag.o | FileCheck --check-prefix NORMAL %s -# RUN: llvm-readelf --notes tag-short.o | FileCheck --check-prefix SHORT %s -# RUN: llvm-readelf --notes tag-long.o | FileCheck --check-prefix LONG %s - -# NORMAL: AArch64 PAuth ABI tag: platform 0x2a, version 0x1 -# SHORT: AArch64 PAuth ABI tag: -# LONG: AArch64 PAuth ABI tag: platform 0x2a, version 0x1, additional info 0xEFCDAB8967452301 - -# RUN: llvm-readobj --notes tag.o | FileCheck --check-prefix LLVM-NORMAL %s -# RUN: llvm-readobj --notes tag-short.o | FileCheck --check-prefix LLVM-SHORT %s -# RUN: llvm-readobj --notes tag-long.o | FileCheck --check-prefix LLVM-LONG %s - -# LLVM-SHORT: Notes [ -# LLVM-SHORT-NEXT: NoteSection { -# LLVM-SHORT-NEXT: Name: .note.AARCH64-PAUTH-ABI-tag -# LLVM-SHORT-NEXT: Offset: 0x40 -# LLVM-SHORT-NEXT: Size: 0x1C -# LLVM-SHORT-NEXT: Note { -# LLVM-SHORT-NEXT: Owner: ARM -# LLVM-SHORT-NEXT: Data size: 0xC -# LLVM-SHORT-NEXT: Type: NT_ARM_TYPE_PAUTH_ABI_TAG -# LLVM-SHORT-NEXT: Description data ( -# LLVM-SHORT-NEXT: 0000: 2A000000 00000000 01000000 -# LLVM-SHORT-NEXT: ) -# LLVM-SHORT-NEXT: } -# LLVM-SHORT-NEXT: } -# LLVM-SHORT-NEXT: ] - -# LLVM-NORMAL: Notes [ -# LLVM-NORMAL-NEXT: NoteSection { -# LLVM-NORMAL-NEXT: Name: .note.AARCH64-PAUTH-ABI-tag -# LLVM-NORMAL-NEXT: Offset: 0x40 -# LLVM-NORMAL-NEXT: Size: 0x20 -# LLVM-NORMAL-NEXT: Note { -# LLVM-NORMAL-NEXT: Owner: ARM -# LLVM-NORMAL-NEXT: Data size: 0x10 -# LLVM-NORMAL-NEXT: Type: NT_ARM_TYPE_PAUTH_ABI_TAG -# LLVM-NORMAL-NEXT: Platform: 42 -# LLVM-NORMAL-NEXT: Version: 1 -# LLVM-NORMAL-NEXT: } -# LLVM-NORMAL-NEXT: } -# LLVM-NORMAL-NEXT: ] - -# LLVM-LONG: Notes [ -# LLVM-LONG-NEXT: NoteSection { -# LLVM-LONG-NEXT: Name: .note.AARCH64-PAUTH-ABI-tag -# LLVM-LONG-NEXT: Offset: 0x40 -# LLVM-LONG-NEXT: Size: 0x28 -# LLVM-LONG-NEXT: Note { -# LLVM-LONG-NEXT: Owner: ARM -# LLVM-LONG-NEXT: Data size: 0x18 -# LLVM-LONG-NEXT: Type: NT_ARM_TYPE_PAUTH_ABI_TAG -# LLVM-LONG-NEXT: Platform: 42 -# LLVM-LONG-NEXT: Version: 1 -# LLVM-LONG-NEXT: Additional info: EFCDAB8967452301 -# LLVM-LONG-NEXT: } -# LLVM-LONG-NEXT: } -# LLVM-LONG-NEXT: ] - -#--- abi-tag.s - -.section ".note.AARCH64-PAUTH-ABI-tag", "a" -.long 4 -.long 16 -.long 1 -.asciz "ARM" - -.quad 42 // platform -.quad 1 // version - -#--- abi-tag-short.s - -.section ".note.AARCH64-PAUTH-ABI-tag", "a" -.long 4 -.long 12 -.long 1 -.asciz "ARM" - -.quad 42 -.word 1 - -#--- abi-tag-long.s - -.section ".note.AARCH64-PAUTH-ABI-tag", "a" -.long 4 -.long 24 -.long 1 -.asciz "ARM" - -.quad 42 // platform -.quad 1 // version -.quad 0x0123456789ABCDEF // extra data +#--- gnu-42-1.s +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 
// Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 16 // Data size + .quad 42 // PAuth ABI platform + .quad 1 // PAuth ABI version + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-42-1.s -o gnu-42-1.o +# RUN: llvm-readelf --notes gnu-42-1.o | \ +# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x2a (unknown)" -DVERSION=0x1 %s +# RUN: llvm-readobj --notes gnu-42-1.o | \ +# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x2a (unknown)" -DVERSION=0x1 %s + +# ELF: Displaying notes found in: .note.gnu.property +# ELF-NEXT: Owner Data size Description +# ELF-NEXT: GNU 0x00000018 NT_GNU_PROPERTY_TYPE_0 (property note) +# ELF-NEXT: AArch64 PAuth ABI core info: platform [[PLATFORM]], version [[VERSION]] + +# OBJ: Notes [ +# OBJ-NEXT: NoteSection { +# OBJ-NEXT: Name: .note.gnu.property +# OBJ-NEXT: Offset: 0x40 +# OBJ-NEXT: Size: 0x28 +# OBJ-NEXT: Note { +# OBJ-NEXT: Owner: GNU +# OBJ-NEXT: Data size: 0x18 +# OBJ-NEXT: Type: NT_GNU_PROPERTY_TYPE_0 (property note) +# OBJ-NEXT: Property [ +# OBJ-NEXT: AArch64 PAuth ABI core info: platform [[PLATFORM]], version [[VERSION]] +# OBJ-NEXT: ] +# OBJ-NEXT: } +# OBJ-NEXT: } +# OBJ-NEXT: ] + +#--- gnu-0-0.s +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 16 // Data size + .quad 0 // PAuth ABI platform + .quad 0 // PAuth ABI version + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-0-0.s -o gnu-0-0.o +# RUN: llvm-readelf --notes gnu-0-0.o | \ +# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x0 (invalid)" -DVERSION=0x0 %s +# RUN: llvm-readobj --notes gnu-0-0.o | \ +# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x0 (invalid)" -DVERSION=0x0 %s + +#--- gnu-1-0.s +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 16 // Data size + .quad 1 // PAuth ABI platform + .quad 0 // PAuth ABI version + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-1-0.s -o gnu-1-0.o +# RUN: llvm-readelf --notes gnu-1-0.o | \ +# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x1 (baremetal)" -DVERSION=0x0 %s +# RUN: llvm-readobj --notes gnu-1-0.o | \ +# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x1 (baremetal)" -DVERSION=0x0 %s + +#--- gnu-0x10000002-85.s +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 16 // Data size + .quad 0x10000002 // PAuth ABI platform + .quad 85 // PAuth ABI version + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-0x10000002-85.s -o gnu-0x10000002-85.o +# RUN: llvm-readelf --notes gnu-0x10000002-85.o | \ +# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x10000002 (llvm_linux)" \ +# RUN: -DVERSION="0x55 (PointerAuthIntrinsics, !PointerAuthCalls, PointerAuthReturns, 
!PointerAuthAuthTraps, PointerAuthVTPtrAddressDiscrimination, !PointerAuthVTPtrTypeDiscrimination, PointerAuthInitFini)" %s +# RUN: llvm-readobj --notes gnu-0x10000002-85.o | \ +# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x10000002 (llvm_linux)" \ +# RUN: -DVERSION="0x55 (PointerAuthIntrinsics, !PointerAuthCalls, PointerAuthReturns, !PointerAuthAuthTraps, PointerAuthVTPtrAddressDiscrimination, !PointerAuthVTPtrTypeDiscrimination, PointerAuthInitFini)" %s + +#--- gnu-0x10000002-128.s +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 16 // Data size + .quad 0x10000002 // PAuth ABI platform + .quad 128 // PAuth ABI version + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-0x10000002-128.s -o gnu-0x10000002-128.o +# RUN: llvm-readelf --notes gnu-0x10000002-128.o | \ +# RUN: FileCheck --check-prefix=ELF -DPLATFORM="0x10000002 (llvm_linux)" -DVERSION="0x80 (unknown)" %s +# RUN: llvm-readobj --notes gnu-0x10000002-128.o | \ +# RUN: FileCheck --check-prefix=OBJ -DPLATFORM="0x10000002 (llvm_linux)" -DVERSION="0x80 (unknown)" %s + +#--- gnu-short.s +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 12 // Data size + .quad 42 // PAuth ABI platform + .word 1 // PAuth ABI version + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-short.s -o gnu-short.o +# RUN: llvm-readelf --notes gnu-short.o | \ +# RUN: FileCheck --check-prefix=ELF-ERR -DSIZE=28 -DDATASIZE=18 \ +# RUN: -DERR="" %s +# RUN: llvm-readobj --notes gnu-short.o | \ +# RUN: FileCheck --check-prefix=OBJ-ERR -DSIZE=28 -DDATASIZE=18 \ +# RUN: -DERR="" %s + +# ELF-ERR: Displaying notes found in: .note.gnu.property +# ELF-ERR-NEXT: Owner Data size Description +# ELF-ERR-NEXT: GNU 0x000000[[DATASIZE]] NT_GNU_PROPERTY_TYPE_0 (property note) +# ELF-ERR-NEXT: AArch64 PAuth ABI core info: [[ERR]] + +# OBJ-ERR: Notes [ +# OBJ-ERR-NEXT: NoteSection { +# OBJ-ERR-NEXT: Name: .note.gnu.property +# OBJ-ERR-NEXT: Offset: 0x40 +# OBJ-ERR-NEXT: Size: 0x[[SIZE]] +# OBJ-ERR-NEXT: Note { +# OBJ-ERR-NEXT: Owner: GNU +# OBJ-ERR-NEXT: Data size: 0x[[DATASIZE]] +# OBJ-ERR-NEXT: Type: NT_GNU_PROPERTY_TYPE_0 (property note) +# OBJ-ERR-NEXT: Property [ +# OBJ-ERR-NEXT: AArch64 PAuth ABI core info: [[ERR]] +# OBJ-ERR-NEXT: ] +# OBJ-ERR-NEXT: } +# OBJ-ERR-NEXT: } +# OBJ-ERR-NEXT: ] + +#--- gnu-long.s +.section ".note.gnu.property", "a" + .long 4 // Name length is always 4 ("GNU") + .long end - begin // Data length + .long 5 // Type: NT_GNU_PROPERTY_TYPE_0 + .asciz "GNU" // Name + .p2align 3 +begin: + # PAuth ABI property note + .long 0xc0000001 // Type: GNU_PROPERTY_AARCH64_FEATURE_PAUTH + .long 24 // Data size + .quad 42 // PAuth ABI platform + .quad 1 // PAuth ABI version + .quad 0x0123456789ABCDEF + .p2align 3 // Align to 8 byte for 64 bit +end: + +# RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu gnu-long.s -o gnu-long.o +# RUN: llvm-readelf --notes gnu-long.o | \ +# RUN: FileCheck --check-prefix=ELF-ERR -DSIZE=30 -DDATASIZE=20 \ +# RUN: -DERR="" %s 
+# RUN: llvm-readobj --notes gnu-long.o | \ +# RUN: FileCheck --check-prefix=OBJ-ERR -DSIZE=30 -DDATASIZE=20 \ +# RUN: -DERR="" %s diff --git a/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-note-gnu-property.s b/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-note-gnu-property.s index 377e6f9..b517f0b 100644 --- a/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-note-gnu-property.s +++ b/llvm/test/tools/llvm-readobj/ELF/AArch64/aarch64-note-gnu-property.s @@ -1,3 +1,5 @@ +// See tests for GNU_PROPERTY_AARCH64_FEATURE_PAUTH in aarch64-feature-pauth.s + // RUN: llvm-mc -filetype=obj -triple aarch64-linux-gnu %s -o %t // RUN: llvm-readelf --notes %t | FileCheck %s --check-prefix=GNU // RUN: llvm-readobj --notes %t | FileCheck %s --check-prefix=LLVM diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index 4b406ef..d6dda61 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -61,6 +61,7 @@ #include "llvm/Support/SystemZ/zOSSupport.h" #include "llvm/Support/raw_ostream.h" #include +#include #include #include #include @@ -5105,6 +5106,73 @@ template void GNUELFDumper::printAddrsig() { } } +template +static bool printAArch64PAuthABICoreInfo(raw_ostream &OS, uint32_t DataSize, + ArrayRef Desc) { + OS << " AArch64 PAuth ABI core info: "; + // DataSize - size without padding, Desc.size() - size with padding + if (DataSize != 16) { + OS << format("", DataSize); + return false; + } + + uint64_t Platform = + support::endian::read64(Desc.data() + 0); + uint64_t Version = support::endian::read64(Desc.data() + 8); + + const char *PlatformDesc = [Platform]() { + switch (Platform) { + case AARCH64_PAUTH_PLATFORM_INVALID: + return "invalid"; + case AARCH64_PAUTH_PLATFORM_BAREMETAL: + return "baremetal"; + case AARCH64_PAUTH_PLATFORM_LLVM_LINUX: + return "llvm_linux"; + default: + return "unknown"; + } + }(); + + std::string VersionDesc = [Platform, Version]() -> std::string { + if (Platform != AARCH64_PAUTH_PLATFORM_LLVM_LINUX) + return ""; + if (Version >= (1 << (AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_LAST + 1))) + return "unknown"; + + std::array + Flags; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INTRINSICS] = "Intrinsics"; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_CALLS] = "Calls"; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_RETURNS] = "Returns"; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_AUTHTRAPS] = "AuthTraps"; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRADDRDISCR] = + "VTPtrAddressDiscrimination"; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_VPTRTYPEDISCR] = + "VTPtrTypeDiscrimination"; + Flags[AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI] = "InitFini"; + + static_assert(AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_INITFINI == + AARCH64_PAUTH_PLATFORM_LLVM_LINUX_VERSION_LAST, + "Update when new enum items are defined"); + + std::string Desc; + for (uint32_t I = 0, End = Flags.size(); I < End; ++I) { + if (!(Version & (1 << I))) + Desc += '!'; + Desc += + Twine("PointerAuth" + Flags[I] + (I == End - 1 ? 
"" : ", ")).str(); + } + return Desc; + }(); + + OS << format("platform 0x%" PRIx64 " (%s), version 0x%" PRIx64, Platform, + PlatformDesc, Version); + if (!VersionDesc.empty()) + OS << format(" (%s)", VersionDesc.c_str()); + + return true; +} + template static std::string getGNUProperty(uint32_t Type, uint32_t DataSize, ArrayRef Data) { @@ -5162,6 +5230,9 @@ static std::string getGNUProperty(uint32_t Type, uint32_t DataSize, if (PrData) OS << format("", PrData); return OS.str(); + case GNU_PROPERTY_AARCH64_FEATURE_PAUTH: + printAArch64PAuthABICoreInfo(OS, DataSize, Data); + return OS.str(); case GNU_PROPERTY_X86_FEATURE_2_NEEDED: case GNU_PROPERTY_X86_FEATURE_2_USED: OS << "x86 feature " @@ -5364,29 +5435,6 @@ static bool printAndroidNote(raw_ostream &OS, uint32_t NoteType, } template -static bool printAArch64Note(raw_ostream &OS, uint32_t NoteType, - ArrayRef Desc) { - if (NoteType != NT_ARM_TYPE_PAUTH_ABI_TAG) - return false; - - OS << " AArch64 PAuth ABI tag: "; - if (Desc.size() < 16) { - OS << format("", Desc.size()); - return false; - } - - uint64_t Platform = endian::read64(Desc.data() + 0); - uint64_t Version = endian::read64(Desc.data() + 8); - OS << format("platform 0x%" PRIx64 ", version 0x%" PRIx64, Platform, Version); - - if (Desc.size() > 16) - OS << ", additional info 0x" - << toHex(ArrayRef(Desc.data() + 16, Desc.size() - 16)); - - return true; -} - -template void GNUELFDumper::printMemtag( const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc, @@ -5783,10 +5831,6 @@ const NoteType AndroidNoteTypes[] = { "NT_ANDROID_TYPE_MEMTAG (Android memory tagging information)"}, }; -const NoteType ARMNoteTypes[] = { - {ELF::NT_ARM_TYPE_PAUTH_ABI_TAG, "NT_ARM_TYPE_PAUTH_ABI_TAG"}, -}; - const NoteType CoreNoteTypes[] = { {ELF::NT_PRSTATUS, "NT_PRSTATUS (prstatus structure)"}, {ELF::NT_FPREGSET, "NT_FPREGSET (floating point registers)"}, @@ -5905,8 +5949,6 @@ StringRef getNoteTypeName(const typename ELFT::Note &Note, unsigned ELFType) { return FindNote(LLVMOMPOFFLOADNoteTypes); if (Name == "Android") return FindNote(AndroidNoteTypes); - if (Name == "ARM") - return FindNote(ARMNoteTypes); if (ELFType == ELF::ET_CORE) return FindNote(CoreNoteTypes); @@ -6062,9 +6104,6 @@ template void GNUELFDumper::printNotes() { } else if (Name == "Android") { if (printAndroidNote(OS, Type, Descriptor)) return Error::success(); - } else if (Name == "ARM") { - if (printAArch64Note(OS, Type, Descriptor)) - return Error::success(); } if (!Descriptor.empty()) { OS << " description data:"; @@ -7703,27 +7742,6 @@ static bool printAndroidNoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, } template -static bool printAarch64NoteLLVMStyle(uint32_t NoteType, ArrayRef Desc, - ScopedPrinter &W) { - if (NoteType != NT_ARM_TYPE_PAUTH_ABI_TAG) - return false; - - if (Desc.size() < 16) - return false; - - uint64_t platform = endian::read64(Desc.data() + 0); - uint64_t version = endian::read64(Desc.data() + 8); - W.printNumber("Platform", platform); - W.printNumber("Version", version); - - if (Desc.size() > 16) - W.printString("Additional info", - toHex(ArrayRef(Desc.data() + 16, Desc.size() - 16))); - - return true; -} - -template void LLVMELFDumper::printMemtag( const ArrayRef> DynamicEntries, const ArrayRef AndroidNoteDesc, @@ -7859,9 +7877,6 @@ template void LLVMELFDumper::printNotes() { } else if (Name == "Android") { if (printAndroidNoteLLVMStyle(Type, Descriptor, W)) return Error::success(); - } else if (Name == "ARM") { - if (printAarch64NoteLLVMStyle(Type, Descriptor, W)) - return Error::success(); } if 
(!Descriptor.empty()) { W.printBinaryBlock("Description data", Descriptor); -- cgit v1.1 From d4cd65ecf2546e509f43363f96364c976f49b9da Mon Sep 17 00:00:00 2001 From: Andreas Jonson Date: Thu, 4 Apr 2024 08:48:11 +0200 Subject: [LVI] Handle range attributes (#86413) This adds handling of range attribute for return values of Call and Invoke in getFromRangeMetadata and handling of argument with range attribute in solveBlockValueNonLocal. There is one additional check of the range metadata at line 1120 in getValueFromSimpleICmpCondition that is not covered in this PR as after https://github.com/llvm/llvm-project/pull/75311 there is no test that cover that check any more and I have not been able to create a test that trigger that code. --- llvm/lib/Analysis/LazyValueInfo.cpp | 13 +++- .../Transforms/CorrelatedValuePropagation/range.ll | 86 +++++++++++++++++++--- 2 files changed, 84 insertions(+), 15 deletions(-) diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp index b8bc811..6cded82 100644 --- a/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/llvm/lib/Analysis/LazyValueInfo.cpp @@ -588,10 +588,14 @@ LazyValueInfoImpl::getBlockValue(Value *Val, BasicBlock *BB, static ValueLatticeElement getFromRangeMetadata(Instruction *BBI) { switch (BBI->getOpcode()) { - default: break; - case Instruction::Load: + default: + break; case Instruction::Call: case Instruction::Invoke: + if (std::optional Range = cast(BBI)->getRange()) + return ValueLatticeElement::getRange(*Range); + [[fallthrough]]; + case Instruction::Load: if (MDNode *Ranges = BBI->getMetadata(LLVMContext::MD_range)) if (isa(BBI->getType())) { return ValueLatticeElement::getRange( @@ -706,10 +710,11 @@ std::optional LazyValueInfoImpl::solveBlockValueNonLocal(Value *Val, BasicBlock *BB) { ValueLatticeElement Result; // Start Undefined. - // If this is the entry block, we must be asking about an argument. The - // value is overdefined. + // If this is the entry block, we must be asking about an argument. 
if (BB->isEntryBlock()) { assert(isa(Val) && "Unknown live-in to the entry block"); + if (std::optional Range = cast(Val)->getRange()) + return ValueLatticeElement::getRange(*Range); return ValueLatticeElement::getOverdefined(); } diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/range.ll b/llvm/test/Transforms/CorrelatedValuePropagation/range.ll index cc66cbe..ce1b591 100644 --- a/llvm/test/Transforms/CorrelatedValuePropagation/range.ll +++ b/llvm/test/Transforms/CorrelatedValuePropagation/range.ll @@ -102,9 +102,9 @@ if.end8: define i32 @test4(i32 %c) nounwind { ; CHECK-LABEL: @test4( ; CHECK-NEXT: switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [ -; CHECK-NEXT: i32 1, label [[SW_BB:%.*]] -; CHECK-NEXT: i32 2, label [[SW_BB]] -; CHECK-NEXT: i32 4, label [[SW_BB]] +; CHECK-NEXT: i32 1, label [[SW_BB:%.*]] +; CHECK-NEXT: i32 2, label [[SW_BB]] +; CHECK-NEXT: i32 4, label [[SW_BB]] ; CHECK-NEXT: ] ; CHECK: sw.bb: ; CHECK-NEXT: br i1 true, label [[IF_THEN:%.*]], label [[IF_END:%.*]] @@ -207,8 +207,8 @@ define i1 @test7(i32 %c) nounwind { ; CHECK-LABEL: @test7( ; CHECK-NEXT: entry: ; CHECK-NEXT: switch i32 [[C:%.*]], label [[SW_DEFAULT:%.*]] [ -; CHECK-NEXT: i32 6, label [[SW_BB:%.*]] -; CHECK-NEXT: i32 7, label [[SW_BB]] +; CHECK-NEXT: i32 6, label [[SW_BB:%.*]] +; CHECK-NEXT: i32 7, label [[SW_BB]] ; CHECK-NEXT: ] ; CHECK: sw.bb: ; CHECK-NEXT: ret i1 true @@ -790,8 +790,8 @@ define i32 @test18(i8 %a) { ; CHECK-NEXT: br label [[DISPATCH:%.*]] ; CHECK: dispatch: ; CHECK-NEXT: switch i8 [[A]], label [[DISPATCH]] [ -; CHECK-NEXT: i8 93, label [[TARGET93:%.*]] -; CHECK-NEXT: i8 -111, label [[DISPATCH]] +; CHECK-NEXT: i8 93, label [[TARGET93:%.*]] +; CHECK-NEXT: i8 -111, label [[DISPATCH]] ; CHECK-NEXT: ] ; CHECK: target93: ; CHECK-NEXT: ret i32 93 @@ -817,8 +817,8 @@ define i8 @test19(i8 %a) { ; CHECK-NEXT: br label [[DISPATCH:%.*]] ; CHECK: dispatch: ; CHECK-NEXT: switch i8 [[A]], label [[DISPATCH]] [ -; CHECK-NEXT: i8 93, label [[TARGET93:%.*]] -; CHECK-NEXT: i8 -111, label [[DISPATCH]] +; CHECK-NEXT: i8 93, label [[TARGET93:%.*]] +; CHECK-NEXT: i8 -111, label [[DISPATCH]] ; CHECK-NEXT: ] ; CHECK: target93: ; CHECK-NEXT: ret i8 96 @@ -846,8 +846,8 @@ define i1 @test20(i64 %a) { ; CHECK-NEXT: br label [[DISPATCH:%.*]] ; CHECK: dispatch: ; CHECK-NEXT: switch i64 [[A]], label [[DEFAULT:%.*]] [ -; CHECK-NEXT: i64 0, label [[EXIT2:%.*]] -; CHECK-NEXT: i64 -2147483647, label [[EXIT2]] +; CHECK-NEXT: i64 0, label [[EXIT2:%.*]] +; CHECK-NEXT: i64 -2147483647, label [[EXIT2]] ; CHECK-NEXT: ] ; CHECK: default: ; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[B]], 0 @@ -1123,6 +1123,70 @@ else: ret i1 true } +define i1 @icmp_eq_range_attr(i8 range(i8 1, 0) %i) { +; CHECK-LABEL: @icmp_eq_range_attr( +; CHECK-NEXT: ret i1 false +; + %cmp = icmp eq i8 %i, 0 + ret i1 %cmp +} + +define i1 @neg_icmp_eq_range_attr(i8 range(i8 -1, 1) %i) { +; CHECK-LABEL: @neg_icmp_eq_range_attr( +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[I:%.*]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %cmp = icmp eq i8 %i, 0 + ret i1 %cmp +} + +declare range(i8 1, 0) i8 @returns_non_zero_range_helper() +declare range(i8 -1, 1) i8 @returns_contain_zero_range_helper() + +define i1 @icmp_eq_range_return() { +; CHECK-LABEL: @icmp_eq_range_return( +; CHECK-NEXT: [[I:%.*]] = call i8 @returns_non_zero_range_helper() +; CHECK-NEXT: ret i1 false +; + %i = call i8 @returns_non_zero_range_helper() + %cmp = icmp eq i8 %i, 0 + ret i1 %cmp +} + +define i1 @neg_icmp_eq_range_return() { +; CHECK-LABEL: @neg_icmp_eq_range_return( +; CHECK-NEXT: [[I:%.*]] = call 
i8 @returns_contain_zero_range_helper() +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[I]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %i = call i8 @returns_contain_zero_range_helper() + %cmp = icmp eq i8 %i, 0 + ret i1 %cmp +} + +declare i8 @returns_i8_helper() + +define i1 @icmp_eq_range_call() { +; CHECK-LABEL: @icmp_eq_range_call( +; CHECK-NEXT: [[I:%.*]] = call range(i8 1, 0) i8 @returns_i8_helper() +; CHECK-NEXT: ret i1 false +; + %i = call range(i8 1, 0) i8 @returns_i8_helper() + %cmp = icmp eq i8 %i, 0 + ret i1 %cmp +} + +define i1 @neg_icmp_eq_range_call() { +; CHECK-LABEL: @neg_icmp_eq_range_call( +; CHECK-NEXT: [[I:%.*]] = call range(i8 0, 11) i8 @returns_i8_helper() +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[I]], 0 +; CHECK-NEXT: ret i1 [[CMP]] +; + %i = call range(i8 0, 11) i8 @returns_i8_helper() + %cmp = icmp eq i8 %i, 0 + ret i1 %cmp +} + declare i16 @llvm.ctlz.i16(i16, i1) declare i16 @llvm.cttz.i16(i16, i1) declare i16 @llvm.ctpop.i16(i16) -- cgit v1.1 From d6e458219897fad0e460e663833b2190af48c06d Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Thu, 4 Apr 2024 09:34:37 +0200 Subject: [MLIR][LLVM][Mem2Reg] Relax type equality requirement for load and store (#87504) This commit relaxes Mem2Reg's type equality requirement for the LLVM dialect's load and store operations. For now, we only allow loads to be promoted if the reaching definition can be casted into a value of the target type. For stores, all type checks are removed, as a non-volatile store that does not write out the alloca's pointer can always be deleted. --- mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 38 ++++- mlir/test/Dialect/LLVMIR/mem2reg.mlir | 197 ++++++++++++++++++++++++++ 2 files changed, 231 insertions(+), 4 deletions(-) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp index 06c1fdd..f8fd3bd 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp @@ -126,6 +126,13 @@ Value LLVM::StoreOp::getStored(const MemorySlot &slot, RewriterBase &rewriter) { return getValue(); } +/// Checks that two types are the same or can be cast into one another. +static bool areCastCompatible(const DataLayout &layout, Type lhs, Type rhs) { + return lhs == rhs || (!isa(lhs) && + !isa(rhs) && + layout.getTypeSize(lhs) == layout.getTypeSize(rhs)); +} + bool LLVM::LoadOp::canUsesBeRemoved( const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, SmallVectorImpl &newBlockingUses, @@ -138,7 +145,29 @@ bool LLVM::LoadOp::canUsesBeRemoved( // be removed (provided it loads the exact stored value and is not // volatile). return blockingUse == slot.ptr && getAddr() == slot.ptr && - getResult().getType() == slot.elemType && !getVolatile_(); + areCastCompatible(dataLayout, getResult().getType(), slot.elemType) && + !getVolatile_(); +} + +/// Constructs operations that convert `inputValue` into a new value of type +/// `targetType`. Assumes that this conversion is possible. 
+static Value createConversionSequence(RewriterBase &rewriter, Location loc, + Value inputValue, Type targetType) { + if (inputValue.getType() == targetType) + return inputValue; + + if (!isa(targetType) && + !isa(inputValue.getType())) + return rewriter.createOrFold(loc, targetType, inputValue); + + if (!isa(targetType)) + return rewriter.createOrFold(loc, targetType, inputValue); + + if (!isa(inputValue.getType())) + return rewriter.createOrFold(loc, targetType, inputValue); + + return rewriter.createOrFold(loc, targetType, + inputValue); } DeletionKind LLVM::LoadOp::removeBlockingUses( @@ -146,7 +175,9 @@ DeletionKind LLVM::LoadOp::removeBlockingUses( RewriterBase &rewriter, Value reachingDefinition) { // `canUsesBeRemoved` checked this blocking use must be the loaded slot // pointer. - rewriter.replaceAllUsesWith(getResult(), reachingDefinition); + Value newResult = createConversionSequence( + rewriter, getLoc(), reachingDefinition, getResult().getType()); + rewriter.replaceAllUsesWith(getResult(), newResult); return DeletionKind::Delete; } @@ -161,8 +192,7 @@ bool LLVM::StoreOp::canUsesBeRemoved( // fine, provided we are currently promoting its target value. Don't allow a // store OF the slot pointer, only INTO the slot pointer. return blockingUse == slot.ptr && getAddr() == slot.ptr && - getValue() != slot.ptr && getValue().getType() == slot.elemType && - !getVolatile_(); + getValue() != slot.ptr && !getVolatile_(); } DeletionKind LLVM::StoreOp::removeBlockingUses( diff --git a/mlir/test/Dialect/LLVMIR/mem2reg.mlir b/mlir/test/Dialect/LLVMIR/mem2reg.mlir index 90e56c1..d6d5e1b 100644 --- a/mlir/test/Dialect/LLVMIR/mem2reg.mlir +++ b/mlir/test/Dialect/LLVMIR/mem2reg.mlir @@ -697,3 +697,200 @@ llvm.func @transitive_reaching_def() -> !llvm.ptr { %3 = llvm.load %1 {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr llvm.return %3 : !llvm.ptr } + +// ----- + +// CHECK-LABEL: @load_int_from_float +llvm.func @load_int_from_float() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x f32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[UNDEF]] : f32 to i32 + // CHECK: llvm.return %[[BITCAST:.*]] + llvm.return %2 : i32 +} + +// ----- + +// CHECK-LABEL: @load_float_from_int +llvm.func @load_float_from_int() -> f32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> f32 + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[UNDEF]] : i32 to f32 + // CHECK: llvm.return %[[BITCAST:.*]] + llvm.return %2 : f32 +} + +// ----- + +// CHECK-LABEL: @load_int_from_vector +llvm.func @load_int_from_vector() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x vector<2xi16> : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[UNDEF]] : vector<2xi16> to i32 + // CHECK: llvm.return %[[BITCAST:.*]] + llvm.return %2 : i32 +} + +// ----- + +// LLVM arrays cannot be bitcasted, so the following cannot be promoted. 
+ +// CHECK-LABEL: @load_int_from_array +llvm.func @load_int_from_array() -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: llvm.alloca + %1 = llvm.alloca %0 x !llvm.array<2 x i16> : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK-NOT: llvm.bitcast + llvm.return %2 : i32 +} + +// ----- + +// CHECK-LABEL: @store_int_to_float +// CHECK-SAME: %[[ARG:.*]]: i32 +llvm.func @store_int_to_float(%arg: i32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x f32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + llvm.store %arg, %1 {alignment = 4 : i64} : i32, !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: llvm.return %[[ARG]] + llvm.return %2 : i32 +} + +// ----- + +// CHECK-LABEL: @store_float_to_int +// CHECK-SAME: %[[ARG:.*]]: f32 +llvm.func @store_float_to_int(%arg: f32) -> i32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + llvm.store %arg, %1 {alignment = 4 : i64} : f32, !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[ARG]] : f32 to i32 + // CHECK: llvm.return %[[BITCAST]] + llvm.return %2 : i32 +} + +// ----- + +// CHECK-LABEL: @store_int_to_vector +// CHECK-SAME: %[[ARG:.*]]: i32 +llvm.func @store_int_to_vector(%arg: i32) -> vector<4xi8> { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x vector<2xi16> {alignment = 4 : i64} : (i32) -> !llvm.ptr + llvm.store %arg, %1 {alignment = 4 : i64} : i32, !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> vector<4xi8> + // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[ARG]] : i32 to vector<4xi8> + // CHECK: llvm.return %[[BITCAST]] + llvm.return %2 : vector<4xi8> +} + +// ----- + +// CHECK-LABEL: @load_ptr_from_int +llvm.func @load_ptr_from_int() -> !llvm.ptr { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x i64 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> !llvm.ptr + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef + // CHECK: %[[CAST:.*]] = llvm.inttoptr %[[UNDEF]] : i64 to !llvm.ptr + // CHECK: llvm.return %[[CAST:.*]] + llvm.return %2 : !llvm.ptr +} + +// ----- + +// CHECK-LABEL: @load_int_from_ptr +llvm.func @load_int_from_ptr() -> i64 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x !llvm.ptr {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64 + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef + // CHECK: %[[CAST:.*]] = llvm.ptrtoint %[[UNDEF]] : !llvm.ptr to i64 + // CHECK: llvm.return %[[CAST:.*]] + llvm.return %2 : i64 +} + +// ----- + +// CHECK-LABEL: @load_ptr_addrspace_cast +llvm.func @load_ptr_addrspace_cast() -> !llvm.ptr<2> { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: llvm.alloca + %1 = llvm.alloca %0 x !llvm.ptr<1> {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> !llvm.ptr<2> + // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef + // CHECK: %[[CAST:.*]] = llvm.addrspacecast %[[UNDEF]] : !llvm.ptr<1> to !llvm.ptr<2> + // CHECK: llvm.return %[[CAST:.*]] + llvm.return %2 : !llvm.ptr<2> +} + +// ----- + +// CHECK-LABEL: @load_smaller_int +llvm.func @load_smaller_int() -> i16 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: llvm.alloca + %1 = 
llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i16 + llvm.return %2 : i16 +} + +// ----- + +// CHECK-LABEL: @load_different_type_smaller +llvm.func @load_different_type_smaller() -> f32 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: llvm.alloca + %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> f32 + llvm.return %2 : f32 +} + +// ----- + +// This alloca is too small for the load, still, mem2reg should not touch it. + +// CHECK-LABEL: @impossible_load +llvm.func @impossible_load() -> f64 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: llvm.alloca + %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> f64 + llvm.return %2 : f64 +} + +// ----- + +// Verifies that mem2reg does not introduce address space casts of pointers +// with different bitsize. + +module attributes { dlti.dl_spec = #dlti.dl_spec< + #dlti.dl_entry, dense<[32, 64, 64]> : vector<3xi64>>, + #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi64>> +>} { + + // CHECK-LABEL: @load_ptr_addrspace_cast_different_size + llvm.func @load_ptr_addrspace_cast_different_size() -> !llvm.ptr<2> { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK: llvm.alloca + %1 = llvm.alloca %0 x !llvm.ptr<1> {alignment = 4 : i64} : (i32) -> !llvm.ptr + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> !llvm.ptr<2> + llvm.return %2 : !llvm.ptr<2> + } +} -- cgit v1.1 From e69cab7f37cf6f35306356cddb26049fd6138df7 Mon Sep 17 00:00:00 2001 From: Jan Patrick Lehr Date: Thu, 4 Apr 2024 09:37:45 +0200 Subject: [Flang] Make SLES 15 build tests (#87498) SLES 15 comes with a GCC 7.5 as default, which does not support the C++17 `` header. This results in build errors when trying to run `check-flang`. This patch addresses that and uses the older `std::stol` for the string -> number conversion to allow the SLES 15 buildbot (https://lab.llvm.org/staging/#/builders/193) to turn green. --- flang/unittests/Runtime/Time.cpp | 35 +++++++++++++++-------------------- 1 file changed, 15 insertions(+), 20 deletions(-) diff --git a/flang/unittests/Runtime/Time.cpp b/flang/unittests/Runtime/Time.cpp index ec0caa7..5c93282 100644 --- a/flang/unittests/Runtime/Time.cpp +++ b/flang/unittests/Runtime/Time.cpp @@ -12,7 +12,7 @@ #include "flang/Runtime/time-intrinsic.h" #include #include -#include +#include #include using namespace Fortran::runtime; @@ -104,10 +104,9 @@ TEST(TimeIntrinsics, DateAndTime) { EXPECT_TRUE(true); } else { count_t number{-1}; - auto [_, ec]{ - std::from_chars(date.data(), date.data() + date.size(), number)}; - ASSERT_TRUE(ec != std::errc::invalid_argument && - ec != std::errc::result_out_of_range); + // Use stol to allow GCC 7.5 to build tests + number = std::stol(date); + ASSERT_TRUE(errno != ERANGE); EXPECT_GE(number, 0); auto year = number / 10000; auto month = (number - year * 10000) / 100; @@ -121,14 +120,15 @@ TEST(TimeIntrinsics, DateAndTime) { } // Validate time is hhmmss.sss or blank. 
+ std::string acceptedPattern("hhmmss.sss"); if (isBlank(time)) { EXPECT_TRUE(true); } else { count_t number{-1}; - auto [next, ec]{ - std::from_chars(time.data(), time.data() + date.size(), number)}; - ASSERT_TRUE(ec != std::errc::invalid_argument && - ec != std::errc::result_out_of_range); + // Use stol to allow GCC 7.5 to build tests + auto dotPosition = acceptedPattern.find('.'); + number = std::stol(time.substr(0, dotPosition)); + ASSERT_TRUE(errno != ERANGE); ASSERT_GE(number, 0); auto hours = number / 10000; auto minutes = (number - hours * 10000) / 100; @@ -137,15 +137,11 @@ TEST(TimeIntrinsics, DateAndTime) { EXPECT_LE(minutes, 59); // Accept 60 for leap seconds. EXPECT_LE(seconds, 60); - ASSERT_TRUE(next != time.data() + time.size()); - EXPECT_EQ(*next, '.'); + EXPECT_EQ(time.substr(dotPosition, 1), "."); count_t milliseconds{-1}; - ASSERT_TRUE(next + 1 != time.data() + time.size()); - auto [_, ec2]{ - std::from_chars(next + 1, time.data() + date.size(), milliseconds)}; - ASSERT_TRUE(ec2 != std::errc::invalid_argument && - ec2 != std::errc::result_out_of_range); + milliseconds = std::stol(time.substr(dotPosition + 1, 3)); + ASSERT_TRUE(errno != ERANGE); EXPECT_GE(milliseconds, 0); EXPECT_LE(milliseconds, 999); } @@ -157,10 +153,9 @@ TEST(TimeIntrinsics, DateAndTime) { ASSERT_TRUE(zone.size() > 1); EXPECT_TRUE(zone[0] == '+' || zone[0] == '-'); count_t number{-1}; - auto [next, ec]{ - std::from_chars(zone.data() + 1, zone.data() + zone.size(), number)}; - ASSERT_TRUE(ec != std::errc::invalid_argument && - ec != std::errc::result_out_of_range); + // Use stol to allow GCC 7.5 to build tests + number = std::stol(zone.substr(1, 4)); + ASSERT_TRUE(errno != ERANGE); ASSERT_GE(number, 0); auto hours = number / 100; auto minutes = number % 100; -- cgit v1.1 From d542cb3175f0c5691808b9c50234788c7be1154f Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 4 Apr 2024 16:56:31 +0900 Subject: [mlir][Interfaces][NFC] `ValueBoundsConstraintSet`: Delete dead code (#86098) There is an assertion that the stop condition is not satisfied for the the starting point at the beginning of `computeBound`. Therefore, that case does not have to be handled later on in that function. --- mlir/lib/Interfaces/ValueBoundsOpInterface.cpp | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp index 99598f2..9a3185d 100644 --- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp +++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp @@ -286,18 +286,6 @@ LogicalResult ValueBoundsConstraintSet::computeBound( Builder b(value.getContext()); mapOperands.clear(); - if (stopCondition(value, dim)) { - // Special case: If the stop condition is satisfied for the input - // value/dimension, directly return it. - mapOperands.push_back(std::make_pair(value, dim)); - AffineExpr bound = b.getAffineDimExpr(0); - if (type == BoundType::UB) - bound = bound + ubAdjustment; - resultMap = AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, - b.getAffineDimExpr(0)); - return success(); - } - // Process the backward slice of `value` (i.e., reverse use-def chain) until // `stopCondition` is met. 
ValueDim valueDim = std::make_pair(value, dim.value_or(kIndexValue)); -- cgit v1.1 From 35886dc63a2d024e20c10d2e1cb3f5fa5d9f72cc Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 4 Apr 2024 09:56:04 +0200 Subject: [clang] Remove an unintended statement, NFC --- clang/lib/Sema/SemaTemplate.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/clang/lib/Sema/SemaTemplate.cpp b/clang/lib/Sema/SemaTemplate.cpp index a2b8cc1..d3def13 100644 --- a/clang/lib/Sema/SemaTemplate.cpp +++ b/clang/lib/Sema/SemaTemplate.cpp @@ -2711,7 +2711,6 @@ SmallVector TemplateParamsReferencedInTemplateArgumentList( : TemplateParams(TemplateParams.begin(), TemplateParams.end()) {} bool VisitTemplateTypeParmType(TemplateTypeParmType *TTP) { - TTP->getIndex(); MarkAppeared(TTP->getDecl()); return true; } -- cgit v1.1 From 550e09db1ad6e8f28546fa0c24b5582e57e210c4 Mon Sep 17 00:00:00 2001 From: Kadir Cetinkaya Date: Thu, 4 Apr 2024 09:56:15 +0200 Subject: [clangd][NFC] Delete dead code --- clang-tools-extra/clangd/IncludeCleaner.h | 9 --------- 1 file changed, 9 deletions(-) diff --git a/clang-tools-extra/clangd/IncludeCleaner.h b/clang-tools-extra/clangd/IncludeCleaner.h index 387763d..624e211 100644 --- a/clang-tools-extra/clangd/IncludeCleaner.h +++ b/clang-tools-extra/clangd/IncludeCleaner.h @@ -62,15 +62,6 @@ issueIncludeCleanerDiagnostics(ParsedAST &AST, llvm::StringRef Code, const ThreadsafeFS &TFS, HeaderFilter IgnoreHeader = {}); -/// Affects whether standard library includes should be considered for -/// removal. This is off by default for now due to implementation limitations: -/// - macros are not tracked -/// - symbol names without a unique associated header are not tracked -/// - references to std-namespaced C types are not properly tracked: -/// instead of std::size_t -> we see ::size_t -> -/// FIXME: remove this hack once the implementation is good enough. -void setIncludeCleanerAnalyzesStdlib(bool B); - /// Converts the clangd include representation to include-cleaner /// include representation. include_cleaner::Includes convertIncludes(const ParsedAST &); -- cgit v1.1 From 5e4a44380eed172e9f2954f462e94dac97b8e728 Mon Sep 17 00:00:00 2001 From: Matthias Springer Date: Thu, 4 Apr 2024 17:05:47 +0900 Subject: [mlir][Interfaces][NFC] `ValueBoundsConstraintSet`: Pass stop condition in the constructor (#86099) This commit changes the API of `ValueBoundsConstraintSet`: the stop condition is now passed to the constructor instead of `processWorklist`. That makes it easier to add items to the worklist multiple times and process them in a consistent manner. The current `ValueBoundsConstraintSet` is passed as a reference to the stop function, so that the stop function can be defined before the the `ValueBoundsConstraintSet` is constructed. This change is in preparation of adding support for branches. 
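Before the diff, a small self-contained C++ sketch of the resulting calling convention may help. It is an editorial illustration with invented names (ConstraintSet, StopFn, processed) and is not the MLIR ValueBoundsConstraintSet API itself; it only shows the point described above: the stop condition is stored by the constructor and is invoked with a reference to the object, so the callback can be defined before the object is constructed and can still inspect its state while the worklist is processed.

  #include <functional>
  #include <iostream>
  #include <queue>
  #include <vector>

  struct ConstraintSet {
    // The callback receives the set itself, mirroring the new StopConditionFn shape.
    using StopFn = std::function<bool(int item, ConstraintSet &self)>;

    explicit ConstraintSet(StopFn stop) : stopCondition(std::move(stop)) {}

    void processWorklist() {
      while (!worklist.empty()) {
        int item = worklist.front();
        worklist.pop();
        if (stopCondition(item, *this)) // the callback may inspect *this
          continue;
        processed.push_back(item);
        // A real implementation would push newly discovered items here.
      }
    }

    StopFn stopCondition;
    std::queue<int> worklist;
    std::vector<int> processed;
  };

  int main() {
    // The stop condition is written before the set exists, which is what the
    // constructor-based API enables.
    auto stop = [](int item, ConstraintSet &cstr) {
      return item > 2 || cstr.processed.size() >= 3u;
    };
    ConstraintSet cstr(stop);
    for (int i : {1, 2, 3, 4})
      cstr.worklist.push(i);
    cstr.processWorklist();
    std::cout << cstr.processed.size() << " items processed\n"; // prints "2 items processed"
  }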
--- .../Vector/IR/ScalableValueBoundsConstraintSet.h | 9 ++- .../mlir/Interfaces/ValueBoundsOpInterface.h | 16 +++-- .../Dialect/Affine/Transforms/ReifyValueBounds.cpp | 6 +- .../Dialect/Arith/Transforms/ReifyValueBounds.cpp | 6 +- .../lib/Dialect/Linalg/Transforms/HoistPadding.cpp | 2 +- .../Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp | 2 +- .../Vector/IR/ScalableValueBoundsConstraintSet.cpp | 21 ++++-- mlir/lib/Interfaces/ValueBoundsOpInterface.cpp | 82 ++++++++++++---------- .../lib/Dialect/Affine/TestReifyValueBounds.cpp | 9 ++- 9 files changed, 90 insertions(+), 63 deletions(-) diff --git a/mlir/include/mlir/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.h b/mlir/include/mlir/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.h index 31e19ff..67a6581 100644 --- a/mlir/include/mlir/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.h +++ b/mlir/include/mlir/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.h @@ -29,9 +29,12 @@ struct ValueBoundsConstraintSet : protected ::mlir::ValueBoundsConstraintSet { struct ScalableValueBoundsConstraintSet : public llvm::RTTIExtends { - ScalableValueBoundsConstraintSet(MLIRContext *context, unsigned vscaleMin, - unsigned vscaleMax) - : RTTIExtends(context), vscaleMin(vscaleMin), vscaleMax(vscaleMax){}; + ScalableValueBoundsConstraintSet( + MLIRContext *context, + ValueBoundsConstraintSet::StopConditionFn stopCondition, + unsigned vscaleMin, unsigned vscaleMax) + : RTTIExtends(context, stopCondition), vscaleMin(vscaleMin), + vscaleMax(vscaleMax) {}; using RTTIExtends::bound; using RTTIExtends::StopConditionFn; diff --git a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h index bdfd689..83107a3 100644 --- a/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h +++ b/mlir/include/mlir/Interfaces/ValueBoundsOpInterface.h @@ -117,8 +117,9 @@ public: /// /// The first parameter of the function is the shaped value/index-typed /// value. The second parameter is the dimension in case of a shaped value. - using StopConditionFn = - function_ref /*dim*/)>; + /// The third parameter is this constraint set. + using StopConditionFn = std::function /*dim*/, ValueBoundsConstraintSet &cstr)>; /// Compute a bound for the given index-typed value or shape dimension size. /// The computed bound is stored in `resultMap`. The operands of the bound are @@ -271,22 +272,20 @@ protected: /// An index-typed value or the dimension of a shaped-type value. using ValueDim = std::pair; - ValueBoundsConstraintSet(MLIRContext *ctx); + ValueBoundsConstraintSet(MLIRContext *ctx, StopConditionFn stopCondition); /// Populates the constraint set for a value/map without actually computing /// the bound. Returns the position for the value/map (via the return value /// and `posOut` output parameter). int64_t populateConstraintsSet(Value value, - std::optional dim = std::nullopt, - StopConditionFn stopCondition = nullptr); + std::optional dim = std::nullopt); int64_t populateConstraintsSet(AffineMap map, ValueDimList mapOperands, - StopConditionFn stopCondition = nullptr, int64_t *posOut = nullptr); /// Iteratively process all elements on the worklist until an index-typed /// value or shaped value meets `stopCondition`. Such values are not processed /// any further. - void processWorklist(StopConditionFn stopCondition); + void processWorklist(); /// Bound the given column in the underlying constraint set by the given /// expression. @@ -333,6 +332,9 @@ protected: /// Builder for constructing affine expressions. 
Builder builder; + + /// The current stop condition function. + StopConditionFn stopCondition = nullptr; }; } // namespace mlir diff --git a/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp b/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp index 37b36f7..117ee8e 100644 --- a/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/ReifyValueBounds.cpp @@ -84,7 +84,8 @@ FailureOr mlir::affine::reifyShapedValueDimBound( OpBuilder &b, Location loc, presburger::BoundType type, Value value, int64_t dim, ValueBoundsConstraintSet::StopConditionFn stopCondition, bool closedUB) { - auto reifyToOperands = [&](Value v, std::optional d) { + auto reifyToOperands = [&](Value v, std::optional d, + ValueBoundsConstraintSet &cstr) { // We are trying to reify a bound for `value` in terms of the owning op's // operands. Construct a stop condition that evaluates to "true" for any SSA // value except for `value`. I.e., the bound will be computed in terms of @@ -100,7 +101,8 @@ FailureOr mlir::affine::reifyShapedValueDimBound( FailureOr mlir::affine::reifyIndexValueBound( OpBuilder &b, Location loc, presburger::BoundType type, Value value, ValueBoundsConstraintSet::StopConditionFn stopCondition, bool closedUB) { - auto reifyToOperands = [&](Value v, std::optional d) { + auto reifyToOperands = [&](Value v, std::optional d, + ValueBoundsConstraintSet &cstr) { return v != value; }; return reifyValueBound(b, loc, type, value, /*dim=*/std::nullopt, diff --git a/mlir/lib/Dialect/Arith/Transforms/ReifyValueBounds.cpp b/mlir/lib/Dialect/Arith/Transforms/ReifyValueBounds.cpp index 8d9fd14..fad2212 100644 --- a/mlir/lib/Dialect/Arith/Transforms/ReifyValueBounds.cpp +++ b/mlir/lib/Dialect/Arith/Transforms/ReifyValueBounds.cpp @@ -119,7 +119,8 @@ FailureOr mlir::arith::reifyShapedValueDimBound( OpBuilder &b, Location loc, presburger::BoundType type, Value value, int64_t dim, ValueBoundsConstraintSet::StopConditionFn stopCondition, bool closedUB) { - auto reifyToOperands = [&](Value v, std::optional d) { + auto reifyToOperands = [&](Value v, std::optional d, + ValueBoundsConstraintSet &cstr) { // We are trying to reify a bound for `value` in terms of the owning op's // operands. Construct a stop condition that evaluates to "true" for any SSA // value expect for `value`. 
I.e., the bound will be computed in terms of @@ -135,7 +136,8 @@ FailureOr mlir::arith::reifyShapedValueDimBound( FailureOr mlir::arith::reifyIndexValueBound( OpBuilder &b, Location loc, presburger::BoundType type, Value value, ValueBoundsConstraintSet::StopConditionFn stopCondition, bool closedUB) { - auto reifyToOperands = [&](Value v, std::optional d) { + auto reifyToOperands = [&](Value v, std::optional d, + ValueBoundsConstraintSet &cstr) { return v != value; }; return reifyValueBound(b, loc, type, value, /*dim=*/std::nullopt, diff --git a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp index b32ea8e..c3a08ce 100644 --- a/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp +++ b/mlir/lib/Dialect/Linalg/Transforms/HoistPadding.cpp @@ -468,7 +468,7 @@ HoistPaddingAnalysis::getHoistedPackedTensorSizes(RewriterBase &rewriter, FailureOr loopUb = affine::reifyIndexValueBound( rewriter, loc, presburger::BoundType::UB, forOp.getUpperBound(), /*stopCondition=*/ - [&](Value v, std::optional d) { + [&](Value v, std::optional d, ValueBoundsConstraintSet &cstr) { if (v == forOp.getUpperBound()) return false; // Compute a bound that is independent of any affine op results. diff --git a/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp b/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp index cb36e0c..1e13e60 100644 --- a/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp +++ b/mlir/lib/Dialect/SCF/IR/ValueBoundsOpInterfaceImpl.cpp @@ -58,7 +58,7 @@ struct ForOpInterface ValueDimList boundOperands; LogicalResult status = ValueBoundsConstraintSet::computeBound( bound, boundOperands, BoundType::EQ, yieldedValue, dim, - [&](Value v, std::optional d) { + [&](Value v, std::optional d, ValueBoundsConstraintSet &cstr) { // Stop when reaching a block argument of the loop body. if (auto bbArg = llvm::dyn_cast(v)) return bbArg.getOwner()->getParentOp() == forOp; diff --git a/mlir/lib/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.cpp b/mlir/lib/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.cpp index 6d7e3bc..52359fa8 100644 --- a/mlir/lib/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.cpp +++ b/mlir/lib/Dialect/Vector/IR/ScalableValueBoundsConstraintSet.cpp @@ -47,17 +47,26 @@ ScalableValueBoundsConstraintSet::computeScalableBound( unsigned vscaleMax, presburger::BoundType boundType, bool closedUB, StopConditionFn stopCondition) { using namespace presburger; - assert(vscaleMin <= vscaleMax); - ScalableValueBoundsConstraintSet scalableCstr(value.getContext(), vscaleMin, - vscaleMax); - int64_t pos = scalableCstr.populateConstraintsSet(value, dim, stopCondition); + // No stop condition specified: Keep adding constraints until the worklist + // is empty. + auto defaultStopCondition = [&](Value v, std::optional dim, + mlir::ValueBoundsConstraintSet &cstr) { + return false; + }; + + ScalableValueBoundsConstraintSet scalableCstr( + value.getContext(), stopCondition ? stopCondition : defaultStopCondition, + vscaleMin, vscaleMax); + int64_t pos = scalableCstr.populateConstraintsSet(value, dim); // Project out all variables apart from vscale. // This should result in constraints in terms of vscale only. 
- scalableCstr.projectOut( - [&](ValueDim p) { return p.first != scalableCstr.getVscaleValue(); }); + auto projectOutFn = [&](ValueDim p) { + return p.first != scalableCstr.getVscaleValue(); + }; + scalableCstr.projectOut(projectOutFn); assert(scalableCstr.cstr.getNumDimAndSymbolVars() == scalableCstr.positionToValueDim.size() && diff --git a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp index 9a3185d..0d362c7 100644 --- a/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp +++ b/mlir/lib/Interfaces/ValueBoundsOpInterface.cpp @@ -67,8 +67,11 @@ static std::optional getConstantIntValue(OpFoldResult ofr) { return std::nullopt; } -ValueBoundsConstraintSet::ValueBoundsConstraintSet(MLIRContext *ctx) - : builder(ctx) {} +ValueBoundsConstraintSet::ValueBoundsConstraintSet( + MLIRContext *ctx, StopConditionFn stopCondition) + : builder(ctx), stopCondition(stopCondition) { + assert(stopCondition && "expected non-null stop condition"); +} char ValueBoundsConstraintSet::ID = 0; @@ -193,7 +196,8 @@ static Operation *getOwnerOfValue(Value value) { return value.getDefiningOp(); } -void ValueBoundsConstraintSet::processWorklist(StopConditionFn stopCondition) { +void ValueBoundsConstraintSet::processWorklist() { + LLVM_DEBUG(llvm::dbgs() << "Processing value bounds worklist...\n"); while (!worklist.empty()) { int64_t pos = worklist.front(); worklist.pop(); @@ -214,13 +218,19 @@ void ValueBoundsConstraintSet::processWorklist(StopConditionFn stopCondition) { // Do not process any further if the stop condition is met. auto maybeDim = dim == kIndexValue ? std::nullopt : std::make_optional(dim); - if (stopCondition(value, maybeDim)) + if (stopCondition(value, maybeDim, *this)) { + LLVM_DEBUG(llvm::dbgs() << "Stop condition met for: " << value + << " (dim: " << maybeDim << ")\n"); continue; + } // Query `ValueBoundsOpInterface` for constraints. New items may be added to // the worklist. auto valueBoundsOp = dyn_cast(getOwnerOfValue(value)); + LLVM_DEBUG(llvm::dbgs() + << "Query value bounds for: " << value + << " (owner: " << getOwnerOfValue(value)->getName() << ")\n"); if (valueBoundsOp) { if (dim == kIndexValue) { valueBoundsOp.populateBoundsForIndexValue(value, *this); @@ -229,6 +239,7 @@ void ValueBoundsConstraintSet::processWorklist(StopConditionFn stopCondition) { } continue; } + LLVM_DEBUG(llvm::dbgs() << "--> ValueBoundsOpInterface not implemented\n"); // If the op does not implement `ValueBoundsOpInterface`, check if it // implements the `DestinationStyleOpInterface`. OpResults of such ops are @@ -278,8 +289,6 @@ LogicalResult ValueBoundsConstraintSet::computeBound( bool closedUB) { #ifndef NDEBUG assertValidValueDim(value, dim); - assert(!stopCondition(value, dim) && - "stop condition should not be satisfied for starting point"); #endif // NDEBUG int64_t ubAdjustment = closedUB ? 0 : 1; @@ -289,9 +298,11 @@ LogicalResult ValueBoundsConstraintSet::computeBound( // Process the backward slice of `value` (i.e., reverse use-def chain) until // `stopCondition` is met. 
ValueDim valueDim = std::make_pair(value, dim.value_or(kIndexValue)); - ValueBoundsConstraintSet cstr(value.getContext()); + ValueBoundsConstraintSet cstr(value.getContext(), stopCondition); + assert(!stopCondition(value, dim, cstr) && + "stop condition should not be satisfied for starting point"); int64_t pos = cstr.insert(value, dim, /*isSymbol=*/false); - cstr.processWorklist(stopCondition); + cstr.processWorklist(); // Project out all variables (apart from `valueDim`) that do not match the // stop condition. @@ -301,7 +312,7 @@ LogicalResult ValueBoundsConstraintSet::computeBound( return false; auto maybeDim = p.second == kIndexValue ? std::nullopt : std::make_optional(p.second); - return !stopCondition(p.first, maybeDim); + return !stopCondition(p.first, maybeDim, cstr); }); // Compute lower and upper bounds for `valueDim`. @@ -407,7 +418,7 @@ LogicalResult ValueBoundsConstraintSet::computeDependentBound( bool closedUB) { return computeBound( resultMap, mapOperands, type, value, dim, - [&](Value v, std::optional d) { + [&](Value v, std::optional d, ValueBoundsConstraintSet &cstr) { return llvm::is_contained(dependencies, std::make_pair(v, d)); }, closedUB); @@ -443,7 +454,9 @@ LogicalResult ValueBoundsConstraintSet::computeIndependentBound( // Reify bounds in terms of any independent values. return computeBound( resultMap, mapOperands, type, value, dim, - [&](Value v, std::optional d) { return isIndependent(v); }, + [&](Value v, std::optional d, ValueBoundsConstraintSet &cstr) { + return isIndependent(v); + }, closedUB); } @@ -476,21 +489,19 @@ FailureOr ValueBoundsConstraintSet::computeConstantBound( presburger::BoundType type, AffineMap map, ValueDimList operands, StopConditionFn stopCondition, bool closedUB) { assert(map.getNumResults() == 1 && "expected affine map with one result"); - ValueBoundsConstraintSet cstr(map.getContext()); - int64_t pos = 0; - if (stopCondition) { - cstr.populateConstraintsSet(map, operands, stopCondition, &pos); - } else { - // No stop condition specified: Keep adding constraints until a bound could - // be computed. - cstr.populateConstraintsSet( - map, operands, - [&](Value v, std::optional dim) { - return cstr.cstr.getConstantBound64(type, pos).has_value(); - }, - &pos); - } + // Default stop condition if none was specified: Keep adding constraints until + // a bound could be computed. + int64_t pos; + auto defaultStopCondition = [&](Value v, std::optional dim, + ValueBoundsConstraintSet &cstr) { + return cstr.cstr.getConstantBound64(type, pos).has_value(); + }; + + ValueBoundsConstraintSet cstr( + map.getContext(), stopCondition ? stopCondition : defaultStopCondition); + cstr.populateConstraintsSet(map, operands, &pos); + // Compute constant bound for `valueDim`. int64_t ubAdjustment = closedUB ? 
0 : 1; if (auto bound = cstr.cstr.getConstantBound64(type, pos)) @@ -498,8 +509,9 @@ FailureOr ValueBoundsConstraintSet::computeConstantBound( return failure(); } -int64_t ValueBoundsConstraintSet::populateConstraintsSet( - Value value, std::optional dim, StopConditionFn stopCondition) { +int64_t +ValueBoundsConstraintSet::populateConstraintsSet(Value value, + std::optional dim) { #ifndef NDEBUG assertValidValueDim(value, dim); #endif // NDEBUG @@ -507,12 +519,12 @@ int64_t ValueBoundsConstraintSet::populateConstraintsSet( AffineMap map = AffineMap::get(/*dimCount=*/1, /*symbolCount=*/0, Builder(value.getContext()).getAffineDimExpr(0)); - return populateConstraintsSet(map, {{value, dim}}, stopCondition); + return populateConstraintsSet(map, {{value, dim}}); } -int64_t ValueBoundsConstraintSet::populateConstraintsSet( - AffineMap map, ValueDimList operands, StopConditionFn stopCondition, - int64_t *posOut) { +int64_t ValueBoundsConstraintSet::populateConstraintsSet(AffineMap map, + ValueDimList operands, + int64_t *posOut) { assert(map.getNumResults() == 1 && "expected affine map with one result"); int64_t pos = insert(/*isSymbol=*/false); if (posOut) @@ -533,13 +545,7 @@ int64_t ValueBoundsConstraintSet::populateConstraintsSet( // Process the backward slice of `operands` (i.e., reverse use-def chain) // until `stopCondition` is met. - if (stopCondition) { - processWorklist(stopCondition); - } else { - // No stop condition specified: Keep adding constraints until the worklist - // is empty. - processWorklist([](Value v, std::optional dim) { return false; }); - } + processWorklist(); return pos; } diff --git a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp index 5e160b7..4b2b1a0 100644 --- a/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp +++ b/mlir/test/lib/Dialect/Affine/TestReifyValueBounds.cpp @@ -117,14 +117,17 @@ static LogicalResult testReifyValueBounds(func::FuncOp funcOp, // Prepare stop condition. By default, reify in terms of the op's // operands. No stop condition is used when a constant was requested. - std::function)> stopCondition = - [&](Value v, std::optional d) { + std::function, + ValueBoundsConstraintSet & cstr)> + stopCondition = [&](Value v, std::optional d, + ValueBoundsConstraintSet &cstr) { // Reify in terms of SSA values that are different from `value`. return v != value; }; if (reifyToFuncArgs) { // Reify in terms of function block arguments. - stopCondition = stopCondition = [](Value v, std::optional d) { + stopCondition = [](Value v, std::optional d, + ValueBoundsConstraintSet &cstr) { auto bbArg = dyn_cast(v); if (!bbArg) return false; -- cgit v1.1 From a2306b65d223212dcfafe12c7299262d8d4fdcb4 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Thu, 4 Apr 2024 10:27:08 +0200 Subject: [libc] Refactor `BigInt` (#86137) This patch moves most of the multiprecision logic to the `multiword` namespace and simplifies some logic in `BigInt`. It also fully implements the mask and count functions and increases test coverage. `math_extras.h` is also reworked to make it more concise. 
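To make the refactor easier to follow, here is a minimal standalone sketch of the
word-by-word carry propagation that the new `multiword` helpers are built around.
It is illustrative only: `wide_add` does not exist in the patch, and the real code
works in place over `cpp::array` and is generic over the word type.

  #include <array>
  #include <cstddef>
  #include <cstdint>

  // Add two N-word unsigned integers limb by limb, threading a carry through.
  template <std::size_t N>
  std::array<uint64_t, N> wide_add(const std::array<uint64_t, N> &a,
                                   const std::array<uint64_t, N> &b) {
    std::array<uint64_t, N> out{};
    uint64_t carry = 0;
    for (std::size_t i = 0; i < N; ++i) {
      const uint64_t sum = a[i] + b[i]; // may wrap (unsigned overflow)
      const uint64_t c1 = sum < a[i];   // overflow of a[i] + b[i]
      out[i] = sum + carry;
      const uint64_t c2 = out[i] < sum; // overflow of adding the carry
      carry = c1 | c2;                  // at most one of c1/c2 is set
    }
    return out;                         // the final carry is dropped here
  }

The patch expresses the same idea through `add_overflow`/`add_with_carry` and reuses
it for subtraction and multiplication via `inplace_binop` and the `Accumulator`
helper.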
--- libc/fuzzing/CMakeLists.txt | 1 + libc/fuzzing/__support/CMakeLists.txt | 7 + libc/fuzzing/__support/uint_fuzz.cpp | 70 ++ libc/src/__support/FPUtil/dyadic_float.h | 6 +- libc/src/__support/UInt.h | 1126 ++++++++++---------- libc/src/__support/float_to_string.h | 7 +- libc/src/__support/integer_literals.h | 25 +- libc/src/__support/math_extras.h | 249 ++--- libc/src/__support/number_pair.h | 11 - libc/test/src/__support/integer_literals_test.cpp | 21 + libc/test/src/__support/math_extras_test.cpp | 57 + libc/test/src/__support/uint_test.cpp | 192 +++- .../libc/test/src/__support/BUILD.bazel | 1 + 13 files changed, 1011 insertions(+), 762 deletions(-) create mode 100644 libc/fuzzing/__support/CMakeLists.txt create mode 100644 libc/fuzzing/__support/uint_fuzz.cpp diff --git a/libc/fuzzing/CMakeLists.txt b/libc/fuzzing/CMakeLists.txt index 8248768..816691b 100644 --- a/libc/fuzzing/CMakeLists.txt +++ b/libc/fuzzing/CMakeLists.txt @@ -1,6 +1,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer") add_custom_target(libc-fuzzer) +add_subdirectory(__support) # TODO(#85680): Re-enable math fuzzing after headers are sorted out # add_subdirectory(math) add_subdirectory(stdlib) diff --git a/libc/fuzzing/__support/CMakeLists.txt b/libc/fuzzing/__support/CMakeLists.txt new file mode 100644 index 0000000..278e914 --- /dev/null +++ b/libc/fuzzing/__support/CMakeLists.txt @@ -0,0 +1,7 @@ +add_libc_fuzzer( + uint_fuzz + SRCS + uint_fuzz.cpp + DEPENDS + libc.src.__support.uint +) diff --git a/libc/fuzzing/__support/uint_fuzz.cpp b/libc/fuzzing/__support/uint_fuzz.cpp new file mode 100644 index 0000000..f48f00d --- /dev/null +++ b/libc/fuzzing/__support/uint_fuzz.cpp @@ -0,0 +1,70 @@ +#include "src/__support/CPP/bit.h" +#include "src/__support/UInt.h" +#include "src/string/memory_utils/inline_memcpy.h" + +using namespace LIBC_NAMESPACE; + +// Helper function when using gdb / lldb to set a breakpoint and inspect values. +template void debug_and_trap(const char *msg, T a, T b) { + __builtin_trap(); +} + +#define DEBUG_AND_TRAP() + +#define TEST_BINOP(OP) \ + if ((a OP b) != (static_cast(BigInt(a) OP BigInt(b)))) \ + debug_and_trap(#OP, a, b); + +#define TEST_SHIFTOP(OP) \ + if ((a OP b) != (static_cast(BigInt(a) OP b))) \ + debug_and_trap(#OP, a, b); + +#define TEST_FUNCTION(FUN) \ + if (FUN(a) != FUN(BigInt(a))) \ + debug_and_trap(#FUN, a, b); + +// Test that basic arithmetic operations of BigInt behave like their scalar +// counterparts. +template void run_tests(T a, T b) { + TEST_BINOP(+) + TEST_BINOP(-) + TEST_BINOP(*) + if (b != 0) + TEST_BINOP(/) + if (b >= 0 && b < cpp::numeric_limits::digits) { + TEST_SHIFTOP(<<) + TEST_SHIFTOP(>>) + } + if constexpr (!BigInt::SIGNED) { + TEST_FUNCTION(cpp::has_single_bit) + TEST_FUNCTION(cpp::countr_zero) + TEST_FUNCTION(cpp::countl_zero) + TEST_FUNCTION(cpp::countl_one) + TEST_FUNCTION(cpp::countr_one) + } +} + +// Reads a T from libfuzzer data. +template T read(const uint8_t *data, size_t &remainder) { + T out = 0; + constexpr size_t T_SIZE = sizeof(T); + const size_t copy_size = remainder < T_SIZE ? 
remainder : T_SIZE; + inline_memcpy(&out, data, copy_size); + remainder -= copy_size; + return out; +} + +template +void run_tests(const uint8_t *data, size_t size) { + const auto a = read(data, size); + const auto b = read(data, size); + run_tests(a, b); +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + // unsigned + run_tests>(data, size); + // signed + run_tests>(data, size); + return 0; +} diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h index 73fd738..e0c205f 100644 --- a/libc/src/__support/FPUtil/dyadic_float.h +++ b/libc/src/__support/FPUtil/dyadic_float.h @@ -58,9 +58,9 @@ template struct DyadicFloat { // significant bit. LIBC_INLINE constexpr DyadicFloat &normalize() { if (!mantissa.is_zero()) { - int shift_length = static_cast(mantissa.clz()); + int shift_length = cpp::countl_zero(mantissa); exponent -= shift_length; - mantissa.shift_left(static_cast(shift_length)); + mantissa <<= static_cast(shift_length); } return *this; } @@ -233,7 +233,7 @@ LIBC_INLINE constexpr DyadicFloat quick_add(DyadicFloat a, result.sign = a.sign; result.exponent = a.exponent; result.mantissa = a.mantissa; - if (result.mantissa.add(b.mantissa)) { + if (result.mantissa.add_overflow(b.mantissa)) { // Mantissa addition overflow. result.shift_right(1); result.mantissa.val[DyadicFloat::MantissaType::WORD_COUNT - 1] |= diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h index 282efdb..c524de3 100644 --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/UInt.h @@ -14,10 +14,11 @@ #include "src/__support/CPP/limits.h" #include "src/__support/CPP/optional.h" #include "src/__support/CPP/type_traits.h" -#include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/properties/compiler.h" // LIBC_COMPILER_IS_CLANG #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128, LIBC_TYPES_HAS_INT64 -#include "src/__support/math_extras.h" // SumCarry, DiffBorrow +#include "src/__support/math_extras.h" // add_with_carry, sub_with_borrow #include "src/__support/number_pair.h" #include // For size_t @@ -25,71 +26,321 @@ namespace LIBC_NAMESPACE { -namespace internal { -template struct half_width; +namespace multiword { -template <> struct half_width : cpp::type_identity {}; -template <> struct half_width : cpp::type_identity {}; +// A type trait mapping unsigned integers to their half-width unsigned +// counterparts. +template struct half_width; template <> struct half_width : cpp::type_identity {}; +template <> struct half_width : cpp::type_identity {}; +#ifdef LIBC_TYPES_HAS_INT64 +template <> struct half_width : cpp::type_identity {}; #ifdef LIBC_TYPES_HAS_INT128 template <> struct half_width<__uint128_t> : cpp::type_identity {}; #endif // LIBC_TYPES_HAS_INT128 - +#endif // LIBC_TYPES_HAS_INT64 template using half_width_t = typename half_width::type; -template constexpr NumberPair full_mul(T a, T b) { - NumberPair pa = split(a); - NumberPair pb = split(b); - NumberPair prod; +// An array of two elements that can be used in multiword operations. +template struct DoubleWide final : cpp::array { + using UP = cpp::array; + using UP::UP; + LIBC_INLINE constexpr DoubleWide(T lo, T hi) : UP({lo, hi}) {} +}; + +// Converts an unsigned value into a DoubleWide>. 
+template LIBC_INLINE constexpr auto split(T value) { + static_assert(cpp::is_unsigned_v); + return cpp::bit_cast>>(value); +} + +// The low part of a DoubleWide value. +template LIBC_INLINE constexpr T lo(const DoubleWide &value) { + return value[0]; +} +// The high part of a DoubleWide value. +template LIBC_INLINE constexpr T hi(const DoubleWide &value) { + return value[1]; +} +// The low part of an unsigned value. +template LIBC_INLINE constexpr half_width_t lo(T value) { + return lo(split(value)); +} +// The high part of an unsigned value. +template LIBC_INLINE constexpr half_width_t hi(T value) { + return hi(split(value)); +} + +// Returns 'a' times 'b' in a DoubleWide. Cannot overflow by construction. +template +LIBC_INLINE constexpr DoubleWide mul2(word a, word b) { + if constexpr (cpp::is_same_v) { + return split(uint16_t(a) * uint16_t(b)); + } else if constexpr (cpp::is_same_v) { + return split(uint32_t(a) * uint32_t(b)); + } +#ifdef LIBC_TYPES_HAS_INT64 + else if constexpr (cpp::is_same_v) { + return split(uint64_t(a) * uint64_t(b)); + } +#endif +#ifdef LIBC_TYPES_HAS_INT128 + else if constexpr (cpp::is_same_v) { + return split<__uint128_t>(__uint128_t(a) * __uint128_t(b)); + } +#endif + else { + using half_word = half_width_t; + const auto shiftl = [](word value) -> word { + return value << cpp::numeric_limits::digits; + }; + const auto shiftr = [](word value) -> word { + return value >> cpp::numeric_limits::digits; + }; + // Here we do a one digit multiplication where 'a' and 'b' are of type + // word. We split 'a' and 'b' into half words and perform the classic long + // multiplication with 'a' and 'b' being two-digit numbers. + + // a a_hi a_lo + // x b => x b_hi b_lo + // ---- ----------- + // c result + // We convert 'lo' and 'hi' from 'half_word' to 'word' so multiplication + // doesn't overflow. + const word a_lo = lo(a); + const word b_lo = lo(b); + const word a_hi = hi(a); + const word b_hi = hi(b); + const word step1 = b_lo * a_lo; // no overflow; + const word step2 = b_lo * a_hi; // no overflow; + const word step3 = b_hi * a_lo; // no overflow; + const word step4 = b_hi * a_hi; // no overflow; + word lo_digit = step1; + word hi_digit = step4; + const word no_carry = 0; + word carry; + word _; // unused carry variable. + lo_digit = add_with_carry(lo_digit, shiftl(step2), no_carry, carry); + hi_digit = add_with_carry(hi_digit, shiftr(step2), carry, _); + lo_digit = add_with_carry(lo_digit, shiftl(step3), no_carry, carry); + hi_digit = add_with_carry(hi_digit, shiftr(step3), carry, _); + return DoubleWide(lo_digit, hi_digit); + } +} + +// In-place 'dst op= rhs' with operation with carry propagation. Returns carry. +template +LIBC_INLINE constexpr word inplace_binop(Function op_with_carry, + cpp::array &dst, + const cpp::array &rhs) { + static_assert(N >= M); + word carry_out = 0; + for (size_t i = 0; i < N; ++i) { + const bool has_rhs_value = i < M; + const word rhs_value = has_rhs_value ? rhs[i] : 0; + const word carry_in = carry_out; + dst[i] = op_with_carry(dst[i], rhs_value, carry_in, carry_out); + // stop early when rhs is over and no carry is to be propagated. + if (!has_rhs_value && carry_out == 0) + break; + } + return carry_out; +} - prod.lo = pa.lo * pb.lo; // exact - prod.hi = pa.hi * pb.hi; // exact - NumberPair lo_hi = split(pa.lo * pb.hi); // exact - NumberPair hi_lo = split(pa.hi * pb.lo); // exact +// In-place addition. Returns carry. 
+template +LIBC_INLINE constexpr word add_with_carry(cpp::array &dst, + const cpp::array &rhs) { + return inplace_binop(LIBC_NAMESPACE::add_with_carry, dst, rhs); +} + +// In-place subtraction. Returns borrow. +template +LIBC_INLINE constexpr word sub_with_borrow(cpp::array &dst, + const cpp::array &rhs) { + return inplace_binop(LIBC_NAMESPACE::sub_with_borrow, dst, rhs); +} + +// In-place multiply-add. Returns carry. +// i.e., 'dst += b * c' +template +LIBC_INLINE constexpr word mul_add_with_carry(cpp::array &dst, word b, + word c) { + return add_with_carry(dst, mul2(b, c)); +} - constexpr size_t HALF_BIT_WIDTH = sizeof(T) * CHAR_BIT / 2; +// An array of two elements serving as an accumulator during multiword +// computations. +template struct Accumulator final : cpp::array { + using UP = cpp::array; + LIBC_INLINE constexpr Accumulator() : UP({0, 0}) {} + LIBC_INLINE constexpr T advance(T carry_in) { + auto result = UP::front(); + UP::front() = UP::back(); + UP::back() = carry_in; + return result; + } + LIBC_INLINE constexpr T sum() const { return UP::front(); } + LIBC_INLINE constexpr T carry() const { return UP::back(); } +}; - auto r1 = add_with_carry(prod.lo, lo_hi.lo << HALF_BIT_WIDTH, T(0)); - prod.lo = r1.sum; - prod.hi = add_with_carry(prod.hi, lo_hi.hi, r1.carry).sum; +// In-place multiplication by a single word. Returns carry. +template +LIBC_INLINE constexpr word scalar_multiply_with_carry(cpp::array &dst, + word x) { + Accumulator acc; + for (auto &val : dst) { + const word carry = mul_add_with_carry(acc, val, x); + val = acc.advance(carry); + } + return acc.carry(); +} - auto r2 = add_with_carry(prod.lo, hi_lo.lo << HALF_BIT_WIDTH, T(0)); - prod.lo = r2.sum; - prod.hi = add_with_carry(prod.hi, hi_lo.hi, r2.carry).sum; +// Multiplication of 'lhs' by 'rhs' into 'dst'. Returns carry. +// This function is safe to use for signed numbers. +// https://stackoverflow.com/a/20793834 +// https://pages.cs.wisc.edu/%7Emarkhill/cs354/Fall2008/beyond354/int.mult.html +template +LIBC_INLINE constexpr word multiply_with_carry(cpp::array &dst, + const cpp::array &lhs, + const cpp::array &rhs) { + static_assert(O >= M + N); + Accumulator acc; + for (size_t i = 0; i < O; ++i) { + const size_t lower_idx = i < N ? 0 : i - N + 1; + const size_t upper_idx = i < M ? i : M - 1; + word carry = 0; + for (size_t j = lower_idx; j <= upper_idx; ++j) + carry += mul_add_with_carry(acc, lhs[j], rhs[i - j]); + dst[i] = acc.advance(carry); + } + return acc.carry(); +} - return prod; +template +LIBC_INLINE constexpr void quick_mul_hi(cpp::array &dst, + const cpp::array &lhs, + const cpp::array &rhs) { + Accumulator acc; + word carry = 0; + // First round of accumulation for those at N - 1 in the full product. + for (size_t i = 0; i < N; ++i) + carry += mul_add_with_carry(acc, lhs[i], rhs[N - 1 - i]); + for (size_t i = N; i < 2 * N - 1; ++i) { + acc.advance(carry); + carry = 0; + for (size_t j = i - N + 1; j < N; ++j) + carry += mul_add_with_carry(acc, lhs[j], rhs[i - j]); + dst[i - N] = acc.sum(); + } + dst.back() = acc.carry(); } -template <> -LIBC_INLINE constexpr NumberPair full_mul(uint32_t a, - uint32_t b) { - uint64_t prod = uint64_t(a) * uint64_t(b); - NumberPair result; - result.lo = uint32_t(prod); - result.hi = uint32_t(prod >> 32); - return result; +template +LIBC_INLINE constexpr bool is_negative(cpp::array &array) { + using signed_word = cpp::make_signed_t; + return cpp::bit_cast(array.back()) < 0; } +// An enum for the shift function below. 
+enum Direction { LEFT, RIGHT }; + +// A bitwise shift on an array of elements. +// TODO: Make the result UB when 'offset' is greater or equal to the number of +// bits in 'array'. This will allow for better code performance. +template +LIBC_INLINE constexpr cpp::array shift(cpp::array array, + size_t offset) { + static_assert(direction == LEFT || direction == RIGHT); + constexpr size_t WORD_BITS = cpp::numeric_limits::digits; + constexpr size_t TOTAL_BITS = N * WORD_BITS; + if (LIBC_UNLIKELY(offset == 0)) + return array; + if (LIBC_UNLIKELY(offset >= TOTAL_BITS)) + return {}; #ifdef LIBC_TYPES_HAS_INT128 -template <> -LIBC_INLINE constexpr NumberPair full_mul(uint64_t a, - uint64_t b) { - __uint128_t prod = __uint128_t(a) * __uint128_t(b); - NumberPair result; - result.lo = uint64_t(prod); - result.hi = uint64_t(prod >> 64); - return result; + if constexpr (TOTAL_BITS == 128) { + using type = cpp::conditional_t; + auto tmp = cpp::bit_cast(array); + if constexpr (direction == LEFT) + tmp <<= offset; + else + tmp >>= offset; + return cpp::bit_cast>(tmp); + } +#endif + const bool is_neg = is_signed && is_negative(array); + constexpr auto at = [](size_t index) -> int { + // reverse iteration when direction == LEFT. + if constexpr (direction == LEFT) + return int(N) - int(index) - 1; + return int(index); + }; + const auto safe_get_at = [&](size_t index) -> word { + // return appropriate value when accessing out of bound elements. + const int i = at(index); + if (i < 0) + return 0; + if (i >= int(N)) + return is_neg ? -1 : 0; + return array[i]; + }; + const size_t index_offset = offset / WORD_BITS; + const size_t bit_offset = offset % WORD_BITS; +#ifdef LIBC_COMPILER_IS_CLANG + __builtin_assume(index_offset < N); +#endif + cpp::array out = {}; + for (size_t index = 0; index < N; ++index) { + const word part1 = safe_get_at(index + index_offset); + const word part2 = safe_get_at(index + index_offset + 1); + word &dst = out[at(index)]; + if (bit_offset == 0) + dst = part1; // no crosstalk between parts. 
+ else if constexpr (direction == LEFT) + dst = (part1 << bit_offset) | (part2 >> (WORD_BITS - bit_offset)); + else + dst = (part1 >> bit_offset) | (part2 << (WORD_BITS - bit_offset)); + } + return out; } -#endif // LIBC_TYPES_HAS_INT128 -} // namespace internal +#define DECLARE_COUNTBIT(NAME, INDEX_EXPR) \ + template \ + LIBC_INLINE constexpr int NAME(const cpp::array &val) { \ + int bit_count = 0; \ + for (size_t i = 0; i < N; ++i) { \ + const int word_count = cpp::NAME(val[INDEX_EXPR]); \ + bit_count += word_count; \ + if (word_count != cpp::numeric_limits::digits) \ + break; \ + } \ + return bit_count; \ + } + +DECLARE_COUNTBIT(countr_zero, i) // iterating forward +DECLARE_COUNTBIT(countr_one, i) // iterating forward +DECLARE_COUNTBIT(countl_zero, N - i - 1) // iterating backward +DECLARE_COUNTBIT(countl_one, N - i - 1) // iterating backward + +} // namespace multiword template struct BigInt { +private: static_assert(cpp::is_integral_v && cpp::is_unsigned_v, "WordType must be unsigned integer."); + struct Division { + BigInt quotient; + BigInt remainder; + }; + +public: using word_type = WordType; + using unsigned_type = BigInt; + using signed_type = BigInt; + LIBC_INLINE_VAR static constexpr bool SIGNED = Signed; LIBC_INLINE_VAR static constexpr size_t BITS = Bits; LIBC_INLINE_VAR @@ -100,10 +351,7 @@ struct BigInt { LIBC_INLINE_VAR static constexpr size_t WORD_COUNT = Bits / WORD_SIZE; - using unsigned_type = BigInt; - using signed_type = BigInt; - - cpp::array val{}; + cpp::array val{}; // zero initialized. LIBC_INLINE constexpr BigInt() = default; @@ -112,76 +360,67 @@ struct BigInt { template LIBC_INLINE constexpr BigInt( const BigInt &other) { - if (OtherBits >= Bits) { + if (OtherBits >= Bits) { // truncate for (size_t i = 0; i < WORD_COUNT; ++i) val[i] = other[i]; - } else { + } else { // zero or sign extend size_t i = 0; for (; i < OtherBits / WORD_SIZE; ++i) val[i] = other[i]; - WordType sign = 0; - if constexpr (Signed && OtherSigned) { - sign = static_cast( - -static_cast>(other.is_neg())); - } - for (; i < WORD_COUNT; ++i) - val[i] = sign; + extend(i, Signed && other.is_neg()); } } // Construct a BigInt from a C array. - template = 0> - LIBC_INLINE constexpr BigInt(const WordType (&nums)[N]) { - size_t min_wordcount = N < WORD_COUNT ? N : WORD_COUNT; - size_t i = 0; - for (; i < min_wordcount; ++i) + template LIBC_INLINE constexpr BigInt(const WordType (&nums)[N]) { + static_assert(N == WORD_COUNT); + for (size_t i = 0; i < WORD_COUNT; ++i) val[i] = nums[i]; + } - // If nums doesn't completely fill val, then fill the rest with zeroes. - for (; i < WORD_COUNT; ++i) - val[i] = 0; + LIBC_INLINE constexpr explicit BigInt( + const cpp::array &words) { + val = words; } // Initialize the first word to |v| and the rest to 0. template >> LIBC_INLINE constexpr BigInt(T v) { - val[0] = static_cast(v); - - if constexpr (WORD_COUNT == 1) - return; - - if constexpr (Bits < sizeof(T) * CHAR_BIT) { - for (int i = 1; i < WORD_COUNT; ++i) { - v >>= WORD_SIZE; - val[i] = static_cast(v); + constexpr size_t T_SIZE = sizeof(T) * CHAR_BIT; + const bool is_neg = Signed && (v < 0); + for (size_t i = 0; i < WORD_COUNT; ++i) { + if (v == 0) { + extend(i, is_neg); + return; } - return; - } - - size_t i = 1; - - if constexpr (WORD_SIZE < sizeof(T) * CHAR_BIT) - for (; i < sizeof(T) * CHAR_BIT / WORD_SIZE; ++i) { + val[i] = static_cast(v); + if constexpr (T_SIZE > WORD_SIZE) v >>= WORD_SIZE; - val[i] = static_cast(v); - } - - WordType sign = (Signed && (v < 0)) ? 
~WordType(0) : WordType(0); - for (; i < WORD_COUNT; ++i) { - val[i] = sign; + else + v = 0; } } + LIBC_INLINE constexpr BigInt &operator=(const BigInt &other) = default; - LIBC_INLINE constexpr explicit BigInt( - const cpp::array &words) { - for (size_t i = 0; i < WORD_COUNT; ++i) - val[i] = words[i]; + // constants + LIBC_INLINE static constexpr BigInt zero() { return BigInt(); } + LIBC_INLINE static constexpr BigInt one() { return BigInt(1); } + LIBC_INLINE static constexpr BigInt all_ones() { return ~zero(); } + LIBC_INLINE static constexpr BigInt min() { + BigInt out; + if constexpr (SIGNED) + out.set_msb(); + return out; + } + LIBC_INLINE static constexpr BigInt max() { + BigInt out = all_ones(); + if constexpr (SIGNED) + out.clear_msb(); + return out; } // TODO: Reuse the Sign type. - LIBC_INLINE constexpr bool is_neg() const { - return val.back() >> (WORD_SIZE - 1); - } + LIBC_INLINE constexpr bool is_neg() const { return SIGNED && get_msb(); } template LIBC_INLINE constexpr explicit operator T() const { return to(); @@ -191,200 +430,100 @@ struct BigInt { LIBC_INLINE constexpr cpp::enable_if_t< cpp::is_integral_v && !cpp::is_same_v, T> to() const { + constexpr size_t T_SIZE = sizeof(T) * CHAR_BIT; T lo = static_cast(val[0]); - - constexpr size_t T_BITS = sizeof(T) * CHAR_BIT; - - if constexpr (T_BITS <= WORD_SIZE) + if constexpr (T_SIZE <= WORD_SIZE) return lo; - constexpr size_t MAX_COUNT = - T_BITS > Bits ? WORD_COUNT : T_BITS / WORD_SIZE; + T_SIZE > Bits ? WORD_COUNT : T_SIZE / WORD_SIZE; for (size_t i = 1; i < MAX_COUNT; ++i) lo += static_cast(val[i]) << (WORD_SIZE * i); - - if constexpr (Signed && (T_BITS > Bits)) { + if constexpr (Signed && (T_SIZE > Bits)) { // Extend sign for negative numbers. constexpr T MASK = (~T(0) << Bits); if (is_neg()) lo |= MASK; } - return lo; } LIBC_INLINE constexpr explicit operator bool() const { return !is_zero(); } - LIBC_INLINE constexpr BigInt &operator=(const BigInt &other) = default; - LIBC_INLINE constexpr bool is_zero() const { - for (size_t i = 0; i < WORD_COUNT; ++i) { - if (val[i] != 0) + for (auto part : val) + if (part != 0) return false; - } return true; } - // Add x to this number and store the result in this number. + // Add 'rhs' to this number and store the result in this number. // Returns the carry value produced by the addition operation. - LIBC_INLINE constexpr WordType add(const BigInt &x) { - SumCarry s{0, 0}; - for (size_t i = 0; i < WORD_COUNT; ++i) { - s = add_with_carry(val[i], x.val[i], s.carry); - val[i] = s.sum; - } - return s.carry; + LIBC_INLINE constexpr WordType add_overflow(const BigInt &rhs) { + return multiword::add_with_carry(val, rhs.val); } LIBC_INLINE constexpr BigInt operator+(const BigInt &other) const { - BigInt result; - SumCarry s{0, 0}; - for (size_t i = 0; i < WORD_COUNT; ++i) { - s = add_with_carry(val[i], other.val[i], s.carry); - result.val[i] = s.sum; - } + BigInt result = *this; + result.add_overflow(other); return result; } // This will only apply when initializing a variable from constant values, so // it will always use the constexpr version of add_with_carry. LIBC_INLINE constexpr BigInt operator+(BigInt &&other) const { - BigInt result; - SumCarry s{0, 0}; - for (size_t i = 0; i < WORD_COUNT; ++i) { - s = add_with_carry(val[i], other.val[i], s.carry); - result.val[i] = s.sum; - } - return result; + // We use addition commutativity to reuse 'other' and prevent allocation. + other.add_overflow(*this); // Returned carry value is ignored. 
+ return other; } LIBC_INLINE constexpr BigInt &operator+=(const BigInt &other) { - add(other); // Returned carry value is ignored. + add_overflow(other); // Returned carry value is ignored. return *this; } - // Subtract x to this number and store the result in this number. + // Subtract 'rhs' to this number and store the result in this number. // Returns the carry value produced by the subtraction operation. - LIBC_INLINE constexpr WordType sub(const BigInt &x) { - DiffBorrow d{0, 0}; - for (size_t i = 0; i < WORD_COUNT; ++i) { - d = sub_with_borrow(val[i], x.val[i], d.borrow); - val[i] = d.diff; - } - return d.borrow; + LIBC_INLINE constexpr WordType sub_overflow(const BigInt &rhs) { + return multiword::sub_with_borrow(val, rhs.val); } LIBC_INLINE constexpr BigInt operator-(const BigInt &other) const { - BigInt result; - DiffBorrow d{0, 0}; - for (size_t i = 0; i < WORD_COUNT; ++i) { - d = sub_with_borrow(val[i], other.val[i], d.borrow); - result.val[i] = d.diff; - } + BigInt result = *this; + result.sub_overflow(other); // Returned carry value is ignored. return result; } LIBC_INLINE constexpr BigInt operator-(BigInt &&other) const { - BigInt result; - DiffBorrow d{0, 0}; - for (size_t i = 0; i < WORD_COUNT; ++i) { - d = sub_with_borrow(val[i], other.val[i], d.borrow); - result.val[i] = d.diff; - } + BigInt result = *this; + result.sub_overflow(other); // Returned carry value is ignored. return result; } LIBC_INLINE constexpr BigInt &operator-=(const BigInt &other) { // TODO(lntue): Set overflow flag / errno when carry is true. - sub(other); + sub_overflow(other); // Returned carry value is ignored. return *this; } - // Multiply this number with x and store the result in this number. It is - // implemented using the long multiplication algorithm by splitting the - // 64-bit words of this number and |x| in to 32-bit halves but peforming - // the operations using 64-bit numbers. This ensures that we don't lose the - // carry bits. - // Returns the carry value produced by the multiplication operation. + // Multiply this number with x and store the result in this number. 
LIBC_INLINE constexpr WordType mul(WordType x) { - BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); - for (size_t i = 0; i < WORD_COUNT; ++i) { - NumberPair prod = internal::full_mul(val[i], x); - BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); - const WordType carry = partial_sum.add(tmp); - val[i] = partial_sum.val[0]; - partial_sum.val[0] = partial_sum.val[1]; - partial_sum.val[1] = carry; - } - return partial_sum.val[1]; - } - - LIBC_INLINE constexpr BigInt operator*(const BigInt &other) const { - if constexpr (Signed) { - BigInt a(*this); - BigInt b(other); - const bool a_neg = a.is_neg(); - const bool b_neg = b.is_neg(); - if (a_neg) - a = -a; - if (b_neg) - b = -b; - BigInt prod = a * b; - if (a_neg != b_neg) - prod = -prod; - return static_cast>(prod); - } else { - if constexpr (WORD_COUNT == 1) { - return {val[0] * other.val[0]}; - } else { - BigInt result(0); - BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); - WordType carry = 0; - for (size_t i = 0; i < WORD_COUNT; ++i) { - for (size_t j = 0; j <= i; j++) { - NumberPair prod = - internal::full_mul(val[j], other.val[i - j]); - BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); - carry += partial_sum.add(tmp); - } - result.val[i] = partial_sum.val[0]; - partial_sum.val[0] = partial_sum.val[1]; - partial_sum.val[1] = carry; - carry = 0; - } - return result; - } - } + return multiword::scalar_multiply_with_carry(val, x); } - // Return the full product, only unsigned for now. + // Return the full product. template - LIBC_INLINE constexpr BigInt + LIBC_INLINE constexpr auto ful_mul(const BigInt &other) const { - BigInt result(0); - BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); - WordType carry = 0; - constexpr size_t OTHER_WORDCOUNT = - BigInt::WORD_COUNT; - for (size_t i = 0; i <= WORD_COUNT + OTHER_WORDCOUNT - 2; ++i) { - const size_t lower_idx = - i < OTHER_WORDCOUNT ? 0 : i - OTHER_WORDCOUNT + 1; - const size_t upper_idx = i < WORD_COUNT ? i : WORD_COUNT - 1; - for (size_t j = lower_idx; j <= upper_idx; ++j) { - NumberPair prod = - internal::full_mul(val[j], other.val[i - j]); - BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); - carry += partial_sum.add(tmp); - } - result.val[i] = partial_sum.val[0]; - partial_sum.val[0] = partial_sum.val[1]; - partial_sum.val[1] = carry; - carry = 0; - } - result.val[WORD_COUNT + OTHER_WORDCOUNT - 1] = partial_sum.val[0]; + BigInt result; + multiword::multiply_with_carry(result.val, val, other.val); return result; } + LIBC_INLINE constexpr BigInt operator*(const BigInt &other) const { + // Perform full mul and truncate. + return BigInt(ful_mul(other)); + } + // Fast hi part of the full product. The normal product `operator*` returns // `Bits` least significant bits of the full product, while this function will // approximate `Bits` most significant bits of the full product with errors @@ -407,39 +546,17 @@ struct BigInt { // 256 4 16 10 3 // 512 8 64 36 7 LIBC_INLINE constexpr BigInt quick_mul_hi(const BigInt &other) const { - BigInt result(0); - BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); - WordType carry = 0; - // First round of accumulation for those at WORD_COUNT - 1 in the full - // product. 
- for (size_t i = 0; i < WORD_COUNT; ++i) { - NumberPair prod = - internal::full_mul(val[i], other.val[WORD_COUNT - 1 - i]); - BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); - carry += partial_sum.add(tmp); - } - for (size_t i = WORD_COUNT; i < 2 * WORD_COUNT - 1; ++i) { - partial_sum.val[0] = partial_sum.val[1]; - partial_sum.val[1] = carry; - carry = 0; - for (size_t j = i - WORD_COUNT + 1; j < WORD_COUNT; ++j) { - NumberPair prod = - internal::full_mul(val[j], other.val[i - j]); - BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); - carry += partial_sum.add(tmp); - } - result.val[i - WORD_COUNT] = partial_sum.val[0]; - } - result.val[WORD_COUNT - 1] = partial_sum.val[1]; + BigInt result; + multiword::quick_mul_hi(result.val, val, other.val); return result; } - // pow takes a power and sets this to its starting value to that power. Zero - // to the zeroth power returns 1. + // BigInt(x).pow_n(n) computes x ^ n. + // Note 0 ^ 0 == 1. LIBC_INLINE constexpr void pow_n(uint64_t power) { - BigInt result = 1; + static_assert(!Signed); + BigInt result = one(); BigInt cur_power = *this; - while (power > 0) { if ((power % 2) > 0) result *= cur_power; @@ -449,38 +566,23 @@ struct BigInt { *this = result; } - // TODO: Make division work correctly for signed integers. - - // div takes another BigInt of the same size and divides this by it. The value - // of this will be set to the quotient, and the return value is the remainder. - LIBC_INLINE constexpr cpp::optional div(const BigInt &other) { - BigInt remainder(0); - if (*this < other) { - remainder = *this; - *this = BigInt(0); - return remainder; - } - if (other == 1) { - return remainder; - } - if (other == 0) { + // Performs inplace signed / unsigned division. Returns remainder if not + // dividing by zero. + // For signed numbers it behaves like C++ signed integer division. + // That is by truncating the fractionnal part + // https://stackoverflow.com/a/3602857 + LIBC_INLINE constexpr cpp::optional div(const BigInt ÷r) { + if (LIBC_UNLIKELY(divider.is_zero())) return cpp::nullopt; - } - - BigInt quotient(0); - BigInt subtractor = other; - int cur_bit = static_cast(subtractor.clz() - this->clz()); - subtractor.shift_left(cur_bit); - - for (; cur_bit >= 0 && *this > 0; --cur_bit, subtractor.shift_right(1)) { - if (*this >= subtractor) { - this->sub(subtractor); - quotient = quotient | (BigInt(1) << cur_bit); - } - } - remainder = *this; - *this = quotient; - return remainder; + if (LIBC_UNLIKELY(divider == BigInt::one())) + return BigInt::zero(); + Division result; + if constexpr (SIGNED) + result = divide_signed(*this, divider); + else + result = divide_unsigned(*this, divider); + *this = result.quotient; + return result.remainder; } // Efficiently perform BigInt / (x * 2^e), where x is a half-word-size @@ -496,19 +598,16 @@ struct BigInt { // computation of each step is now properly contained within WordType. // And finally we perform some extra alignment steps for the remaining bits. 
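  // Illustration (not part of this patch): for unsigned integers the quotient of
  // a division by (x * 2^e) can be obtained by shifting right by e and then
  // dividing by x; e.g. with x = 7 and e = 3:
  static_assert((1000u / (7u << 3)) == ((1000u >> 3) / 7u),
                "v / (x * 2^e) == (v >> e) / x for unsigned v");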
LIBC_INLINE constexpr cpp::optional - div_uint_half_times_pow_2(internal::half_width_t x, size_t e) { - BigInt remainder(0); - - if (x == 0) { + div_uint_half_times_pow_2(multiword::half_width_t x, size_t e) { + BigInt remainder; + if (x == 0) return cpp::nullopt; - } if (e >= Bits) { remainder = *this; - *this = BigInt(0); + *this = BigInt(); return remainder; } - - BigInt quotient(0); + BigInt quotient; WordType x_word = static_cast(x); constexpr size_t LOG2_WORD_SIZE = cpp::bit_width(WORD_SIZE) - 1; constexpr size_t HALF_WORD_SIZE = WORD_SIZE >> 1; @@ -633,189 +732,22 @@ struct BigInt { return *this; } - // TODO: remove and use cpp::countl_zero below. - [[nodiscard]] LIBC_INLINE constexpr int clz() const { - constexpr int word_digits = cpp::numeric_limits::digits; - int leading_zeroes = 0; - for (auto i = val.size(); i > 0;) { - --i; - const int zeroes = cpp::countl_zero(val[i]); - leading_zeroes += zeroes; - if (zeroes != word_digits) - break; - } - return leading_zeroes; - } - - // TODO: remove and use cpp::countr_zero below. - [[nodiscard]] LIBC_INLINE constexpr int ctz() const { - constexpr int word_digits = cpp::numeric_limits::digits; - int trailing_zeroes = 0; - for (auto word : val) { - const int zeroes = cpp::countr_zero(word); - trailing_zeroes += zeroes; - if (zeroes != word_digits) - break; - } - return trailing_zeroes; - } - - LIBC_INLINE constexpr void shift_left(size_t s) { - if constexpr (Bits == WORD_SIZE) { - // Use native types if possible. - if (s >= WORD_SIZE) { - val[0] = 0; - return; - } - val[0] <<= s; - return; - } - if constexpr ((Bits == 64) && (WORD_SIZE == 32)) { - // Use builtin 64 bits for 32-bit base type if available; - if (s >= 64) { - val[0] = 0; - val[1] = 0; - return; - } - uint64_t tmp = uint64__t(val[0]) + (uint64_t(val[1]) << 62); - tmp <<= s; - val[0] = uint32_t(tmp); - val[1] = uint32_t(tmp >> 32); - return; - } -#ifdef LIBC_TYPES_HAS_INT128 - if constexpr ((Bits == 128) && (WORD_SIZE == 64)) { - // Use builtin 128 bits if available; - if (s >= 128) { - val[0] = 0; - val[1] = 0; - return; - } - __uint128_t tmp = __uint128_t(val[0]) + (__uint128_t(val[1]) << 64); - tmp <<= s; - val[0] = uint64_t(tmp); - val[1] = uint64_t(tmp >> 64); - return; - } -#endif // LIBC_TYPES_HAS_INT128 - if (LIBC_UNLIKELY(s == 0)) - return; - - const size_t drop = s / WORD_SIZE; // Number of words to drop - const size_t shift = s % WORD_SIZE; // Bits to shift in the remaining words. 
- size_t i = WORD_COUNT; - - if (drop < WORD_COUNT) { - i = WORD_COUNT - 1; - if (shift > 0) { - for (size_t j = WORD_COUNT - 1 - drop; j > 0; --i, --j) { - val[i] = (val[j] << shift) | (val[j - 1] >> (WORD_SIZE - shift)); - } - val[i] = val[0] << shift; - } else { - for (size_t j = WORD_COUNT - 1 - drop; j > 0; --i, --j) { - val[i] = val[j]; - } - val[i] = val[0]; - } - } - - for (size_t j = 0; j < i; ++j) { - val[j] = 0; - } + LIBC_INLINE constexpr BigInt &operator<<=(size_t s) { + val = multiword::shift(val, s); + return *this; } LIBC_INLINE constexpr BigInt operator<<(size_t s) const { - BigInt result(*this); - result.shift_left(s); - return result; + return BigInt(multiword::shift(val, s)); } - LIBC_INLINE constexpr BigInt &operator<<=(size_t s) { - shift_left(s); + LIBC_INLINE constexpr BigInt &operator>>=(size_t s) { + val = multiword::shift(val, s); return *this; } - LIBC_INLINE constexpr void shift_right(size_t s) { - if constexpr ((Bits == 64) && (WORD_SIZE == 32)) { - // Use builtin 64 bits if available; - if (s >= 64) { - val[0] = 0; - val[1] = 0; - return; - } - uint64_t tmp = uint64_t(val[0]) + (uint64_t(val[1]) << 32); - if constexpr (Signed) { - tmp = static_cast(static_cast(tmp) >> s); - } else { - tmp >>= s; - } - val[0] = uint32_t(tmp); - val[1] = uint32_t(tmp >> 32); - return; - } -#ifdef LIBC_TYPES_HAS_INT128 - if constexpr ((Bits == 128) && (WORD_SIZE == 64)) { - // Use builtin 128 bits if available; - if (s >= 128) { - val[0] = 0; - val[1] = 0; - return; - } - __uint128_t tmp = __uint128_t(val[0]) + (__uint128_t(val[1]) << 64); - if constexpr (Signed) { - tmp = static_cast<__uint128_t>(static_cast<__int128_t>(tmp) >> s); - } else { - tmp >>= s; - } - val[0] = uint64_t(tmp); - val[1] = uint64_t(tmp >> 64); - return; - } -#endif // LIBC_TYPES_HAS_INT128 - - if (LIBC_UNLIKELY(s == 0)) - return; - const size_t drop = s / WORD_SIZE; // Number of words to drop - const size_t shift = s % WORD_SIZE; // Bit shift in the remaining words. - - size_t i = 0; - WordType sign = Signed ? is_neg() : 0; - - if (drop < WORD_COUNT) { - if (shift > 0) { - for (size_t j = drop; j < WORD_COUNT - 1; ++i, ++j) { - val[i] = (val[j] >> shift) | (val[j + 1] << (WORD_SIZE - shift)); - } - if constexpr (Signed) { - val[i] = static_cast( - static_cast>(val[WORD_COUNT - 1]) >> - shift); - } else { - val[i] = val[WORD_COUNT - 1] >> shift; - } - ++i; - } else { - for (size_t j = drop; j < WORD_COUNT; ++i, ++j) { - val[i] = val[j]; - } - } - } - - for (; i < WORD_COUNT; ++i) { - val[i] = sign; - } - } - LIBC_INLINE constexpr BigInt operator>>(size_t s) const { - BigInt result(*this); - result.shift_right(s); - return result; - } - - LIBC_INLINE constexpr BigInt &operator>>=(size_t s) { - shift_right(s); - return *this; + return BigInt(multiword::shift(val, s)); } #define DEFINE_BINOP(OP) \ @@ -833,10 +765,9 @@ struct BigInt { return lhs; \ } - DEFINE_BINOP(&) - DEFINE_BINOP(|) - DEFINE_BINOP(^) - + DEFINE_BINOP(&) // & and &= + DEFINE_BINOP(|) // | and |= + DEFINE_BINOP(^) // ^ and ^= #undef DEFINE_BINOP LIBC_INLINE constexpr BigInt operator~() const { @@ -847,8 +778,8 @@ struct BigInt { } LIBC_INLINE constexpr BigInt operator-() const { - BigInt result = ~(*this); - result.add(BigInt(1)); + BigInt result(*this); + result.negate(); return result; } @@ -865,24 +796,6 @@ struct BigInt { return !(lhs == rhs); } -private: - LIBC_INLINE friend constexpr int cmp(const BigInt &lhs, const BigInt &rhs) { - const auto compare = [](WordType a, WordType b) { - return a == b ? 0 : a > b ? 
1 : -1; - }; - if constexpr (Signed) { - const bool lhs_is_neg = lhs.is_neg(); - const bool rhs_is_neg = rhs.is_neg(); - if (lhs_is_neg != rhs_is_neg) - return rhs_is_neg ? 1 : -1; - } - for (size_t i = WORD_COUNT; i-- > 0;) - if (auto cmp = compare(lhs[i], rhs[i]); cmp != 0) - return cmp; - return 0; - } - -public: LIBC_INLINE friend constexpr bool operator>(const BigInt &lhs, const BigInt &rhs) { return cmp(lhs, rhs) > 0; @@ -901,24 +814,24 @@ public: } LIBC_INLINE constexpr BigInt &operator++() { - add(BigInt(1)); + increment(); return *this; } LIBC_INLINE constexpr BigInt operator++(int) { BigInt oldval(*this); - add(BigInt(1)); + increment(); return oldval; } LIBC_INLINE constexpr BigInt &operator--() { - sub(BigInt(1)); + decrement(); return *this; } LIBC_INLINE constexpr BigInt operator--(int) { BigInt oldval(*this); - sub(BigInt(1)); + decrement(); return oldval; } @@ -930,9 +843,117 @@ public: // Return the i-th word of the number. LIBC_INLINE constexpr WordType &operator[](size_t i) { return val[i]; } - LIBC_INLINE WordType *data() { return val; } +private: + LIBC_INLINE friend constexpr int cmp(const BigInt &lhs, const BigInt &rhs) { + constexpr auto compare = [](WordType a, WordType b) { + return a == b ? 0 : a > b ? 1 : -1; + }; + if constexpr (Signed) { + const bool lhs_is_neg = lhs.is_neg(); + const bool rhs_is_neg = rhs.is_neg(); + if (lhs_is_neg != rhs_is_neg) + return rhs_is_neg ? 1 : -1; + } + for (size_t i = WORD_COUNT; i-- > 0;) + if (auto cmp = compare(lhs[i], rhs[i]); cmp != 0) + return cmp; + return 0; + } + + LIBC_INLINE constexpr void bitwise_not() { + for (auto &part : val) + part = ~part; + } + + LIBC_INLINE constexpr void negate() { + bitwise_not(); + increment(); + } - LIBC_INLINE const WordType *data() const { return val; } + LIBC_INLINE constexpr void increment() { + multiword::add_with_carry(val, cpp::array{1}); + } + + LIBC_INLINE constexpr void decrement() { + multiword::add_with_carry(val, cpp::array{1}); + } + + LIBC_INLINE constexpr void extend(size_t index, bool is_neg) { + const WordType value = is_neg ? cpp::numeric_limits::max() + : cpp::numeric_limits::min(); + for (size_t i = index; i < WORD_COUNT; ++i) + val[i] = value; + } + + LIBC_INLINE constexpr bool get_msb() const { + return val.back() >> (WORD_SIZE - 1); + } + + LIBC_INLINE constexpr void set_msb() { + val.back() |= mask_leading_ones(); + } + + LIBC_INLINE constexpr void clear_msb() { + val.back() &= mask_trailing_ones(); + } + + LIBC_INLINE constexpr void set_bit(size_t i) { + const size_t word_index = i / WORD_SIZE; + val[word_index] |= WordType(1) << (i % WORD_SIZE); + } + + LIBC_INLINE constexpr static Division divide_unsigned(const BigInt ÷nd, + const BigInt ÷r) { + BigInt remainder = dividend; + BigInt quotient; + if (remainder >= divider) { + BigInt subtractor = divider; + int cur_bit = multiword::countl_zero(subtractor.val) - + multiword::countl_zero(remainder.val); + subtractor <<= cur_bit; + for (; cur_bit >= 0 && remainder > 0; --cur_bit, subtractor >>= 1) { + if (remainder < subtractor) + continue; + remainder -= subtractor; + quotient.set_bit(cur_bit); + } + } + return Division{quotient, remainder}; + } + + LIBC_INLINE constexpr static Division divide_signed(const BigInt ÷nd, + const BigInt ÷r) { + // Special case because it is not possible to negate the min value of a + // signed integer. + if (dividend == min() && divider == min()) + return Division{one(), zero()}; + // 1. Convert the dividend and divisor to unsigned representation. 
+ unsigned_type udividend(dividend); + unsigned_type udivider(divider); + // 2. Negate the dividend if it's negative, and similarly for the divisor. + const bool dividend_is_neg = dividend.is_neg(); + const bool divider_is_neg = divider.is_neg(); + if (dividend_is_neg) + udividend.negate(); + if (divider_is_neg) + udivider.negate(); + // 3. Use unsigned multiword division algorithm. + const auto unsigned_result = divide_unsigned(udividend, udivider); + // 4. Convert the quotient and remainder to signed representation. + Division result; + result.quotient = signed_type(unsigned_result.quotient); + result.remainder = signed_type(unsigned_result.remainder); + // 5. Negate the quotient if the dividend and divisor had opposite signs. + if (dividend_is_neg != divider_is_neg) + result.quotient.negate(); + // 6. Negate the remainder if the dividend was negative. + if (dividend_is_neg) + result.remainder.negate(); + return result; + } + + friend signed_type; + friend unsigned_type; }; namespace internal { @@ -962,10 +983,8 @@ using Int = BigInt>; // Provides limits of U/Int<128>. template <> class cpp::numeric_limits> { public: - LIBC_INLINE static constexpr UInt<128> max() { - return UInt<128>({0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}); - } - LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>(0); } + LIBC_INLINE static constexpr UInt<128> max() { return UInt<128>::max(); } + LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>::min(); } // Meant to match std::numeric_limits interface. // NOLINTNEXTLINE(readability-identifier-naming) LIBC_INLINE_VAR static constexpr int digits = 128; @@ -973,12 +992,8 @@ public: template <> class cpp::numeric_limits> { public: - LIBC_INLINE static constexpr Int<128> max() { - return Int<128>({0xffff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}); - } - LIBC_INLINE static constexpr Int<128> min() { - return Int<128>({0, 0x8000'0000'0000'0000}); - } + LIBC_INLINE static constexpr Int<128> max() { return Int<128>::max(); } + LIBC_INLINE static constexpr Int<128> min() { return Int<128>::min(); } // Meant to match std::numeric_limits interface. // NOLINTNEXTLINE(readability-identifier-naming) LIBC_INLINE_VAR static constexpr int digits = 128; @@ -1112,30 +1127,28 @@ has_single_bit(T value) { template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countr_zero(const T &value) { - return value.ctz(); + return multiword::countr_zero(value.val); } // Specialization of cpp::countl_zero ('bit.h') for BigInt. template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countl_zero(const T &value) { - return value.clz(); + return multiword::countl_zero(value.val); } // Specialization of cpp::countl_one ('bit.h') for BigInt. template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countl_one(T value) { - // TODO : Implement a faster version not involving operator~. - return cpp::countl_zero(~value); + return multiword::countl_one(value.val); } // Specialization of cpp::countr_one ('bit.h') for BigInt. template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countr_one(T value) { - // TODO : Implement a faster version not involving operator~. - return cpp::countr_zero(~value); + return multiword::countr_one(value.val); } // Specialization of cpp::bit_width ('bit.h') for BigInt. 
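The count specializations above all reduce to the same word-wise accumulation that
`DECLARE_COUNTBIT` generates: sum the per-word count and stop at the first word that
is not saturated. A self-contained sketch of that scheme (illustrative only;
`wide_countl_zero` is not part of the patch, and the real code is generic over the
word type):

  #include <array>
  #include <bit>
  #include <cstddef>
  #include <cstdint>

  // Count leading zero bits of an N-word integer stored with the least
  // significant word first, scanning from the most significant word down.
  template <std::size_t N>
  int wide_countl_zero(const std::array<uint64_t, N> &words) {
    int bits = 0;
    for (std::size_t i = 0; i < N; ++i) {
      const int word_bits = std::countl_zero(words[N - i - 1]);
      bits += word_bits;
      if (word_bits != 64) // this word has a set bit; stop accumulating
        break;
    }
    return bits;
  }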
@@ -1182,65 +1195,59 @@ rotr(T value, int rotate) { template LIBC_INLINE constexpr cpp::enable_if_t, T> mask_trailing_ones() { - static_assert(!T::SIGNED); - if (count == 0) - return T(); - constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); - static_assert(count <= T_BITS && "Invalid bit index"); - using word_type = typename T::word_type; - T out; - constexpr int CHUNK_INDEX_CONTAINING_BIT = - static_cast(count / T::WORD_SIZE); - int index = 0; - for (auto &word : out.val) { - if (index < CHUNK_INDEX_CONTAINING_BIT) - word = -1; - else if (index > CHUNK_INDEX_CONTAINING_BIT) - word = 0; - else - word = mask_trailing_ones(); - ++index; - } + static_assert(!T::SIGNED && count <= T::BITS); + if (count == T::BITS) + return T::all_ones(); + constexpr size_t QUOTIENT = count / T::WORD_SIZE; + constexpr size_t REMAINDER = count % T::WORD_SIZE; + T out; // zero initialized + for (size_t i = 0; i <= QUOTIENT; ++i) + out[i] = i < QUOTIENT + ? -1 + : mask_trailing_ones(); return out; } // Specialization of mask_leading_ones ('math_extras.h') for BigInt. template LIBC_INLINE constexpr cpp::enable_if_t, T> mask_leading_ones() { - static_assert(!T::SIGNED); - if (count == 0) - return T(); - constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); - static_assert(count <= T_BITS && "Invalid bit index"); - using word_type = typename T::word_type; - T out; - constexpr int CHUNK_INDEX_CONTAINING_BIT = - static_cast((T::BITS - count - 1ULL) / T::WORD_SIZE); - int index = 0; - for (auto &word : out.val) { - if (index < CHUNK_INDEX_CONTAINING_BIT) - word = 0; - else if (index > CHUNK_INDEX_CONTAINING_BIT) - word = -1; - else - word = mask_leading_ones(); - ++index; - } + static_assert(!T::SIGNED && count <= T::BITS); + if (count == T::BITS) + return T::all_ones(); + constexpr size_t QUOTIENT = (T::BITS - count - 1U) / T::WORD_SIZE; + constexpr size_t REMAINDER = count % T::WORD_SIZE; + T out; // zero initialized + for (size_t i = QUOTIENT; i < T::WORD_COUNT; ++i) + out[i] = i > QUOTIENT + ? -1 + : mask_leading_ones(); return out; } +// Specialization of mask_trailing_zeros ('math_extras.h') for BigInt. +template +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_trailing_zeros() { + return mask_leading_ones(); +} + +// Specialization of mask_leading_zeros ('math_extras.h') for BigInt. +template +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_leading_zeros() { + return mask_trailing_ones(); +} + // Specialization of count_zeros ('math_extras.h') for BigInt. template -[[nodiscard]] -LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> count_zeros(T value) { return cpp::popcount(~value); } // Specialization of first_leading_zero ('math_extras.h') for BigInt. template -[[nodiscard]] -LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> first_leading_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : cpp::countl_one(value) + 1; @@ -1248,16 +1255,14 @@ first_leading_zero(T value) { // Specialization of first_leading_one ('math_extras.h') for BigInt. template -[[nodiscard]] -LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> first_leading_one(T value) { return first_leading_zero(~value); } // Specialization of first_trailing_zero ('math_extras.h') for BigInt. 
template -[[nodiscard]] -LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> first_trailing_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : cpp::countr_zero(~value) + 1; @@ -1265,8 +1270,7 @@ first_trailing_zero(T value) { // Specialization of first_trailing_one ('math_extras.h') for BigInt. template -[[nodiscard]] -LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> first_trailing_one(T value) { return value == cpp::numeric_limits::max() ? 0 : cpp::countr_zero(value) + 1; diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h index 1287c3e..4c59cfd 100644 --- a/libc/src/__support/float_to_string.h +++ b/libc/src/__support/float_to_string.h @@ -689,7 +689,7 @@ template <> class FloatToString { wide_int float_as_int = mantissa; - float_as_int.shift_left(exponent); + float_as_int <<= exponent; int_block_index = 0; while (float_as_int > 0) { @@ -708,10 +708,11 @@ template <> class FloatToString { const int SHIFT_AMOUNT = FLOAT_AS_INT_WIDTH + exponent; static_assert(EXTRA_INT_WIDTH >= sizeof(long double) * 8); - float_as_fixed.shift_left(SHIFT_AMOUNT); + float_as_fixed <<= SHIFT_AMOUNT; // If there are still digits above the decimal point, handle those. - if (float_as_fixed.clz() < static_cast(EXTRA_INT_WIDTH)) { + if (cpp::countl_zero(float_as_fixed) < + static_cast(EXTRA_INT_WIDTH)) { UInt above_decimal_point = float_as_fixed >> FLOAT_AS_INT_WIDTH; diff --git a/libc/src/__support/integer_literals.h b/libc/src/__support/integer_literals.h index de1f88f..e99799c 100644 --- a/libc/src/__support/integer_literals.h +++ b/libc/src/__support/integer_literals.h @@ -151,12 +151,15 @@ template struct Parser> { template LIBC_INLINE constexpr T parse_with_prefix(const char *ptr) { using P = Parser; - if (ptr[0] == '0' && ptr[1] == 'x') - return P::template parse<16>(ptr + 2); - else if (ptr[0] == '0' && ptr[1] == 'b') - return P::template parse<2>(ptr + 2); - else - return P::template parse<10>(ptr); + if (ptr == nullptr) + return T(); + if (ptr[0] == '0') { + if (ptr[1] == 'b') + return P::template parse<2>(ptr + 2); + if (ptr[1] == 'x') + return P::template parse<16>(ptr + 2); + } + return P::template parse<10>(ptr); } } // namespace internal @@ -169,6 +172,16 @@ LIBC_INLINE constexpr auto operator""_u256(const char *x) { return internal::parse_with_prefix>(x); } +template LIBC_INLINE constexpr T parse_bigint(const char *ptr) { + if (ptr == nullptr) + return T(); + if (ptr[0] == '-' || ptr[0] == '+') { + auto positive = internal::parse_with_prefix(ptr + 1); + return ptr[0] == '-' ? 
-positive : positive; + } + return internal::parse_with_prefix(ptr); +} + } // namespace LIBC_NAMESPACE #endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h index 70a8800..bb6424b 100644 --- a/libc/src/__support/math_extras.h +++ b/libc/src/__support/math_extras.h @@ -10,9 +10,9 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H #define LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H -#include "src/__support/CPP/bit.h" // countl_one, countr_zero -#include "src/__support/CPP/limits.h" // CHAR_BIT, numeric_limits -#include "src/__support/CPP/type_traits.h" // is_unsigned_v +#include "src/__support/CPP/bit.h" // countl_one, countr_zero +#include "src/__support/CPP/limits.h" // CHAR_BIT, numeric_limits +#include "src/__support/CPP/type_traits.h" // is_unsigned_v, is_constant_evaluated #include "src/__support/macros/attributes.h" // LIBC_INLINE namespace LIBC_NAMESPACE { @@ -32,199 +32,94 @@ mask_trailing_ones() { template LIBC_INLINE constexpr cpp::enable_if_t, T> mask_leading_ones() { - constexpr T MASK(mask_trailing_ones()); - return T(~MASK); // bitwise NOT performs integer promotion. + return T(~mask_trailing_ones()); } -// Add with carry -template struct SumCarry { - T sum; - T carry; -}; - -// This version is always valid for constexpr. -template -LIBC_INLINE constexpr cpp::enable_if_t< - cpp::is_integral_v && cpp::is_unsigned_v, SumCarry> -add_with_carry_const(T a, T b, T carry_in) { - T tmp = a + carry_in; - T sum = b + tmp; - T carry_out = (sum < b) + (tmp < a); - return {sum, carry_out}; -} - -template -LIBC_INLINE constexpr cpp::enable_if_t< - cpp::is_integral_v && cpp::is_unsigned_v, SumCarry> -add_with_carry(T a, T b, T carry_in) { - return add_with_carry_const(a, b, carry_in); -} - -#if __has_builtin(__builtin_addc) -// https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins - -template <> -LIBC_INLINE constexpr SumCarry -add_with_carry(unsigned char a, unsigned char b, - unsigned char carry_in) { - if (__builtin_is_constant_evaluated()) { - return add_with_carry_const(a, b, carry_in); - } else { - SumCarry result{0, 0}; - result.sum = __builtin_addcb(a, b, carry_in, &result.carry); - return result; - } -} - -template <> -LIBC_INLINE constexpr SumCarry -add_with_carry(unsigned short a, unsigned short b, - unsigned short carry_in) { - if (__builtin_is_constant_evaluated()) { - return add_with_carry_const(a, b, carry_in); - } else { - SumCarry result{0, 0}; - result.sum = __builtin_addcs(a, b, carry_in, &result.carry); - return result; - } -} - -template <> -LIBC_INLINE constexpr SumCarry -add_with_carry(unsigned int a, unsigned int b, - unsigned int carry_in) { - if (__builtin_is_constant_evaluated()) { - return add_with_carry_const(a, b, carry_in); - } else { - SumCarry result{0, 0}; - result.sum = __builtin_addc(a, b, carry_in, &result.carry); - return result; - } -} - -template <> -LIBC_INLINE constexpr SumCarry -add_with_carry(unsigned long a, unsigned long b, - unsigned long carry_in) { - if (__builtin_is_constant_evaluated()) { - return add_with_carry_const(a, b, carry_in); - } else { - SumCarry result{0, 0}; - result.sum = __builtin_addcl(a, b, carry_in, &result.carry); - return result; - } +// Create a bitmask with the count right-most bits set to 0, and all other bits +// set to 1. Only unsigned types are allowed. 
+template +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_trailing_zeros() { + return mask_leading_ones(); } -template <> -LIBC_INLINE constexpr SumCarry -add_with_carry(unsigned long long a, unsigned long long b, - unsigned long long carry_in) { - if (__builtin_is_constant_evaluated()) { - return add_with_carry_const(a, b, carry_in); - } else { - SumCarry result{0, 0}; - result.sum = __builtin_addcll(a, b, carry_in, &result.carry); - return result; - } +// Create a bitmask with the count left-most bits set to 0, and all other bits +// set to 1. Only unsigned types are allowed. +template +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_leading_zeros() { + return mask_trailing_ones(); } -#endif // __has_builtin(__builtin_addc) - -// Subtract with borrow -template struct DiffBorrow { - T diff; - T borrow; -}; - -// This version is always valid for constexpr. +// Returns whether 'a + b' overflows, the result is stored in 'res'. template -LIBC_INLINE constexpr cpp::enable_if_t< - cpp::is_integral_v && cpp::is_unsigned_v, DiffBorrow> -sub_with_borrow_const(T a, T b, T borrow_in) { - T tmp = a - b; - T diff = tmp - borrow_in; - T borrow_out = (diff > tmp) + (tmp > a); - return {diff, borrow_out}; +[[nodiscard]] LIBC_INLINE constexpr bool add_overflow(T a, T b, T &res) { + return __builtin_add_overflow(a, b, &res); } -// This version is not always valid for constepxr because it's overriden below -// if builtins are available. +// Returns whether 'a - b' overflows, the result is stored in 'res'. template -LIBC_INLINE constexpr cpp::enable_if_t< - cpp::is_integral_v && cpp::is_unsigned_v, DiffBorrow> -sub_with_borrow(T a, T b, T borrow_in) { - return sub_with_borrow_const(a, b, borrow_in); +[[nodiscard]] LIBC_INLINE constexpr bool sub_overflow(T a, T b, T &res) { + return __builtin_sub_overflow(a, b, &res); } -#if __has_builtin(__builtin_subc) -// https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins - -template <> -LIBC_INLINE constexpr DiffBorrow -sub_with_borrow(unsigned char a, unsigned char b, - unsigned char borrow_in) { - if (__builtin_is_constant_evaluated()) { - return sub_with_borrow_const(a, b, borrow_in); - } else { - DiffBorrow result{0, 0}; - result.diff = __builtin_subcb(a, b, borrow_in, &result.borrow); - return result; - } -} +#define RETURN_IF(TYPE, BUILTIN) \ + if constexpr (cpp::is_same_v) \ + return BUILTIN(a, b, carry_in, carry_out); -template <> -LIBC_INLINE constexpr DiffBorrow -sub_with_borrow(unsigned short a, unsigned short b, - unsigned short borrow_in) { - if (__builtin_is_constant_evaluated()) { - return sub_with_borrow_const(a, b, borrow_in); - } else { - DiffBorrow result{0, 0}; - result.diff = __builtin_subcs(a, b, borrow_in, &result.borrow); - return result; - } -} - -template <> -LIBC_INLINE constexpr DiffBorrow -sub_with_borrow(unsigned int a, unsigned int b, - unsigned int borrow_in) { - if (__builtin_is_constant_evaluated()) { - return sub_with_borrow_const(a, b, borrow_in); - } else { - DiffBorrow result{0, 0}; - result.diff = __builtin_subc(a, b, borrow_in, &result.borrow); - return result; - } -} - -template <> -LIBC_INLINE constexpr DiffBorrow -sub_with_borrow(unsigned long a, unsigned long b, - unsigned long borrow_in) { - if (__builtin_is_constant_evaluated()) { - return sub_with_borrow_const(a, b, borrow_in); - } else { - DiffBorrow result{0, 0}; - result.diff = __builtin_subcl(a, b, borrow_in, &result.borrow); - return result; +// Returns the result of 'a + b' taking into account 'carry_in'. 
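As an aside, the four mask helpers above are easy to sanity-check on a small type. The following stand-alone sketch is illustrative only (not part of the patch); it uses nothing beyond the functions defined in this header, and the count of 3 with uint8_t is an arbitrary choice:

  #include <stdint.h>

  #include "src/__support/math_extras.h"

  // mask_trailing_ones<uint8_t, 3>()  == 0b00000111
  // mask_leading_ones<uint8_t, 3>()   == 0b11100000
  // mask_trailing_zeros<uint8_t, 3>() == 0b11111000 (complement of the trailing ones)
  // mask_leading_zeros<uint8_t, 3>()  == 0b00011111 (complement of the leading ones)
  static_assert(LIBC_NAMESPACE::mask_trailing_ones<uint8_t, 3>() == 0x07);
  static_assert(LIBC_NAMESPACE::mask_leading_ones<uint8_t, 3>() == 0xE0);
  static_assert(LIBC_NAMESPACE::mask_trailing_zeros<uint8_t, 3>() == 0xF8);
  static_assert(LIBC_NAMESPACE::mask_leading_zeros<uint8_t, 3>() == 0x1F);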
+// The carry out is stored in 'carry_out' it not 'nullptr', dropped otherwise. +// We keep the pass by pointer interface for consistency with the intrinsic. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +add_with_carry(T a, T b, T carry_in, T &carry_out) { + if constexpr (!cpp::is_constant_evaluated()) { +#if __has_builtin(__builtin_addcb) + RETURN_IF(unsigned char, __builtin_addcb) +#elif __has_builtin(__builtin_addcs) + RETURN_IF(unsigned short, __builtin_addcs) +#elif __has_builtin(__builtin_addc) + RETURN_IF(unsigned int, __builtin_addc) +#elif __has_builtin(__builtin_addcl) + RETURN_IF(unsigned long, __builtin_addcl) +#elif __has_builtin(__builtin_addcll) + RETURN_IF(unsigned long long, __builtin_addcll) +#endif } + T sum; + T carry1 = add_overflow(a, b, sum); + T carry2 = add_overflow(sum, carry_in, sum); + carry_out = carry1 | carry2; + return sum; } -template <> -LIBC_INLINE constexpr DiffBorrow -sub_with_borrow(unsigned long long a, unsigned long long b, - unsigned long long borrow_in) { - if (__builtin_is_constant_evaluated()) { - return sub_with_borrow_const(a, b, borrow_in); - } else { - DiffBorrow result{0, 0}; - result.diff = __builtin_subcll(a, b, borrow_in, &result.borrow); - return result; +// Returns the result of 'a - b' taking into account 'carry_in'. +// The carry out is stored in 'carry_out' it not 'nullptr', dropped otherwise. +// We keep the pass by pointer interface for consistency with the intrinsic. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +sub_with_borrow(T a, T b, T carry_in, T &carry_out) { + if constexpr (!cpp::is_constant_evaluated()) { +#if __has_builtin(__builtin_subcb) + RETURN_IF(unsigned char, __builtin_subcb) +#elif __has_builtin(__builtin_subcs) + RETURN_IF(unsigned short, __builtin_subcs) +#elif __has_builtin(__builtin_subc) + RETURN_IF(unsigned int, __builtin_subc) +#elif __has_builtin(__builtin_subcl) + RETURN_IF(unsigned long, __builtin_subcl) +#elif __has_builtin(__builtin_subcll) + RETURN_IF(unsigned long long, __builtin_subcll) +#endif } + T sub; + T carry1 = sub_overflow(a, b, sub); + T carry2 = sub_overflow(sub, carry_in, sub); + carry_out = carry1 | carry2; + return sub; } -#endif // __has_builtin(__builtin_subc) +#undef RETURN_IF template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> diff --git a/libc/src/__support/number_pair.h b/libc/src/__support/number_pair.h index ee6667b..2f713fc 100644 --- a/libc/src/__support/number_pair.h +++ b/libc/src/__support/number_pair.h @@ -20,17 +20,6 @@ template struct NumberPair { T hi = T(0); }; -template -cpp::enable_if_t && cpp::is_unsigned_v, - NumberPair> constexpr split(T a) { - constexpr size_t HALF_BIT_WIDTH = sizeof(T) * 4; - constexpr T LOWER_HALF_MASK = (T(1) << HALF_BIT_WIDTH) - T(1); - NumberPair result; - result.lo = a & LOWER_HALF_MASK; - result.hi = a >> HALF_BIT_WIDTH; - return result; -} - } // namespace LIBC_NAMESPACE #endif // LLVM_LIBC_SRC___SUPPORT_NUMBER_PAIR_H diff --git a/libc/test/src/__support/integer_literals_test.cpp b/libc/test/src/__support/integer_literals_test.cpp index 5298cf3..cbc906a 100644 --- a/libc/test/src/__support/integer_literals_test.cpp +++ b/libc/test/src/__support/integer_literals_test.cpp @@ -133,3 +133,24 @@ TEST(LlvmLibcIntegerLiteralTest, u256) { U256_MAX, 0xFFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF_u256); } + +TEST(LlvmLibcIntegerLiteralTest, parse_bigint) { + using T = LIBC_NAMESPACE::Int<128>; + struct { + const char *str; + T expected; + } constexpr 
TEST_CASES[] = { + {"0", 0}, {"-1", -1}, {"+1", 1}, {"-0xFF", -255}, {"-0b11", -3}, + }; + for (auto tc : TEST_CASES) { + T actual = LIBC_NAMESPACE::parse_bigint(tc.str); + EXPECT_EQ(actual, tc.expected); + } +} + +TEST(LlvmLibcIntegerLiteralTest, parse_bigint_invalid) { + using T = LIBC_NAMESPACE::Int<128>; + const T expected; // default construction + EXPECT_EQ(LIBC_NAMESPACE::parse_bigint(nullptr), expected); + EXPECT_EQ(LIBC_NAMESPACE::parse_bigint(""), expected); +} diff --git a/libc/test/src/__support/math_extras_test.cpp b/libc/test/src/__support/math_extras_test.cpp index e88b3e1..401e631e 100644 --- a/libc/test/src/__support/math_extras_test.cpp +++ b/libc/test/src/__support/math_extras_test.cpp @@ -101,4 +101,61 @@ TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypesNoBigInt) { EXPECT_EQ(count_zeros(cpp::numeric_limits::max() >> i), i); } +using UnsignedTypes = testing::TypeList< +#if defined(__SIZEOF_INT128__) + __uint128_t, +#endif + unsigned char, unsigned short, unsigned int, unsigned long, + unsigned long long>; + +TYPED_TEST(LlvmLibcBlockMathExtrasTest, add_overflow, UnsignedTypes) { + constexpr T ZERO = cpp::numeric_limits::min(); + constexpr T ONE(1); + constexpr T MAX = cpp::numeric_limits::max(); + constexpr T BEFORE_MAX = MAX - 1; + + const struct { + T lhs; + T rhs; + T sum; + bool carry; + } TESTS[] = { + {ZERO, ONE, ONE, false}, // 0x00 + 0x01 = 0x01 + {BEFORE_MAX, ONE, MAX, false}, // 0xFE + 0x01 = 0xFF + {MAX, ONE, ZERO, true}, // 0xFF + 0x01 = 0x00 (carry) + {MAX, MAX, BEFORE_MAX, true}, // 0xFF + 0xFF = 0xFE (carry) + }; + for (auto tc : TESTS) { + T sum; + bool carry = add_overflow(tc.lhs, tc.rhs, sum); + EXPECT_EQ(sum, tc.sum); + EXPECT_EQ(carry, tc.carry); + } +} + +TYPED_TEST(LlvmLibcBlockMathExtrasTest, sub_overflow, UnsignedTypes) { + constexpr T ZERO = cpp::numeric_limits::min(); + constexpr T ONE(1); + constexpr T MAX = cpp::numeric_limits::max(); + constexpr T BEFORE_MAX = MAX - 1; + + const struct { + T lhs; + T rhs; + T sub; + bool carry; + } TESTS[] = { + {ONE, ZERO, ONE, false}, // 0x01 - 0x00 = 0x01 + {MAX, MAX, ZERO, false}, // 0xFF - 0xFF = 0x00 + {ZERO, ONE, MAX, true}, // 0x00 - 0x01 = 0xFF (carry) + {BEFORE_MAX, MAX, MAX, true}, // 0xFE - 0xFF = 0xFF (carry) + }; + for (auto tc : TESTS) { + T sub; + bool carry = sub_overflow(tc.lhs, tc.rhs, sub); + EXPECT_EQ(sub, tc.sub); + EXPECT_EQ(carry, tc.carry); + } +} + } // namespace LIBC_NAMESPACE diff --git a/libc/test/src/__support/uint_test.cpp b/libc/test/src/__support/uint_test.cpp index 5764324..5696e54 100644 --- a/libc/test/src/__support/uint_test.cpp +++ b/libc/test/src/__support/uint_test.cpp @@ -8,6 +8,7 @@ #include "src/__support/CPP/optional.h" #include "src/__support/UInt.h" +#include "src/__support/integer_literals.h" // parse_unsigned_bigint #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128 #include "include/llvm-libc-macros/math-macros.h" // HUGE_VALF, HUGE_VALF @@ -15,6 +16,195 @@ namespace LIBC_NAMESPACE { +enum Value { ZERO, ONE, TWO, MIN, MAX }; + +template auto create(Value value) { + switch (value) { + case ZERO: + return T(0); + case ONE: + return T(1); + case TWO: + return T(2); + case MIN: + return T::min(); + case MAX: + return T::max(); + } +} + +using Types = testing::TypeList< // +#ifdef LIBC_TYPES_HAS_INT64 + BigInt<64, false, uint64_t>, // 64-bits unsigned (1 x uint64_t) + BigInt<64, true, uint64_t>, // 64-bits signed (1 x uint64_t) +#endif +#ifdef LIBC_TYPES_HAS_INT128 + BigInt<128, false, __uint128_t>, // 128-bits unsigned (1 x __uint128_t) 
+ BigInt<128, true, __uint128_t>, // 128-bits signed (1 x __uint128_t) +#endif + BigInt<16, false, uint16_t>, // 16-bits unsigned (1 x uint16_t) + BigInt<16, true, uint16_t>, // 16-bits signed (1 x uint16_t) + BigInt<64, false, uint16_t>, // 64-bits unsigned (4 x uint16_t) + BigInt<64, true, uint16_t> // 64-bits signed (4 x uint16_t) + >; + +#define ASSERT_SAME(A, B) ASSERT_TRUE((A) == (B)) + +TYPED_TEST(LlvmLibcUIntClassTest, Additions, Types) { + ASSERT_SAME(create(ZERO) + create(ZERO), create(ZERO)); + ASSERT_SAME(create(ONE) + create(ZERO), create(ONE)); + ASSERT_SAME(create(ZERO) + create(ONE), create(ONE)); + ASSERT_SAME(create(ONE) + create(ONE), create(TWO)); + // 2's complement addition works for signed and unsigned types. + // - unsigned : 0xff + 0x01 = 0x00 (255 + 1 = 0) + // - signed : 0xef + 0x01 = 0xf0 (127 + 1 = -128) + ASSERT_SAME(create(MAX) + create(ONE), create(MIN)); +} + +TYPED_TEST(LlvmLibcUIntClassTest, Subtraction, Types) { + ASSERT_SAME(create(ZERO) - create(ZERO), create(ZERO)); + ASSERT_SAME(create(ONE) - create(ONE), create(ZERO)); + ASSERT_SAME(create(ONE) - create(ZERO), create(ONE)); + // 2's complement subtraction works for signed and unsigned types. + // - unsigned : 0x00 - 0x01 = 0xff ( 0 - 1 = 255) + // - signed : 0xf0 - 0x01 = 0xef (-128 - 1 = 127) + ASSERT_SAME(create(MIN) - create(ONE), create(MAX)); +} + +TYPED_TEST(LlvmLibcUIntClassTest, Multiplication, Types) { + ASSERT_SAME(create(ZERO) * create(ZERO), create(ZERO)); + ASSERT_SAME(create(ZERO) * create(ONE), create(ZERO)); + ASSERT_SAME(create(ONE) * create(ZERO), create(ZERO)); + ASSERT_SAME(create(ONE) * create(ONE), create(ONE)); + ASSERT_SAME(create(ONE) * create(TWO), create(TWO)); + ASSERT_SAME(create(TWO) * create(ONE), create(TWO)); + // - unsigned : 0xff x 0xff = 0x01 (mod 0xff) + // - signed : 0xef x 0xef = 0x01 (mod 0xff) + ASSERT_SAME(create(MAX) * create(MAX), create(ONE)); +} + +template void print(const char *msg, T value) { + testing::tlog << msg; + IntegerToString buffer(value); + testing::tlog << buffer.view() << "\n"; +} + +TEST(LlvmLibcUIntClassTest, SignedAddSub) { + // Computations performed by https://www.wolframalpha.com/ + using T = BigInt<128, true, uint32_t>; + const T a = parse_bigint("1927508279017230597"); + const T b = parse_bigint("278789278723478925"); + const T s = parse_bigint("2206297557740709522"); + // Addition + ASSERT_SAME(a + b, s); + ASSERT_SAME(b + a, s); // commutative + // Subtraction + ASSERT_SAME(a - s, -b); + ASSERT_SAME(s - a, b); +} + +TEST(LlvmLibcUIntClassTest, SignedMulDiv) { + // Computations performed by https://www.wolframalpha.com/ + using T = BigInt<128, true, uint16_t>; + struct { + const char *a; + const char *b; + const char *mul; + } const test_cases[] = {{"-4", "3", "-12"}, + {"-3", "-3", "9"}, + {"1927508279017230597", "278789278723478925", + "537368642840747885329125014794668225"}}; + for (auto tc : test_cases) { + const T a = parse_bigint(tc.a); + const T b = parse_bigint(tc.b); + const T mul = parse_bigint(tc.mul); + // Multiplication + ASSERT_SAME(a * b, mul); + ASSERT_SAME(b * a, mul); // commutative + ASSERT_SAME(a * -b, -mul); // sign + ASSERT_SAME(-a * b, -mul); // sign + ASSERT_SAME(-a * -b, mul); // sign + // Division + ASSERT_SAME(mul / a, b); + ASSERT_SAME(mul / b, a); + ASSERT_SAME(-mul / a, -b); // sign + ASSERT_SAME(mul / -a, -b); // sign + ASSERT_SAME(-mul / -a, b); // sign + } +} + +TYPED_TEST(LlvmLibcUIntClassTest, Division, Types) { + ASSERT_SAME(create(ZERO) / create(ONE), create(ZERO)); + ASSERT_SAME(create(MAX) / 
create(ONE), create(MAX)); + ASSERT_SAME(create(MAX) / create(MAX), create(ONE)); + ASSERT_SAME(create(ONE) / create(ONE), create(ONE)); + if constexpr (T::SIGNED) { + // Special case found by fuzzing. + ASSERT_SAME(create(MIN) / create(MIN), create(ONE)); + } + // - unsigned : 0xff / 0x02 = 0x7f + // - signed : 0xef / 0x02 = 0x77 + ASSERT_SAME(create(MAX) / create(TWO), (create(MAX) >> 1)); + + using word_type = typename T::word_type; + const T zero_one_repeated = T::all_ones() / T(0xff); + const word_type pattern = word_type(~0) / word_type(0xff); + for (const word_type part : zero_one_repeated.val) { + if constexpr (T::SIGNED == false) { + EXPECT_EQ(part, pattern); + } + } +} + +TYPED_TEST(LlvmLibcUIntClassTest, is_neg, Types) { + EXPECT_FALSE(create(ZERO).is_neg()); + EXPECT_FALSE(create(ONE).is_neg()); + EXPECT_FALSE(create(TWO).is_neg()); + EXPECT_EQ(create(MIN).is_neg(), T::SIGNED); + EXPECT_FALSE(create(MAX).is_neg()); +} + +TYPED_TEST(LlvmLibcUIntClassTest, Masks, Types) { + if constexpr (!T::SIGNED) { + constexpr size_t BITS = T::BITS; + // mask_trailing_ones + ASSERT_SAME((mask_trailing_ones()), T::zero()); + ASSERT_SAME((mask_trailing_ones()), T::one()); + ASSERT_SAME((mask_trailing_ones()), T::all_ones() >> 1); + ASSERT_SAME((mask_trailing_ones()), T::all_ones()); + // mask_leading_ones + ASSERT_SAME((mask_leading_ones()), T::zero()); + ASSERT_SAME((mask_leading_ones()), T::one() << (BITS - 1)); + ASSERT_SAME((mask_leading_ones()), T::all_ones() - T::one()); + ASSERT_SAME((mask_leading_ones()), T::all_ones()); + // mask_trailing_zeros + ASSERT_SAME((mask_trailing_zeros()), T::all_ones()); + ASSERT_SAME((mask_trailing_zeros()), T::all_ones() - T::one()); + ASSERT_SAME((mask_trailing_zeros()), T::one() << (BITS - 1)); + ASSERT_SAME((mask_trailing_zeros()), T::zero()); + // mask_trailing_zeros + ASSERT_SAME((mask_leading_zeros()), T::all_ones()); + ASSERT_SAME((mask_leading_zeros()), T::all_ones() >> 1); + ASSERT_SAME((mask_leading_zeros()), T::one()); + ASSERT_SAME((mask_leading_zeros()), T::zero()); + } +} + +TYPED_TEST(LlvmLibcUIntClassTest, CountBits, Types) { + if constexpr (!T::SIGNED) { + for (size_t i = 0; i <= T::BITS; ++i) { + const auto l_one = T::all_ones() << i; // 0b111...000 + const auto r_one = T::all_ones() >> i; // 0b000...111 + const int zeros = i; + const int ones = T::BITS - zeros; + ASSERT_EQ(cpp::countr_one(r_one), ones); + ASSERT_EQ(cpp::countl_one(l_one), ones); + ASSERT_EQ(cpp::countr_zero(l_one), zeros); + ASSERT_EQ(cpp::countl_zero(r_one), zeros); + } + } +} + using LL_UInt64 = UInt<64>; // We want to test UInt<128> explicitly. 
So, for // convenience, we use a sugar which does not conflict with the UInt128 type @@ -561,7 +751,7 @@ TEST(LlvmLibcUIntClassTest, FullMulTests) { LL_UInt##Bits a = ~LL_UInt##Bits(0); \ LL_UInt##Bits hi = a.quick_mul_hi(a); \ LL_UInt##Bits trunc = static_cast(a.ful_mul(a) >> Bits); \ - uint64_t overflow = trunc.sub(hi); \ + uint64_t overflow = trunc.sub_overflow(hi); \ EXPECT_EQ(overflow, uint64_t(0)); \ EXPECT_LE(uint64_t(trunc), uint64_t(Error)); \ } while (0) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel index 4f97612..c0d402a8 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel @@ -87,6 +87,7 @@ libc_test( srcs = ["uint_test.cpp"], deps = [ "//libc:__support_cpp_optional", + "//libc:__support_integer_literals", "//libc:__support_macros_properties_types", "//libc:__support_uint", "//libc:llvm_libc_macros_math_macros", -- cgit v1.1 From 47e996d89d4d1e229451594d4b0752b71e8e231c Mon Sep 17 00:00:00 2001 From: Vyacheslav Levytskyy Date: Thu, 4 Apr 2024 10:50:35 +0200 Subject: [SPIR-V] Fix OpVariable instructions place in a function (#87554) This PR: * fixes OpVariable instructions place in a function (see https://github.com/llvm/llvm-project/issues/66261), * improves type inference, * helps avoiding unneeded bitcasts when validating function call's This allows to improve existing and add new test cases with more strict checks. OpVariable fix refers to "All OpVariable instructions in a function must be the first instructions in the first block" requirement from SPIR-V spec. --- llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp | 8 ++- llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h | 6 +-- llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp | 2 +- llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp | 19 ++++++- llvm/test/CodeGen/SPIRV/OpVariable_order.ll | 14 +++-- .../SPIRV/pointers/type-deduce-by-call-chain.ll | 26 +++++++++- .../SPIRV/pointers/type-deduce-call-no-bitcast.ll | 60 ++++++++++++++++++++++ 7 files changed, 122 insertions(+), 13 deletions(-) create mode 100644 llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll diff --git a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp index 1674cef..9e4ba21 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCallLowering.cpp @@ -243,8 +243,12 @@ static SPIRVType *getArgSPIRVType(const Function &F, unsigned ArgIdx, continue; MetadataAsValue *VMD = cast(II->getOperand(1)); - SPIRVType *ElementType = GR->getOrCreateSPIRVType( - cast(VMD->getMetadata())->getType(), MIRBuilder); + Type *ElementTy = cast(VMD->getMetadata())->getType(); + if (isUntypedPointerTy(ElementTy)) + ElementTy = + TypedPointerType::get(IntegerType::getInt8Ty(II->getContext()), + getPointerAddressSpace(ElementTy)); + SPIRVType *ElementType = GR->getOrCreateSPIRVType(ElementTy, MIRBuilder); return GR->getOrCreateSPIRVPointerType( ElementType, MIRBuilder, addressSpaceToStorageClass( diff --git a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h index e0099e5..ac79937 100644 --- a/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h +++ b/llvm/lib/Target/SPIRV/SPIRVGlobalRegistry.h @@ -47,7 +47,7 @@ class SPIRVGlobalRegistry { DenseMap InstrToFunction; // Maps Functions to their calls (in a form of the machine instruction, // OpFunctionCall) that happened before the 
definition is available - DenseMap> ForwardCalls; + DenseMap> ForwardCalls; // Look for an equivalent of the newType in the map. Return the equivalent // if it's found, otherwise insert newType to the map and return the type. @@ -215,12 +215,12 @@ public: if (It == ForwardCalls.end()) ForwardCalls[F] = {MI}; else - It->second.push_back(MI); + It->second.insert(MI); } // Map a Function to the vector of machine instructions that represents // forward function calls or to nullptr if not found. - SmallVector *getForwardCalls(const Function *F) { + SmallPtrSet *getForwardCalls(const Function *F) { auto It = ForwardCalls.find(F); return It == ForwardCalls.end() ? nullptr : &It->second; } diff --git a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp index 90a3155..d450078 100644 --- a/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVISelLowering.cpp @@ -193,7 +193,7 @@ void validateForwardCalls(const SPIRVSubtarget &STI, MachineRegisterInfo *DefMRI, SPIRVGlobalRegistry &GR, MachineInstr &FunDef) { const Function *F = GR.getFunctionByDefinition(&FunDef); - if (SmallVector *FwdCalls = GR.getForwardCalls(F)) + if (SmallPtrSet *FwdCalls = GR.getForwardCalls(F)) for (MachineInstr *FunCall : *FwdCalls) { MachineRegisterInfo *CallMRI = &FunCall->getParent()->getParent()->getRegInfo(); diff --git a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp index f4525e71..49749b5 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVInstructionSelector.cpp @@ -1825,7 +1825,24 @@ bool SPIRVInstructionSelector::selectAllocaArray(Register ResVReg, bool SPIRVInstructionSelector::selectFrameIndex(Register ResVReg, const SPIRVType *ResType, MachineInstr &I) const { - return BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(SPIRV::OpVariable)) + // Change order of instructions if needed: all OpVariable instructions in a + // function must be the first instructions in the first block + MachineFunction *MF = I.getParent()->getParent(); + MachineBasicBlock *MBB = &MF->front(); + auto It = MBB->SkipPHIsAndLabels(MBB->begin()), E = MBB->end(); + bool IsHeader = false; + unsigned Opcode; + for (; It != E && It != I; ++It) { + Opcode = It->getOpcode(); + if (Opcode == SPIRV::OpFunction || Opcode == SPIRV::OpFunctionParameter) { + IsHeader = true; + } else if (IsHeader && + !(Opcode == SPIRV::ASSIGN_TYPE || Opcode == SPIRV::OpLabel)) { + ++It; + break; + } + } + return BuildMI(*MBB, It, It->getDebugLoc(), TII.get(SPIRV::OpVariable)) .addDef(ResVReg) .addUse(GR.getSPIRVTypeID(ResType)) .addImm(static_cast(SPIRV::StorageClass::Function)) diff --git a/llvm/test/CodeGen/SPIRV/OpVariable_order.ll b/llvm/test/CodeGen/SPIRV/OpVariable_order.ll index a4ca3aa..6057bf38 100644 --- a/llvm/test/CodeGen/SPIRV/OpVariable_order.ll +++ b/llvm/test/CodeGen/SPIRV/OpVariable_order.ll @@ -1,10 +1,14 @@ -; REQUIRES: spirv-tools -; RUN: llc -O0 -mtriple=spirv-unknown-linux %s -o - -filetype=obj | not spirv-val 2>&1 | FileCheck %s +; All OpVariable instructions in a function must be the first instructions in the first block -; TODO(#66261): The SPIR-V backend should reorder OpVariable instructions so this doesn't fail, -; but in the meantime it's a good example of the spirv-val tool working as intended. 
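The test change below turns OpVariable_order.ll from an expected spirv-val failure into a positive check, since selectFrameIndex now places every OpVariable at the top of the entry block. As a reading aid, the property being enforced can be written as a small predicate over a machine basic block; this is only a sketch using the opcodes referenced in this patch (allVariablesLeadEntryBlock is an illustrative name, not backend API) and it assumes the usual CodeGen headers plus the backend's generated SPIR-V opcode definitions:

  #include "llvm/CodeGen/MachineBasicBlock.h"
  #include "llvm/CodeGen/MachineInstr.h"
  // SPIRV::* opcode enumerators come from the SPIR-V backend's generated headers.

  // True when every OpVariable of the entry block precedes any other
  // non-header instruction, as required by the SPIR-V specification.
  static bool allVariablesLeadEntryBlock(const llvm::MachineBasicBlock &EntryMBB) {
    bool SeenNonVariable = false;
    for (const llvm::MachineInstr &MI : EntryMBB) {
      const unsigned Opcode = MI.getOpcode();
      // The function header and labels do not count against the rule.
      if (Opcode == SPIRV::OpFunction || Opcode == SPIRV::OpFunctionParameter ||
          Opcode == SPIRV::OpLabel || Opcode == SPIRV::ASSIGN_TYPE)
        continue;
      if (Opcode == SPIRV::OpVariable) {
        if (SeenNonVariable)
          return false; // an OpVariable placed after a regular instruction
      } else {
        SeenNonVariable = true;
      }
    }
    return true;
  }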
+; RUN: llc -O0 -mtriple=spirv-unknown-linux %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-linux %s -o - -filetype=obj | spirv-val %} -; CHECK: All OpVariable instructions in a function must be the first instructions in the first block. +; CHECK-SPIRV: OpFunction +; CHECK-SPIRV-NEXT: OpLabel +; CHECK-SPIRV-NEXT: OpVariable +; CHECK-SPIRV-NEXT: OpVariable +; CHECK-SPIRV: OpReturn +; CHECK-SPIRV: OpFunctionEnd define void @main() #1 { entry: diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll index 1071d34..b039f80 100644 --- a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll +++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-by-call-chain.ll @@ -10,22 +10,46 @@ ; CHECK-SPIRV-DAG: OpName %[[FooObj:.*]] "foo_object" ; CHECK-SPIRV-DAG: OpName %[[FooMemOrder:.*]] "mem_order" ; CHECK-SPIRV-DAG: OpName %[[FooFunc:.*]] "foo" + ; CHECK-SPIRV-DAG: %[[TyLong:.*]] = OpTypeInt 32 0 ; CHECK-SPIRV-DAG: %[[TyVoid:.*]] = OpTypeVoid +; CHECK-SPIRV-DAG: %[[TyGenPtrLong:.*]] = OpTypePointer Generic %[[TyLong]] ; CHECK-SPIRV-DAG: %[[TyPtrLong:.*]] = OpTypePointer CrossWorkgroup %[[TyLong]] ; CHECK-SPIRV-DAG: %[[TyFunPtrLong:.*]] = OpTypeFunction %[[TyVoid]] %[[TyPtrLong]] -; CHECK-SPIRV-DAG: %[[TyGenPtrLong:.*]] = OpTypePointer Generic %[[TyLong]] +; CHECK-SPIRV-DAG: %[[TyGenPtrPtrLong:.*]] = OpTypePointer Generic %[[TyGenPtrLong]] ; CHECK-SPIRV-DAG: %[[TyFunGenPtrLongLong:.*]] = OpTypeFunction %[[TyVoid]] %[[TyGenPtrLong]] %[[TyLong]] +; CHECK-SPIRV-DAG: %[[TyChar:.*]] = OpTypeInt 8 0 +; CHECK-SPIRV-DAG: %[[TyGenPtrChar:.*]] = OpTypePointer Generic %[[TyChar]] +; CHECK-SPIRV-DAG: %[[TyGenPtrPtrChar:.*]] = OpTypePointer Generic %[[TyGenPtrChar]] +; CHECK-SPIRV-DAG: %[[TyFunPtrGenPtrChar:.*]] = OpTypePointer Function %[[TyGenPtrChar]] ; CHECK-SPIRV-DAG: %[[Const3:.*]] = OpConstant %[[TyLong]] 3 + ; CHECK-SPIRV: %[[FunTest]] = OpFunction %[[TyVoid]] None %[[TyFunPtrLong]] ; CHECK-SPIRV: %[[ArgCum]] = OpFunctionParameter %[[TyPtrLong]] + ; CHECK-SPIRV: OpFunctionCall %[[TyVoid]] %[[FooFunc]] %[[Addr]] %[[Const3]] + +; CHECK-SPIRV: %[[HalfAddr:.*]] = OpPtrCastToGeneric +; CHECK-SPIRV-NEXT: %[[HalfAddrCasted:.*]] = OpBitcast %[[TyGenPtrLong]] %[[HalfAddr]] +; CHECK-SPIRV-NEXT: OpFunctionCall %[[TyVoid]] %[[FooFunc]] %[[HalfAddrCasted]] %[[Const3]] + +; CHECK-SPIRV: %[[DblAddr:.*]] = OpPtrCastToGeneric +; CHECK-SPIRV-NEXT: %[[DblAddrCasted:.*]] = OpBitcast %[[TyGenPtrLong]] %[[DblAddr]] +; CHECK-SPIRV-NEXT: OpFunctionCall %[[TyVoid]] %[[FooFunc]] %[[DblAddrCasted]] %[[Const3]] + ; CHECK-SPIRV: %[[FooStub]] = OpFunction %[[TyVoid]] None %[[TyFunGenPtrLongLong]] ; CHECK-SPIRV: %[[StubObj]] = OpFunctionParameter %[[TyGenPtrLong]] ; CHECK-SPIRV: %[[MemOrder]] = OpFunctionParameter %[[TyLong]] + +; CHECK-SPIRV: %[[ObjectAddr:.*]] = OpVariable %[[TyFunPtrGenPtrChar]] Function +; CHECK-SPIRV-NEXT: %[[ToGeneric:.*]] = OpPtrCastToGeneric %[[TyGenPtrPtrChar]] %[[ObjectAddr]] +; CHECK-SPIRV-NEXT: %[[Casted:.*]] = OpBitcast %[[TyGenPtrPtrLong]] %[[ToGeneric]] +; CHECK-SPIRV-NEXT: OpStore %[[Casted]] %[[StubObj]] + ; CHECK-SPIRV: %[[FooFunc]] = OpFunction %[[TyVoid]] None %[[TyFunGenPtrLongLong]] ; CHECK-SPIRV: %[[FooObj]] = OpFunctionParameter %[[TyGenPtrLong]] ; CHECK-SPIRV: %[[FooMemOrder]] = OpFunctionParameter %[[TyLong]] + ; CHECK-SPIRV: OpFunctionCall %[[TyVoid]] %[[FooStub]] %[[FooObj]] %[[FooMemOrder]] define spir_kernel void @test(ptr addrspace(1) noundef 
align 4 %_arg_cum) { diff --git a/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll new file mode 100644 index 0000000..edb31ff --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/pointers/type-deduce-call-no-bitcast.ll @@ -0,0 +1,60 @@ +; RUN: llc -O0 -mtriple=spirv64-unknown-unknown %s -o - | FileCheck %s --check-prefix=CHECK-SPIRV +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s -o - -filetype=obj | spirv-val %} + +; CHECK-SPIRV-DAG: OpName %[[Foo:.*]] "foo" +; CHECK-SPIRV-DAG: %[[TyChar:.*]] = OpTypeInt 8 0 +; CHECK-SPIRV-DAG: %[[TyVoid:.*]] = OpTypeVoid +; CHECK-SPIRV-DAG: %[[TyGenPtrChar:.*]] = OpTypePointer Generic %[[TyChar]] +; CHECK-SPIRV-DAG: %[[TyFunBar:.*]] = OpTypeFunction %[[TyVoid]] %[[TyGenPtrChar]] +; CHECK-SPIRV-DAG: %[[TyLong:.*]] = OpTypeInt 64 0 +; CHECK-SPIRV-DAG: %[[TyGenPtrPtrChar:.*]] = OpTypePointer Generic %[[TyGenPtrChar]] +; CHECK-SPIRV-DAG: %[[TyFunFoo:.*]] = OpTypeFunction %[[TyVoid]] %[[TyLong]] %[[TyGenPtrPtrChar]] %[[TyGenPtrPtrChar]] +; CHECK-SPIRV-DAG: %[[TyStruct:.*]] = OpTypeStruct %[[TyLong]] +; CHECK-SPIRV-DAG: %[[Const100:.*]] = OpConstant %[[TyLong]] 100 +; CHECK-SPIRV-DAG: %[[TyFunPtrGenPtrChar:.*]] = OpTypePointer Function %[[TyGenPtrChar]] +; CHECK-SPIRV-DAG: %[[TyPtrStruct:.*]] = OpTypePointer Generic %[[TyStruct]] +; CHECK-SPIRV-DAG: %[[TyPtrLong:.*]] = OpTypePointer Generic %[[TyLong]] + +; CHECK-SPIRV: %[[Bar:.*]] = OpFunction %[[TyVoid]] None %[[TyFunBar]] +; CHECK-SPIRV: %[[BarArg:.*]] = OpFunctionParameter %[[TyGenPtrChar]] +; CHECK-SPIRV-NEXT: OpLabel +; CHECK-SPIRV-NEXT: OpVariable %[[TyFunPtrGenPtrChar]] Function +; CHECK-SPIRV-NEXT: OpVariable %[[TyFunPtrGenPtrChar]] Function +; CHECK-SPIRV-NEXT: OpVariable %[[TyFunPtrGenPtrChar]] Function +; CHECK-SPIRV: %[[Var1:.*]] = OpPtrCastToGeneric %[[TyGenPtrPtrChar]] %[[#]] +; CHECK-SPIRV: %[[Var2:.*]] = OpPtrCastToGeneric %[[TyGenPtrPtrChar]] %[[#]] +; CHECK-SPIRV: OpStore %[[#]] %[[BarArg]] +; CHECK-SPIRV-NEXT: OpFunctionCall %[[TyVoid]] %[[Foo]] %[[Const100]] %[[Var1]] %[[Var2]] +; CHECK-SPIRV-NEXT: OpFunctionCall %[[TyVoid]] %[[Foo]] %[[Const100]] %[[Var2]] %[[Var1]] + +; CHECK-SPIRV: %[[Foo]] = OpFunction %[[TyVoid]] None %[[TyFunFoo]] +; CHECK-SPIRV-NEXT: OpFunctionParameter %[[TyLong]] +; CHECK-SPIRV-NEXT: OpFunctionParameter %[[TyGenPtrPtrChar]] +; CHECK-SPIRV-NEXT: OpFunctionParameter %[[TyGenPtrPtrChar]] + +%class.CustomType = type { i64 } + +define linkonce_odr dso_local spir_func void @bar(ptr addrspace(4) noundef %first) { +entry: + %first.addr = alloca ptr addrspace(4) + %first.addr.ascast = addrspacecast ptr %first.addr to ptr addrspace(4) + %temp = alloca ptr addrspace(4), align 8 + %temp.ascast = addrspacecast ptr %temp to ptr addrspace(4) + store ptr addrspace(4) %first, ptr %first.addr + call spir_func void @foo(i64 noundef 100, ptr addrspace(4) noundef dereferenceable(8) %first.addr.ascast, ptr addrspace(4) noundef dereferenceable(8) %temp.ascast) + call spir_func void @foo(i64 noundef 100, ptr addrspace(4) noundef dereferenceable(8) %temp.ascast, ptr addrspace(4) noundef dereferenceable(8) %first.addr.ascast) + %var = alloca ptr addrspace(4), align 8 + ret void +} + +define linkonce_odr dso_local spir_func void @foo(i64 noundef %offset, ptr addrspace(4) noundef dereferenceable(8) %in_acc1, ptr addrspace(4) noundef dereferenceable(8) %out_acc1) { +entry: + %r0 = load ptr addrspace(4), ptr addrspace(4) %in_acc1 + %arrayidx = getelementptr inbounds %class.CustomType, ptr 
addrspace(4) %r0, i64 42 + %r1 = load i64, ptr addrspace(4) %arrayidx + %r3 = load ptr addrspace(4), ptr addrspace(4) %out_acc1 + %r4 = getelementptr %class.CustomType, ptr addrspace(4) %r3, i64 43 + store i64 %r1, ptr addrspace(4) %r4 + ret void +} + -- cgit v1.1 From 12735916bd3a63aa9f316af8eebfe9420cfec489 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Thu, 4 Apr 2024 11:10:30 +0200 Subject: Revert "[libc] Refactor `BigInt`" (#87612) Reverts llvm/llvm-project#86137 Some aarch64 compilers seem to consider that `uint128_t` is not `is_trivially_constructible` which prevents `bit_cast`-ing. --- libc/fuzzing/CMakeLists.txt | 1 - libc/fuzzing/__support/CMakeLists.txt | 7 - libc/fuzzing/__support/uint_fuzz.cpp | 70 -- libc/src/__support/FPUtil/dyadic_float.h | 6 +- libc/src/__support/UInt.h | 1126 ++++++++++---------- libc/src/__support/float_to_string.h | 7 +- libc/src/__support/integer_literals.h | 25 +- libc/src/__support/math_extras.h | 249 +++-- libc/src/__support/number_pair.h | 11 + libc/test/src/__support/integer_literals_test.cpp | 21 - libc/test/src/__support/math_extras_test.cpp | 57 - libc/test/src/__support/uint_test.cpp | 192 +--- .../libc/test/src/__support/BUILD.bazel | 1 - 13 files changed, 762 insertions(+), 1011 deletions(-) delete mode 100644 libc/fuzzing/__support/CMakeLists.txt delete mode 100644 libc/fuzzing/__support/uint_fuzz.cpp diff --git a/libc/fuzzing/CMakeLists.txt b/libc/fuzzing/CMakeLists.txt index 816691b..8248768 100644 --- a/libc/fuzzing/CMakeLists.txt +++ b/libc/fuzzing/CMakeLists.txt @@ -1,7 +1,6 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer") add_custom_target(libc-fuzzer) -add_subdirectory(__support) # TODO(#85680): Re-enable math fuzzing after headers are sorted out # add_subdirectory(math) add_subdirectory(stdlib) diff --git a/libc/fuzzing/__support/CMakeLists.txt b/libc/fuzzing/__support/CMakeLists.txt deleted file mode 100644 index 278e914..0000000 --- a/libc/fuzzing/__support/CMakeLists.txt +++ /dev/null @@ -1,7 +0,0 @@ -add_libc_fuzzer( - uint_fuzz - SRCS - uint_fuzz.cpp - DEPENDS - libc.src.__support.uint -) diff --git a/libc/fuzzing/__support/uint_fuzz.cpp b/libc/fuzzing/__support/uint_fuzz.cpp deleted file mode 100644 index f48f00d..0000000 --- a/libc/fuzzing/__support/uint_fuzz.cpp +++ /dev/null @@ -1,70 +0,0 @@ -#include "src/__support/CPP/bit.h" -#include "src/__support/UInt.h" -#include "src/string/memory_utils/inline_memcpy.h" - -using namespace LIBC_NAMESPACE; - -// Helper function when using gdb / lldb to set a breakpoint and inspect values. -template void debug_and_trap(const char *msg, T a, T b) { - __builtin_trap(); -} - -#define DEBUG_AND_TRAP() - -#define TEST_BINOP(OP) \ - if ((a OP b) != (static_cast(BigInt(a) OP BigInt(b)))) \ - debug_and_trap(#OP, a, b); - -#define TEST_SHIFTOP(OP) \ - if ((a OP b) != (static_cast(BigInt(a) OP b))) \ - debug_and_trap(#OP, a, b); - -#define TEST_FUNCTION(FUN) \ - if (FUN(a) != FUN(BigInt(a))) \ - debug_and_trap(#FUN, a, b); - -// Test that basic arithmetic operations of BigInt behave like their scalar -// counterparts. -template void run_tests(T a, T b) { - TEST_BINOP(+) - TEST_BINOP(-) - TEST_BINOP(*) - if (b != 0) - TEST_BINOP(/) - if (b >= 0 && b < cpp::numeric_limits::digits) { - TEST_SHIFTOP(<<) - TEST_SHIFTOP(>>) - } - if constexpr (!BigInt::SIGNED) { - TEST_FUNCTION(cpp::has_single_bit) - TEST_FUNCTION(cpp::countr_zero) - TEST_FUNCTION(cpp::countl_zero) - TEST_FUNCTION(cpp::countl_one) - TEST_FUNCTION(cpp::countr_one) - } -} - -// Reads a T from libfuzzer data. 
-template T read(const uint8_t *data, size_t &remainder) { - T out = 0; - constexpr size_t T_SIZE = sizeof(T); - const size_t copy_size = remainder < T_SIZE ? remainder : T_SIZE; - inline_memcpy(&out, data, copy_size); - remainder -= copy_size; - return out; -} - -template -void run_tests(const uint8_t *data, size_t size) { - const auto a = read(data, size); - const auto b = read(data, size); - run_tests(a, b); -} - -extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { - // unsigned - run_tests>(data, size); - // signed - run_tests>(data, size); - return 0; -} diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h index e0c205f..73fd738 100644 --- a/libc/src/__support/FPUtil/dyadic_float.h +++ b/libc/src/__support/FPUtil/dyadic_float.h @@ -58,9 +58,9 @@ template struct DyadicFloat { // significant bit. LIBC_INLINE constexpr DyadicFloat &normalize() { if (!mantissa.is_zero()) { - int shift_length = cpp::countl_zero(mantissa); + int shift_length = static_cast(mantissa.clz()); exponent -= shift_length; - mantissa <<= static_cast(shift_length); + mantissa.shift_left(static_cast(shift_length)); } return *this; } @@ -233,7 +233,7 @@ LIBC_INLINE constexpr DyadicFloat quick_add(DyadicFloat a, result.sign = a.sign; result.exponent = a.exponent; result.mantissa = a.mantissa; - if (result.mantissa.add_overflow(b.mantissa)) { + if (result.mantissa.add(b.mantissa)) { // Mantissa addition overflow. result.shift_right(1); result.mantissa.val[DyadicFloat::MantissaType::WORD_COUNT - 1] |= diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h index c524de3..282efdb 100644 --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/UInt.h @@ -14,11 +14,10 @@ #include "src/__support/CPP/limits.h" #include "src/__support/CPP/optional.h" #include "src/__support/CPP/type_traits.h" -#include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY -#include "src/__support/macros/properties/compiler.h" // LIBC_COMPILER_IS_CLANG +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128, LIBC_TYPES_HAS_INT64 -#include "src/__support/math_extras.h" // add_with_carry, sub_with_borrow +#include "src/__support/math_extras.h" // SumCarry, DiffBorrow #include "src/__support/number_pair.h" #include // For size_t @@ -26,321 +25,71 @@ namespace LIBC_NAMESPACE { -namespace multiword { - -// A type trait mapping unsigned integers to their half-width unsigned -// counterparts. +namespace internal { template struct half_width; -template <> struct half_width : cpp::type_identity {}; -template <> struct half_width : cpp::type_identity {}; -#ifdef LIBC_TYPES_HAS_INT64 + template <> struct half_width : cpp::type_identity {}; +template <> struct half_width : cpp::type_identity {}; +template <> struct half_width : cpp::type_identity {}; #ifdef LIBC_TYPES_HAS_INT128 template <> struct half_width<__uint128_t> : cpp::type_identity {}; #endif // LIBC_TYPES_HAS_INT128 -#endif // LIBC_TYPES_HAS_INT64 -template using half_width_t = typename half_width::type; - -// An array of two elements that can be used in multiword operations. -template struct DoubleWide final : cpp::array { - using UP = cpp::array; - using UP::UP; - LIBC_INLINE constexpr DoubleWide(T lo, T hi) : UP({lo, hi}) {} -}; - -// Converts an unsigned value into a DoubleWide>. 
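The block removed below includes multiword::split, which uses cpp::bit_cast to view one word as a pair of half words; this is the spot the revert message points at, since a toolchain that does not treat __uint128_t as trivially constructible/copyable rejects such a cast. A minimal stand-alone illustration of the constraint, using the standard C++20 std::bit_cast instead of the libc polyfill (TwoWords and to_words are illustrative names only):

  #include <bit>
  #include <stdint.h>

  struct TwoWords {
    uint64_t lo;
    uint64_t hi;
  };

  #ifdef __SIZEOF_INT128__
  // std::bit_cast requires both types to have the same size and to be
  // trivially copyable; a compiler that does not model __uint128_t as a
  // trivial type therefore rejects this reinterpretation at compile time.
  static_assert(sizeof(TwoWords) == sizeof(__uint128_t));
  inline TwoWords to_words(__uint128_t v) { return std::bit_cast<TwoWords>(v); }
  #endif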
-template LIBC_INLINE constexpr auto split(T value) { - static_assert(cpp::is_unsigned_v); - return cpp::bit_cast>>(value); -} - -// The low part of a DoubleWide value. -template LIBC_INLINE constexpr T lo(const DoubleWide &value) { - return value[0]; -} -// The high part of a DoubleWide value. -template LIBC_INLINE constexpr T hi(const DoubleWide &value) { - return value[1]; -} -// The low part of an unsigned value. -template LIBC_INLINE constexpr half_width_t lo(T value) { - return lo(split(value)); -} -// The high part of an unsigned value. -template LIBC_INLINE constexpr half_width_t hi(T value) { - return hi(split(value)); -} - -// Returns 'a' times 'b' in a DoubleWide. Cannot overflow by construction. -template -LIBC_INLINE constexpr DoubleWide mul2(word a, word b) { - if constexpr (cpp::is_same_v) { - return split(uint16_t(a) * uint16_t(b)); - } else if constexpr (cpp::is_same_v) { - return split(uint32_t(a) * uint32_t(b)); - } -#ifdef LIBC_TYPES_HAS_INT64 - else if constexpr (cpp::is_same_v) { - return split(uint64_t(a) * uint64_t(b)); - } -#endif -#ifdef LIBC_TYPES_HAS_INT128 - else if constexpr (cpp::is_same_v) { - return split<__uint128_t>(__uint128_t(a) * __uint128_t(b)); - } -#endif - else { - using half_word = half_width_t; - const auto shiftl = [](word value) -> word { - return value << cpp::numeric_limits::digits; - }; - const auto shiftr = [](word value) -> word { - return value >> cpp::numeric_limits::digits; - }; - // Here we do a one digit multiplication where 'a' and 'b' are of type - // word. We split 'a' and 'b' into half words and perform the classic long - // multiplication with 'a' and 'b' being two-digit numbers. - - // a a_hi a_lo - // x b => x b_hi b_lo - // ---- ----------- - // c result - // We convert 'lo' and 'hi' from 'half_word' to 'word' so multiplication - // doesn't overflow. - const word a_lo = lo(a); - const word b_lo = lo(b); - const word a_hi = hi(a); - const word b_hi = hi(b); - const word step1 = b_lo * a_lo; // no overflow; - const word step2 = b_lo * a_hi; // no overflow; - const word step3 = b_hi * a_lo; // no overflow; - const word step4 = b_hi * a_hi; // no overflow; - word lo_digit = step1; - word hi_digit = step4; - const word no_carry = 0; - word carry; - word _; // unused carry variable. - lo_digit = add_with_carry(lo_digit, shiftl(step2), no_carry, carry); - hi_digit = add_with_carry(hi_digit, shiftr(step2), carry, _); - lo_digit = add_with_carry(lo_digit, shiftl(step3), no_carry, carry); - hi_digit = add_with_carry(hi_digit, shiftr(step3), carry, _); - return DoubleWide(lo_digit, hi_digit); - } -} - -// In-place 'dst op= rhs' with operation with carry propagation. Returns carry. -template -LIBC_INLINE constexpr word inplace_binop(Function op_with_carry, - cpp::array &dst, - const cpp::array &rhs) { - static_assert(N >= M); - word carry_out = 0; - for (size_t i = 0; i < N; ++i) { - const bool has_rhs_value = i < M; - const word rhs_value = has_rhs_value ? rhs[i] : 0; - const word carry_in = carry_out; - dst[i] = op_with_carry(dst[i], rhs_value, carry_in, carry_out); - // stop early when rhs is over and no carry is to be propagated. - if (!has_rhs_value && carry_out == 0) - break; - } - return carry_out; -} -// In-place addition. Returns carry. -template -LIBC_INLINE constexpr word add_with_carry(cpp::array &dst, - const cpp::array &rhs) { - return inplace_binop(LIBC_NAMESPACE::add_with_carry, dst, rhs); -} +template using half_width_t = typename half_width::type; -// In-place subtraction. Returns borrow. 
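The schoolbook scheme spelled out in the mul2 comment above (split each word into halves, form the four partial products, then recombine while propagating carries) is easiest to verify on a small word size. A self-contained sketch with 8-bit words and 4-bit halves, independent of the libc helpers (mul8 and U8Pair are illustrative names):

  #include <stdint.h>

  struct U8Pair {
    uint8_t lo;
    uint8_t hi;
  };

  // Exact 8 x 8 -> 16 bit product computed only from products of 4-bit halves,
  // mirroring what mul2 does with full-size words.
  constexpr U8Pair mul8(uint8_t a, uint8_t b) {
    const uint8_t a_lo = a & 0xF, a_hi = a >> 4;
    const uint8_t b_lo = b & 0xF, b_hi = b >> 4;
    const uint8_t p0 = a_lo * b_lo; // weight 1
    const uint8_t p1 = a_hi * b_lo; // weight 16
    const uint8_t p2 = a_lo * b_hi; // weight 16
    const uint8_t p3 = a_hi * b_hi; // weight 256
    uint8_t lo = p0;
    uint8_t hi = p3;
    uint8_t t = uint8_t(lo + uint8_t(p1 << 4)); // add the low half of p1 ...
    hi = uint8_t(hi + (t < lo) + (p1 >> 4));    // ... then its carry and high half
    lo = t;
    t = uint8_t(lo + uint8_t(p2 << 4));         // same for p2
    hi = uint8_t(hi + (t < lo) + (p2 >> 4));
    lo = t;
    return {lo, hi};
  }

  // 0xFF * 0xFF == 0xFE01.
  static_assert(mul8(0xFF, 0xFF).lo == 0x01 && mul8(0xFF, 0xFF).hi == 0xFE);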
-template -LIBC_INLINE constexpr word sub_with_borrow(cpp::array &dst, - const cpp::array &rhs) { - return inplace_binop(LIBC_NAMESPACE::sub_with_borrow, dst, rhs); -} +template constexpr NumberPair full_mul(T a, T b) { + NumberPair pa = split(a); + NumberPair pb = split(b); + NumberPair prod; -// In-place multiply-add. Returns carry. -// i.e., 'dst += b * c' -template -LIBC_INLINE constexpr word mul_add_with_carry(cpp::array &dst, word b, - word c) { - return add_with_carry(dst, mul2(b, c)); -} + prod.lo = pa.lo * pb.lo; // exact + prod.hi = pa.hi * pb.hi; // exact + NumberPair lo_hi = split(pa.lo * pb.hi); // exact + NumberPair hi_lo = split(pa.hi * pb.lo); // exact -// An array of two elements serving as an accumulator during multiword -// computations. -template struct Accumulator final : cpp::array { - using UP = cpp::array; - LIBC_INLINE constexpr Accumulator() : UP({0, 0}) {} - LIBC_INLINE constexpr T advance(T carry_in) { - auto result = UP::front(); - UP::front() = UP::back(); - UP::back() = carry_in; - return result; - } - LIBC_INLINE constexpr T sum() const { return UP::front(); } - LIBC_INLINE constexpr T carry() const { return UP::back(); } -}; + constexpr size_t HALF_BIT_WIDTH = sizeof(T) * CHAR_BIT / 2; -// In-place multiplication by a single word. Returns carry. -template -LIBC_INLINE constexpr word scalar_multiply_with_carry(cpp::array &dst, - word x) { - Accumulator acc; - for (auto &val : dst) { - const word carry = mul_add_with_carry(acc, val, x); - val = acc.advance(carry); - } - return acc.carry(); -} + auto r1 = add_with_carry(prod.lo, lo_hi.lo << HALF_BIT_WIDTH, T(0)); + prod.lo = r1.sum; + prod.hi = add_with_carry(prod.hi, lo_hi.hi, r1.carry).sum; -// Multiplication of 'lhs' by 'rhs' into 'dst'. Returns carry. -// This function is safe to use for signed numbers. -// https://stackoverflow.com/a/20793834 -// https://pages.cs.wisc.edu/%7Emarkhill/cs354/Fall2008/beyond354/int.mult.html -template -LIBC_INLINE constexpr word multiply_with_carry(cpp::array &dst, - const cpp::array &lhs, - const cpp::array &rhs) { - static_assert(O >= M + N); - Accumulator acc; - for (size_t i = 0; i < O; ++i) { - const size_t lower_idx = i < N ? 0 : i - N + 1; - const size_t upper_idx = i < M ? i : M - 1; - word carry = 0; - for (size_t j = lower_idx; j <= upper_idx; ++j) - carry += mul_add_with_carry(acc, lhs[j], rhs[i - j]); - dst[i] = acc.advance(carry); - } - return acc.carry(); -} + auto r2 = add_with_carry(prod.lo, hi_lo.lo << HALF_BIT_WIDTH, T(0)); + prod.lo = r2.sum; + prod.hi = add_with_carry(prod.hi, hi_lo.hi, r2.carry).sum; -template -LIBC_INLINE constexpr void quick_mul_hi(cpp::array &dst, - const cpp::array &lhs, - const cpp::array &rhs) { - Accumulator acc; - word carry = 0; - // First round of accumulation for those at N - 1 in the full product. 
- for (size_t i = 0; i < N; ++i) - carry += mul_add_with_carry(acc, lhs[i], rhs[N - 1 - i]); - for (size_t i = N; i < 2 * N - 1; ++i) { - acc.advance(carry); - carry = 0; - for (size_t j = i - N + 1; j < N; ++j) - carry += mul_add_with_carry(acc, lhs[j], rhs[i - j]); - dst[i - N] = acc.sum(); - } - dst.back() = acc.carry(); + return prod; } -template -LIBC_INLINE constexpr bool is_negative(cpp::array &array) { - using signed_word = cpp::make_signed_t; - return cpp::bit_cast(array.back()) < 0; +template <> +LIBC_INLINE constexpr NumberPair full_mul(uint32_t a, + uint32_t b) { + uint64_t prod = uint64_t(a) * uint64_t(b); + NumberPair result; + result.lo = uint32_t(prod); + result.hi = uint32_t(prod >> 32); + return result; } -// An enum for the shift function below. -enum Direction { LEFT, RIGHT }; - -// A bitwise shift on an array of elements. -// TODO: Make the result UB when 'offset' is greater or equal to the number of -// bits in 'array'. This will allow for better code performance. -template -LIBC_INLINE constexpr cpp::array shift(cpp::array array, - size_t offset) { - static_assert(direction == LEFT || direction == RIGHT); - constexpr size_t WORD_BITS = cpp::numeric_limits::digits; - constexpr size_t TOTAL_BITS = N * WORD_BITS; - if (LIBC_UNLIKELY(offset == 0)) - return array; - if (LIBC_UNLIKELY(offset >= TOTAL_BITS)) - return {}; #ifdef LIBC_TYPES_HAS_INT128 - if constexpr (TOTAL_BITS == 128) { - using type = cpp::conditional_t; - auto tmp = cpp::bit_cast(array); - if constexpr (direction == LEFT) - tmp <<= offset; - else - tmp >>= offset; - return cpp::bit_cast>(tmp); - } -#endif - const bool is_neg = is_signed && is_negative(array); - constexpr auto at = [](size_t index) -> int { - // reverse iteration when direction == LEFT. - if constexpr (direction == LEFT) - return int(N) - int(index) - 1; - return int(index); - }; - const auto safe_get_at = [&](size_t index) -> word { - // return appropriate value when accessing out of bound elements. - const int i = at(index); - if (i < 0) - return 0; - if (i >= int(N)) - return is_neg ? -1 : 0; - return array[i]; - }; - const size_t index_offset = offset / WORD_BITS; - const size_t bit_offset = offset % WORD_BITS; -#ifdef LIBC_COMPILER_IS_CLANG - __builtin_assume(index_offset < N); -#endif - cpp::array out = {}; - for (size_t index = 0; index < N; ++index) { - const word part1 = safe_get_at(index + index_offset); - const word part2 = safe_get_at(index + index_offset + 1); - word &dst = out[at(index)]; - if (bit_offset == 0) - dst = part1; // no crosstalk between parts. 
- else if constexpr (direction == LEFT) - dst = (part1 << bit_offset) | (part2 >> (WORD_BITS - bit_offset)); - else - dst = (part1 >> bit_offset) | (part2 << (WORD_BITS - bit_offset)); - } - return out; +template <> +LIBC_INLINE constexpr NumberPair full_mul(uint64_t a, + uint64_t b) { + __uint128_t prod = __uint128_t(a) * __uint128_t(b); + NumberPair result; + result.lo = uint64_t(prod); + result.hi = uint64_t(prod >> 64); + return result; } +#endif // LIBC_TYPES_HAS_INT128 -#define DECLARE_COUNTBIT(NAME, INDEX_EXPR) \ - template \ - LIBC_INLINE constexpr int NAME(const cpp::array &val) { \ - int bit_count = 0; \ - for (size_t i = 0; i < N; ++i) { \ - const int word_count = cpp::NAME(val[INDEX_EXPR]); \ - bit_count += word_count; \ - if (word_count != cpp::numeric_limits::digits) \ - break; \ - } \ - return bit_count; \ - } - -DECLARE_COUNTBIT(countr_zero, i) // iterating forward -DECLARE_COUNTBIT(countr_one, i) // iterating forward -DECLARE_COUNTBIT(countl_zero, N - i - 1) // iterating backward -DECLARE_COUNTBIT(countl_one, N - i - 1) // iterating backward - -} // namespace multiword +} // namespace internal template struct BigInt { -private: static_assert(cpp::is_integral_v && cpp::is_unsigned_v, "WordType must be unsigned integer."); - struct Division { - BigInt quotient; - BigInt remainder; - }; - -public: using word_type = WordType; - using unsigned_type = BigInt; - using signed_type = BigInt; - LIBC_INLINE_VAR static constexpr bool SIGNED = Signed; LIBC_INLINE_VAR static constexpr size_t BITS = Bits; LIBC_INLINE_VAR @@ -351,7 +100,10 @@ public: LIBC_INLINE_VAR static constexpr size_t WORD_COUNT = Bits / WORD_SIZE; - cpp::array val{}; // zero initialized. + using unsigned_type = BigInt; + using signed_type = BigInt; + + cpp::array val{}; LIBC_INLINE constexpr BigInt() = default; @@ -360,67 +112,76 @@ public: template LIBC_INLINE constexpr BigInt( const BigInt &other) { - if (OtherBits >= Bits) { // truncate + if (OtherBits >= Bits) { for (size_t i = 0; i < WORD_COUNT; ++i) val[i] = other[i]; - } else { // zero or sign extend + } else { size_t i = 0; for (; i < OtherBits / WORD_SIZE; ++i) val[i] = other[i]; - extend(i, Signed && other.is_neg()); + WordType sign = 0; + if constexpr (Signed && OtherSigned) { + sign = static_cast( + -static_cast>(other.is_neg())); + } + for (; i < WORD_COUNT; ++i) + val[i] = sign; } } // Construct a BigInt from a C array. - template LIBC_INLINE constexpr BigInt(const WordType (&nums)[N]) { - static_assert(N == WORD_COUNT); - for (size_t i = 0; i < WORD_COUNT; ++i) + template = 0> + LIBC_INLINE constexpr BigInt(const WordType (&nums)[N]) { + size_t min_wordcount = N < WORD_COUNT ? N : WORD_COUNT; + size_t i = 0; + for (; i < min_wordcount; ++i) val[i] = nums[i]; - } - LIBC_INLINE constexpr explicit BigInt( - const cpp::array &words) { - val = words; + // If nums doesn't completely fill val, then fill the rest with zeroes. + for (; i < WORD_COUNT; ++i) + val[i] = 0; } // Initialize the first word to |v| and the rest to 0. 
template >> LIBC_INLINE constexpr BigInt(T v) { - constexpr size_t T_SIZE = sizeof(T) * CHAR_BIT; - const bool is_neg = Signed && (v < 0); - for (size_t i = 0; i < WORD_COUNT; ++i) { - if (v == 0) { - extend(i, is_neg); - return; + val[0] = static_cast(v); + + if constexpr (WORD_COUNT == 1) + return; + + if constexpr (Bits < sizeof(T) * CHAR_BIT) { + for (int i = 1; i < WORD_COUNT; ++i) { + v >>= WORD_SIZE; + val[i] = static_cast(v); } - val[i] = static_cast(v); - if constexpr (T_SIZE > WORD_SIZE) + return; + } + + size_t i = 1; + + if constexpr (WORD_SIZE < sizeof(T) * CHAR_BIT) + for (; i < sizeof(T) * CHAR_BIT / WORD_SIZE; ++i) { v >>= WORD_SIZE; - else - v = 0; + val[i] = static_cast(v); + } + + WordType sign = (Signed && (v < 0)) ? ~WordType(0) : WordType(0); + for (; i < WORD_COUNT; ++i) { + val[i] = sign; } } - LIBC_INLINE constexpr BigInt &operator=(const BigInt &other) = default; - // constants - LIBC_INLINE static constexpr BigInt zero() { return BigInt(); } - LIBC_INLINE static constexpr BigInt one() { return BigInt(1); } - LIBC_INLINE static constexpr BigInt all_ones() { return ~zero(); } - LIBC_INLINE static constexpr BigInt min() { - BigInt out; - if constexpr (SIGNED) - out.set_msb(); - return out; - } - LIBC_INLINE static constexpr BigInt max() { - BigInt out = all_ones(); - if constexpr (SIGNED) - out.clear_msb(); - return out; + LIBC_INLINE constexpr explicit BigInt( + const cpp::array &words) { + for (size_t i = 0; i < WORD_COUNT; ++i) + val[i] = words[i]; } // TODO: Reuse the Sign type. - LIBC_INLINE constexpr bool is_neg() const { return SIGNED && get_msb(); } + LIBC_INLINE constexpr bool is_neg() const { + return val.back() >> (WORD_SIZE - 1); + } template LIBC_INLINE constexpr explicit operator T() const { return to(); @@ -430,100 +191,200 @@ public: LIBC_INLINE constexpr cpp::enable_if_t< cpp::is_integral_v && !cpp::is_same_v, T> to() const { - constexpr size_t T_SIZE = sizeof(T) * CHAR_BIT; T lo = static_cast(val[0]); - if constexpr (T_SIZE <= WORD_SIZE) + + constexpr size_t T_BITS = sizeof(T) * CHAR_BIT; + + if constexpr (T_BITS <= WORD_SIZE) return lo; + constexpr size_t MAX_COUNT = - T_SIZE > Bits ? WORD_COUNT : T_SIZE / WORD_SIZE; + T_BITS > Bits ? WORD_COUNT : T_BITS / WORD_SIZE; for (size_t i = 1; i < MAX_COUNT; ++i) lo += static_cast(val[i]) << (WORD_SIZE * i); - if constexpr (Signed && (T_SIZE > Bits)) { + + if constexpr (Signed && (T_BITS > Bits)) { // Extend sign for negative numbers. constexpr T MASK = (~T(0) << Bits); if (is_neg()) lo |= MASK; } + return lo; } LIBC_INLINE constexpr explicit operator bool() const { return !is_zero(); } + LIBC_INLINE constexpr BigInt &operator=(const BigInt &other) = default; + LIBC_INLINE constexpr bool is_zero() const { - for (auto part : val) - if (part != 0) + for (size_t i = 0; i < WORD_COUNT; ++i) { + if (val[i] != 0) return false; + } return true; } - // Add 'rhs' to this number and store the result in this number. + // Add x to this number and store the result in this number. // Returns the carry value produced by the addition operation. 
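Both the word loop restored just below and the multiword helper being removed above implement the same carry chain: word i of each operand is added together with the carry produced by word i - 1. A minimal stand-alone sketch with two 64-bit limbs (U128 and u128_add are illustrative names, not libc APIs):

  #include <stdint.h>

  struct U128 {
    uint64_t lo;
    uint64_t hi;
  };

  // Scalar version of the BigInt addition loop: the carry out of the low limb
  // feeds into the high limb.
  constexpr U128 u128_add(U128 a, U128 b) {
    U128 r{a.lo + b.lo, 0};
    const uint64_t carry = r.lo < a.lo ? 1 : 0; // wrap-around means carry out
    r.hi = a.hi + b.hi + carry;
    return r;
  }

  // (2^64 - 1) + 1 carries into the high limb.
  static_assert(u128_add({~0ULL, 0}, {1, 0}).lo == 0);
  static_assert(u128_add({~0ULL, 0}, {1, 0}).hi == 1);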
- LIBC_INLINE constexpr WordType add_overflow(const BigInt &rhs) { - return multiword::add_with_carry(val, rhs.val); + LIBC_INLINE constexpr WordType add(const BigInt &x) { + SumCarry s{0, 0}; + for (size_t i = 0; i < WORD_COUNT; ++i) { + s = add_with_carry(val[i], x.val[i], s.carry); + val[i] = s.sum; + } + return s.carry; } LIBC_INLINE constexpr BigInt operator+(const BigInt &other) const { - BigInt result = *this; - result.add_overflow(other); + BigInt result; + SumCarry s{0, 0}; + for (size_t i = 0; i < WORD_COUNT; ++i) { + s = add_with_carry(val[i], other.val[i], s.carry); + result.val[i] = s.sum; + } return result; } // This will only apply when initializing a variable from constant values, so // it will always use the constexpr version of add_with_carry. LIBC_INLINE constexpr BigInt operator+(BigInt &&other) const { - // We use addition commutativity to reuse 'other' and prevent allocation. - other.add_overflow(*this); // Returned carry value is ignored. - return other; + BigInt result; + SumCarry s{0, 0}; + for (size_t i = 0; i < WORD_COUNT; ++i) { + s = add_with_carry(val[i], other.val[i], s.carry); + result.val[i] = s.sum; + } + return result; } LIBC_INLINE constexpr BigInt &operator+=(const BigInt &other) { - add_overflow(other); // Returned carry value is ignored. + add(other); // Returned carry value is ignored. return *this; } - // Subtract 'rhs' to this number and store the result in this number. + // Subtract x to this number and store the result in this number. // Returns the carry value produced by the subtraction operation. - LIBC_INLINE constexpr WordType sub_overflow(const BigInt &rhs) { - return multiword::sub_with_borrow(val, rhs.val); + LIBC_INLINE constexpr WordType sub(const BigInt &x) { + DiffBorrow d{0, 0}; + for (size_t i = 0; i < WORD_COUNT; ++i) { + d = sub_with_borrow(val[i], x.val[i], d.borrow); + val[i] = d.diff; + } + return d.borrow; } LIBC_INLINE constexpr BigInt operator-(const BigInt &other) const { - BigInt result = *this; - result.sub_overflow(other); // Returned carry value is ignored. + BigInt result; + DiffBorrow d{0, 0}; + for (size_t i = 0; i < WORD_COUNT; ++i) { + d = sub_with_borrow(val[i], other.val[i], d.borrow); + result.val[i] = d.diff; + } return result; } LIBC_INLINE constexpr BigInt operator-(BigInt &&other) const { - BigInt result = *this; - result.sub_overflow(other); // Returned carry value is ignored. + BigInt result; + DiffBorrow d{0, 0}; + for (size_t i = 0; i < WORD_COUNT; ++i) { + d = sub_with_borrow(val[i], other.val[i], d.borrow); + result.val[i] = d.diff; + } return result; } LIBC_INLINE constexpr BigInt &operator-=(const BigInt &other) { // TODO(lntue): Set overflow flag / errno when carry is true. - sub_overflow(other); // Returned carry value is ignored. + sub(other); return *this; } - // Multiply this number with x and store the result in this number. + // Multiply this number with x and store the result in this number. It is + // implemented using the long multiplication algorithm by splitting the + // 64-bit words of this number and |x| in to 32-bit halves but peforming + // the operations using 64-bit numbers. This ensures that we don't lose the + // carry bits. + // Returns the carry value produced by the multiplication operation. 
LIBC_INLINE constexpr WordType mul(WordType x) { - return multiword::scalar_multiply_with_carry(val, x); + BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); + for (size_t i = 0; i < WORD_COUNT; ++i) { + NumberPair prod = internal::full_mul(val[i], x); + BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); + const WordType carry = partial_sum.add(tmp); + val[i] = partial_sum.val[0]; + partial_sum.val[0] = partial_sum.val[1]; + partial_sum.val[1] = carry; + } + return partial_sum.val[1]; + } + + LIBC_INLINE constexpr BigInt operator*(const BigInt &other) const { + if constexpr (Signed) { + BigInt a(*this); + BigInt b(other); + const bool a_neg = a.is_neg(); + const bool b_neg = b.is_neg(); + if (a_neg) + a = -a; + if (b_neg) + b = -b; + BigInt prod = a * b; + if (a_neg != b_neg) + prod = -prod; + return static_cast>(prod); + } else { + if constexpr (WORD_COUNT == 1) { + return {val[0] * other.val[0]}; + } else { + BigInt result(0); + BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); + WordType carry = 0; + for (size_t i = 0; i < WORD_COUNT; ++i) { + for (size_t j = 0; j <= i; j++) { + NumberPair prod = + internal::full_mul(val[j], other.val[i - j]); + BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); + carry += partial_sum.add(tmp); + } + result.val[i] = partial_sum.val[0]; + partial_sum.val[0] = partial_sum.val[1]; + partial_sum.val[1] = carry; + carry = 0; + } + return result; + } + } } - // Return the full product. + // Return the full product, only unsigned for now. template - LIBC_INLINE constexpr auto + LIBC_INLINE constexpr BigInt ful_mul(const BigInt &other) const { - BigInt result; - multiword::multiply_with_carry(result.val, val, other.val); + BigInt result(0); + BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); + WordType carry = 0; + constexpr size_t OTHER_WORDCOUNT = + BigInt::WORD_COUNT; + for (size_t i = 0; i <= WORD_COUNT + OTHER_WORDCOUNT - 2; ++i) { + const size_t lower_idx = + i < OTHER_WORDCOUNT ? 0 : i - OTHER_WORDCOUNT + 1; + const size_t upper_idx = i < WORD_COUNT ? i : WORD_COUNT - 1; + for (size_t j = lower_idx; j <= upper_idx; ++j) { + NumberPair prod = + internal::full_mul(val[j], other.val[i - j]); + BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); + carry += partial_sum.add(tmp); + } + result.val[i] = partial_sum.val[0]; + partial_sum.val[0] = partial_sum.val[1]; + partial_sum.val[1] = carry; + carry = 0; + } + result.val[WORD_COUNT + OTHER_WORDCOUNT - 1] = partial_sum.val[0]; return result; } - LIBC_INLINE constexpr BigInt operator*(const BigInt &other) const { - // Perform full mul and truncate. - return BigInt(ful_mul(other)); - } - // Fast hi part of the full product. The normal product `operator*` returns // `Bits` least significant bits of the full product, while this function will // approximate `Bits` most significant bits of the full product with errors @@ -546,17 +407,39 @@ public: // 256 4 16 10 3 // 512 8 64 36 7 LIBC_INLINE constexpr BigInt quick_mul_hi(const BigInt &other) const { - BigInt result; - multiword::quick_mul_hi(result.val, val, other.val); + BigInt result(0); + BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); + WordType carry = 0; + // First round of accumulation for those at WORD_COUNT - 1 in the full + // product. 
+ for (size_t i = 0; i < WORD_COUNT; ++i) { + NumberPair prod = + internal::full_mul(val[i], other.val[WORD_COUNT - 1 - i]); + BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); + carry += partial_sum.add(tmp); + } + for (size_t i = WORD_COUNT; i < 2 * WORD_COUNT - 1; ++i) { + partial_sum.val[0] = partial_sum.val[1]; + partial_sum.val[1] = carry; + carry = 0; + for (size_t j = i - WORD_COUNT + 1; j < WORD_COUNT; ++j) { + NumberPair prod = + internal::full_mul(val[j], other.val[i - j]); + BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); + carry += partial_sum.add(tmp); + } + result.val[i - WORD_COUNT] = partial_sum.val[0]; + } + result.val[WORD_COUNT - 1] = partial_sum.val[1]; return result; } - // BigInt(x).pow_n(n) computes x ^ n. - // Note 0 ^ 0 == 1. + // pow takes a power and sets this to its starting value to that power. Zero + // to the zeroth power returns 1. LIBC_INLINE constexpr void pow_n(uint64_t power) { - static_assert(!Signed); - BigInt result = one(); + BigInt result = 1; BigInt cur_power = *this; + while (power > 0) { if ((power % 2) > 0) result *= cur_power; @@ -566,23 +449,38 @@ public: *this = result; } - // Performs inplace signed / unsigned division. Returns remainder if not - // dividing by zero. - // For signed numbers it behaves like C++ signed integer division. - // That is by truncating the fractionnal part - // https://stackoverflow.com/a/3602857 - LIBC_INLINE constexpr cpp::optional div(const BigInt ÷r) { - if (LIBC_UNLIKELY(divider.is_zero())) + // TODO: Make division work correctly for signed integers. + + // div takes another BigInt of the same size and divides this by it. The value + // of this will be set to the quotient, and the return value is the remainder. + LIBC_INLINE constexpr cpp::optional div(const BigInt &other) { + BigInt remainder(0); + if (*this < other) { + remainder = *this; + *this = BigInt(0); + return remainder; + } + if (other == 1) { + return remainder; + } + if (other == 0) { return cpp::nullopt; - if (LIBC_UNLIKELY(divider == BigInt::one())) - return BigInt::zero(); - Division result; - if constexpr (SIGNED) - result = divide_signed(*this, divider); - else - result = divide_unsigned(*this, divider); - *this = result.quotient; - return result.remainder; + } + + BigInt quotient(0); + BigInt subtractor = other; + int cur_bit = static_cast(subtractor.clz() - this->clz()); + subtractor.shift_left(cur_bit); + + for (; cur_bit >= 0 && *this > 0; --cur_bit, subtractor.shift_right(1)) { + if (*this >= subtractor) { + this->sub(subtractor); + quotient = quotient | (BigInt(1) << cur_bit); + } + } + remainder = *this; + *this = quotient; + return remainder; } // Efficiently perform BigInt / (x * 2^e), where x is a half-word-size @@ -598,16 +496,19 @@ public: // computation of each step is now properly contained within WordType. // And finally we perform some extra alignment steps for the remaining bits. 
   LIBC_INLINE constexpr cpp::optional
-  div_uint_half_times_pow_2(multiword::half_width_t x, size_t e) {
-    BigInt remainder;
-    if (x == 0)
+  div_uint_half_times_pow_2(internal::half_width_t x, size_t e) {
+    BigInt remainder(0);
+
+    if (x == 0) {
       return cpp::nullopt;
+    }
     if (e >= Bits) {
       remainder = *this;
-      *this = BigInt();
+      *this = BigInt(0);
       return remainder;
     }
-    BigInt quotient;
+
+    BigInt quotient(0);
     WordType x_word = static_cast(x);
     constexpr size_t LOG2_WORD_SIZE = cpp::bit_width(WORD_SIZE) - 1;
     constexpr size_t HALF_WORD_SIZE = WORD_SIZE >> 1;
@@ -732,22 +633,189 @@ public:
     return *this;
   }
-  LIBC_INLINE constexpr BigInt &operator<<=(size_t s) {
-    val = multiword::shift(val, s);
-    return *this;
+  // TODO: remove and use cpp::countl_zero below.
+  [[nodiscard]] LIBC_INLINE constexpr int clz() const {
+    constexpr int word_digits = cpp::numeric_limits::digits;
+    int leading_zeroes = 0;
+    for (auto i = val.size(); i > 0;) {
+      --i;
+      const int zeroes = cpp::countl_zero(val[i]);
+      leading_zeroes += zeroes;
+      if (zeroes != word_digits)
+        break;
+    }
+    return leading_zeroes;
+  }
+
+  // TODO: remove and use cpp::countr_zero below.
+  [[nodiscard]] LIBC_INLINE constexpr int ctz() const {
+    constexpr int word_digits = cpp::numeric_limits::digits;
+    int trailing_zeroes = 0;
+    for (auto word : val) {
+      const int zeroes = cpp::countr_zero(word);
+      trailing_zeroes += zeroes;
+      if (zeroes != word_digits)
+        break;
+    }
+    return trailing_zeroes;
+  }
+
+  LIBC_INLINE constexpr void shift_left(size_t s) {
+    if constexpr (Bits == WORD_SIZE) {
+      // Use native types if possible.
+      if (s >= WORD_SIZE) {
+        val[0] = 0;
+        return;
+      }
+      val[0] <<= s;
+      return;
+    }
+    if constexpr ((Bits == 64) && (WORD_SIZE == 32)) {
+      // Use builtin 64 bits for 32-bit base type if available;
+      if (s >= 64) {
+        val[0] = 0;
+        val[1] = 0;
+        return;
+      }
+      uint64_t tmp = uint64_t(val[0]) + (uint64_t(val[1]) << 32);
+      tmp <<= s;
+      val[0] = uint32_t(tmp);
+      val[1] = uint32_t(tmp >> 32);
+      return;
+    }
+#ifdef LIBC_TYPES_HAS_INT128
+    if constexpr ((Bits == 128) && (WORD_SIZE == 64)) {
+      // Use builtin 128 bits if available;
+      if (s >= 128) {
+        val[0] = 0;
+        val[1] = 0;
+        return;
+      }
+      __uint128_t tmp = __uint128_t(val[0]) + (__uint128_t(val[1]) << 64);
+      tmp <<= s;
+      val[0] = uint64_t(tmp);
+      val[1] = uint64_t(tmp >> 64);
+      return;
+    }
+#endif // LIBC_TYPES_HAS_INT128
+    if (LIBC_UNLIKELY(s == 0))
+      return;
+
+    const size_t drop = s / WORD_SIZE;  // Number of words to drop
+    const size_t shift = s % WORD_SIZE; // Bits to shift in the remaining words.
+ size_t i = WORD_COUNT; + + if (drop < WORD_COUNT) { + i = WORD_COUNT - 1; + if (shift > 0) { + for (size_t j = WORD_COUNT - 1 - drop; j > 0; --i, --j) { + val[i] = (val[j] << shift) | (val[j - 1] >> (WORD_SIZE - shift)); + } + val[i] = val[0] << shift; + } else { + for (size_t j = WORD_COUNT - 1 - drop; j > 0; --i, --j) { + val[i] = val[j]; + } + val[i] = val[0]; + } + } + + for (size_t j = 0; j < i; ++j) { + val[j] = 0; + } } LIBC_INLINE constexpr BigInt operator<<(size_t s) const { - return BigInt(multiword::shift(val, s)); + BigInt result(*this); + result.shift_left(s); + return result; } - LIBC_INLINE constexpr BigInt &operator>>=(size_t s) { - val = multiword::shift(val, s); + LIBC_INLINE constexpr BigInt &operator<<=(size_t s) { + shift_left(s); return *this; } + LIBC_INLINE constexpr void shift_right(size_t s) { + if constexpr ((Bits == 64) && (WORD_SIZE == 32)) { + // Use builtin 64 bits if available; + if (s >= 64) { + val[0] = 0; + val[1] = 0; + return; + } + uint64_t tmp = uint64_t(val[0]) + (uint64_t(val[1]) << 32); + if constexpr (Signed) { + tmp = static_cast(static_cast(tmp) >> s); + } else { + tmp >>= s; + } + val[0] = uint32_t(tmp); + val[1] = uint32_t(tmp >> 32); + return; + } +#ifdef LIBC_TYPES_HAS_INT128 + if constexpr ((Bits == 128) && (WORD_SIZE == 64)) { + // Use builtin 128 bits if available; + if (s >= 128) { + val[0] = 0; + val[1] = 0; + return; + } + __uint128_t tmp = __uint128_t(val[0]) + (__uint128_t(val[1]) << 64); + if constexpr (Signed) { + tmp = static_cast<__uint128_t>(static_cast<__int128_t>(tmp) >> s); + } else { + tmp >>= s; + } + val[0] = uint64_t(tmp); + val[1] = uint64_t(tmp >> 64); + return; + } +#endif // LIBC_TYPES_HAS_INT128 + + if (LIBC_UNLIKELY(s == 0)) + return; + const size_t drop = s / WORD_SIZE; // Number of words to drop + const size_t shift = s % WORD_SIZE; // Bit shift in the remaining words. + + size_t i = 0; + WordType sign = Signed ? is_neg() : 0; + + if (drop < WORD_COUNT) { + if (shift > 0) { + for (size_t j = drop; j < WORD_COUNT - 1; ++i, ++j) { + val[i] = (val[j] >> shift) | (val[j + 1] << (WORD_SIZE - shift)); + } + if constexpr (Signed) { + val[i] = static_cast( + static_cast>(val[WORD_COUNT - 1]) >> + shift); + } else { + val[i] = val[WORD_COUNT - 1] >> shift; + } + ++i; + } else { + for (size_t j = drop; j < WORD_COUNT; ++i, ++j) { + val[i] = val[j]; + } + } + } + + for (; i < WORD_COUNT; ++i) { + val[i] = sign; + } + } + LIBC_INLINE constexpr BigInt operator>>(size_t s) const { - return BigInt(multiword::shift(val, s)); + BigInt result(*this); + result.shift_right(s); + return result; + } + + LIBC_INLINE constexpr BigInt &operator>>=(size_t s) { + shift_right(s); + return *this; } #define DEFINE_BINOP(OP) \ @@ -765,9 +833,10 @@ public: return lhs; \ } - DEFINE_BINOP(&) // & and &= - DEFINE_BINOP(|) // | and |= - DEFINE_BINOP(^) // ^ and ^= + DEFINE_BINOP(&) + DEFINE_BINOP(|) + DEFINE_BINOP(^) + #undef DEFINE_BINOP LIBC_INLINE constexpr BigInt operator~() const { @@ -778,8 +847,8 @@ public: } LIBC_INLINE constexpr BigInt operator-() const { - BigInt result(*this); - result.negate(); + BigInt result = ~(*this); + result.add(BigInt(1)); return result; } @@ -796,6 +865,24 @@ public: return !(lhs == rhs); } +private: + LIBC_INLINE friend constexpr int cmp(const BigInt &lhs, const BigInt &rhs) { + const auto compare = [](WordType a, WordType b) { + return a == b ? 0 : a > b ? 
1 : -1; + }; + if constexpr (Signed) { + const bool lhs_is_neg = lhs.is_neg(); + const bool rhs_is_neg = rhs.is_neg(); + if (lhs_is_neg != rhs_is_neg) + return rhs_is_neg ? 1 : -1; + } + for (size_t i = WORD_COUNT; i-- > 0;) + if (auto cmp = compare(lhs[i], rhs[i]); cmp != 0) + return cmp; + return 0; + } + +public: LIBC_INLINE friend constexpr bool operator>(const BigInt &lhs, const BigInt &rhs) { return cmp(lhs, rhs) > 0; @@ -814,24 +901,24 @@ public: } LIBC_INLINE constexpr BigInt &operator++() { - increment(); + add(BigInt(1)); return *this; } LIBC_INLINE constexpr BigInt operator++(int) { BigInt oldval(*this); - increment(); + add(BigInt(1)); return oldval; } LIBC_INLINE constexpr BigInt &operator--() { - decrement(); + sub(BigInt(1)); return *this; } LIBC_INLINE constexpr BigInt operator--(int) { BigInt oldval(*this); - decrement(); + sub(BigInt(1)); return oldval; } @@ -843,117 +930,9 @@ public: // Return the i-th word of the number. LIBC_INLINE constexpr WordType &operator[](size_t i) { return val[i]; } -private: - LIBC_INLINE friend constexpr int cmp(const BigInt &lhs, const BigInt &rhs) { - constexpr auto compare = [](WordType a, WordType b) { - return a == b ? 0 : a > b ? 1 : -1; - }; - if constexpr (Signed) { - const bool lhs_is_neg = lhs.is_neg(); - const bool rhs_is_neg = rhs.is_neg(); - if (lhs_is_neg != rhs_is_neg) - return rhs_is_neg ? 1 : -1; - } - for (size_t i = WORD_COUNT; i-- > 0;) - if (auto cmp = compare(lhs[i], rhs[i]); cmp != 0) - return cmp; - return 0; - } - - LIBC_INLINE constexpr void bitwise_not() { - for (auto &part : val) - part = ~part; - } - - LIBC_INLINE constexpr void negate() { - bitwise_not(); - increment(); - } + LIBC_INLINE WordType *data() { return val; } - LIBC_INLINE constexpr void increment() { - multiword::add_with_carry(val, cpp::array{1}); - } - - LIBC_INLINE constexpr void decrement() { - multiword::add_with_carry(val, cpp::array{1}); - } - - LIBC_INLINE constexpr void extend(size_t index, bool is_neg) { - const WordType value = is_neg ? cpp::numeric_limits::max() - : cpp::numeric_limits::min(); - for (size_t i = index; i < WORD_COUNT; ++i) - val[i] = value; - } - - LIBC_INLINE constexpr bool get_msb() const { - return val.back() >> (WORD_SIZE - 1); - } - - LIBC_INLINE constexpr void set_msb() { - val.back() |= mask_leading_ones(); - } - - LIBC_INLINE constexpr void clear_msb() { - val.back() &= mask_trailing_ones(); - } - - LIBC_INLINE constexpr void set_bit(size_t i) { - const size_t word_index = i / WORD_SIZE; - val[word_index] |= WordType(1) << (i % WORD_SIZE); - } - - LIBC_INLINE constexpr static Division divide_unsigned(const BigInt ÷nd, - const BigInt ÷r) { - BigInt remainder = dividend; - BigInt quotient; - if (remainder >= divider) { - BigInt subtractor = divider; - int cur_bit = multiword::countl_zero(subtractor.val) - - multiword::countl_zero(remainder.val); - subtractor <<= cur_bit; - for (; cur_bit >= 0 && remainder > 0; --cur_bit, subtractor >>= 1) { - if (remainder < subtractor) - continue; - remainder -= subtractor; - quotient.set_bit(cur_bit); - } - } - return Division{quotient, remainder}; - } - - LIBC_INLINE constexpr static Division divide_signed(const BigInt ÷nd, - const BigInt ÷r) { - // Special case because it is not possible to negate the min value of a - // signed integer. - if (dividend == min() && divider == min()) - return Division{one(), zero()}; - // 1. Convert the dividend and divisor to unsigned representation. - unsigned_type udividend(dividend); - unsigned_type udivider(divider); - // 2. 
Negate the dividend if it's negative, and similarly for the divisor. - const bool dividend_is_neg = dividend.is_neg(); - const bool divider_is_neg = divider.is_neg(); - if (dividend_is_neg) - udividend.negate(); - if (divider_is_neg) - udivider.negate(); - // 3. Use unsigned multiword division algorithm. - const auto unsigned_result = divide_unsigned(udividend, udivider); - // 4. Convert the quotient and remainder to signed representation. - Division result; - result.quotient = signed_type(unsigned_result.quotient); - result.remainder = signed_type(unsigned_result.remainder); - // 5. Negate the quotient if the dividend and divisor had opposite signs. - if (dividend_is_neg != divider_is_neg) - result.quotient.negate(); - // 6. Negate the remainder if the dividend was negative. - if (dividend_is_neg) - result.remainder.negate(); - return result; - } - - friend signed_type; - friend unsigned_type; + LIBC_INLINE const WordType *data() const { return val; } }; namespace internal { @@ -983,8 +962,10 @@ using Int = BigInt>; // Provides limits of U/Int<128>. template <> class cpp::numeric_limits> { public: - LIBC_INLINE static constexpr UInt<128> max() { return UInt<128>::max(); } - LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>::min(); } + LIBC_INLINE static constexpr UInt<128> max() { + return UInt<128>({0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}); + } + LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>(0); } // Meant to match std::numeric_limits interface. // NOLINTNEXTLINE(readability-identifier-naming) LIBC_INLINE_VAR static constexpr int digits = 128; @@ -992,8 +973,12 @@ public: template <> class cpp::numeric_limits> { public: - LIBC_INLINE static constexpr Int<128> max() { return Int<128>::max(); } - LIBC_INLINE static constexpr Int<128> min() { return Int<128>::min(); } + LIBC_INLINE static constexpr Int<128> max() { + return Int<128>({0xffff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}); + } + LIBC_INLINE static constexpr Int<128> min() { + return Int<128>({0, 0x8000'0000'0000'0000}); + } // Meant to match std::numeric_limits interface. // NOLINTNEXTLINE(readability-identifier-naming) LIBC_INLINE_VAR static constexpr int digits = 128; @@ -1127,28 +1112,30 @@ has_single_bit(T value) { template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countr_zero(const T &value) { - return multiword::countr_zero(value.val); + return value.ctz(); } // Specialization of cpp::countl_zero ('bit.h') for BigInt. template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countl_zero(const T &value) { - return multiword::countl_zero(value.val); + return value.clz(); } // Specialization of cpp::countl_one ('bit.h') for BigInt. template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countl_one(T value) { - return multiword::countl_one(value.val); + // TODO : Implement a faster version not involving operator~. + return cpp::countl_zero(~value); } // Specialization of cpp::countr_one ('bit.h') for BigInt. template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countr_one(T value) { - return multiword::countr_one(value.val); + // TODO : Implement a faster version not involving operator~. + return cpp::countr_zero(~value); } // Specialization of cpp::bit_width ('bit.h') for BigInt. 
@@ -1195,59 +1182,65 @@ rotr(T value, int rotate) { template LIBC_INLINE constexpr cpp::enable_if_t, T> mask_trailing_ones() { - static_assert(!T::SIGNED && count <= T::BITS); - if (count == T::BITS) - return T::all_ones(); - constexpr size_t QUOTIENT = count / T::WORD_SIZE; - constexpr size_t REMAINDER = count % T::WORD_SIZE; - T out; // zero initialized - for (size_t i = 0; i <= QUOTIENT; ++i) - out[i] = i < QUOTIENT - ? -1 - : mask_trailing_ones(); + static_assert(!T::SIGNED); + if (count == 0) + return T(); + constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); + static_assert(count <= T_BITS && "Invalid bit index"); + using word_type = typename T::word_type; + T out; + constexpr int CHUNK_INDEX_CONTAINING_BIT = + static_cast(count / T::WORD_SIZE); + int index = 0; + for (auto &word : out.val) { + if (index < CHUNK_INDEX_CONTAINING_BIT) + word = -1; + else if (index > CHUNK_INDEX_CONTAINING_BIT) + word = 0; + else + word = mask_trailing_ones(); + ++index; + } return out; } // Specialization of mask_leading_ones ('math_extras.h') for BigInt. template LIBC_INLINE constexpr cpp::enable_if_t, T> mask_leading_ones() { - static_assert(!T::SIGNED && count <= T::BITS); - if (count == T::BITS) - return T::all_ones(); - constexpr size_t QUOTIENT = (T::BITS - count - 1U) / T::WORD_SIZE; - constexpr size_t REMAINDER = count % T::WORD_SIZE; - T out; // zero initialized - for (size_t i = QUOTIENT; i < T::WORD_COUNT; ++i) - out[i] = i > QUOTIENT - ? -1 - : mask_leading_ones(); + static_assert(!T::SIGNED); + if (count == 0) + return T(); + constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); + static_assert(count <= T_BITS && "Invalid bit index"); + using word_type = typename T::word_type; + T out; + constexpr int CHUNK_INDEX_CONTAINING_BIT = + static_cast((T::BITS - count - 1ULL) / T::WORD_SIZE); + int index = 0; + for (auto &word : out.val) { + if (index < CHUNK_INDEX_CONTAINING_BIT) + word = 0; + else if (index > CHUNK_INDEX_CONTAINING_BIT) + word = -1; + else + word = mask_leading_ones(); + ++index; + } return out; } -// Specialization of mask_trailing_zeros ('math_extras.h') for BigInt. -template -LIBC_INLINE constexpr cpp::enable_if_t, T> -mask_trailing_zeros() { - return mask_leading_ones(); -} - -// Specialization of mask_leading_zeros ('math_extras.h') for BigInt. -template -LIBC_INLINE constexpr cpp::enable_if_t, T> -mask_leading_zeros() { - return mask_trailing_ones(); -} - // Specialization of count_zeros ('math_extras.h') for BigInt. template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] +LIBC_INLINE constexpr cpp::enable_if_t, int> count_zeros(T value) { return cpp::popcount(~value); } // Specialization of first_leading_zero ('math_extras.h') for BigInt. template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] +LIBC_INLINE constexpr cpp::enable_if_t, int> first_leading_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : cpp::countl_one(value) + 1; @@ -1255,14 +1248,16 @@ first_leading_zero(T value) { // Specialization of first_leading_one ('math_extras.h') for BigInt. template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] +LIBC_INLINE constexpr cpp::enable_if_t, int> first_leading_one(T value) { return first_leading_zero(~value); } // Specialization of first_trailing_zero ('math_extras.h') for BigInt. 
template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] +LIBC_INLINE constexpr cpp::enable_if_t, int> first_trailing_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : cpp::countr_zero(~value) + 1; @@ -1270,7 +1265,8 @@ first_trailing_zero(T value) { // Specialization of first_trailing_one ('math_extras.h') for BigInt. template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] +LIBC_INLINE constexpr cpp::enable_if_t, int> first_trailing_one(T value) { return value == cpp::numeric_limits::max() ? 0 : cpp::countr_zero(value) + 1; diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h index 4c59cfd..1287c3e 100644 --- a/libc/src/__support/float_to_string.h +++ b/libc/src/__support/float_to_string.h @@ -689,7 +689,7 @@ template <> class FloatToString { wide_int float_as_int = mantissa; - float_as_int <<= exponent; + float_as_int.shift_left(exponent); int_block_index = 0; while (float_as_int > 0) { @@ -708,11 +708,10 @@ template <> class FloatToString { const int SHIFT_AMOUNT = FLOAT_AS_INT_WIDTH + exponent; static_assert(EXTRA_INT_WIDTH >= sizeof(long double) * 8); - float_as_fixed <<= SHIFT_AMOUNT; + float_as_fixed.shift_left(SHIFT_AMOUNT); // If there are still digits above the decimal point, handle those. - if (cpp::countl_zero(float_as_fixed) < - static_cast(EXTRA_INT_WIDTH)) { + if (float_as_fixed.clz() < static_cast(EXTRA_INT_WIDTH)) { UInt above_decimal_point = float_as_fixed >> FLOAT_AS_INT_WIDTH; diff --git a/libc/src/__support/integer_literals.h b/libc/src/__support/integer_literals.h index e99799c..de1f88f 100644 --- a/libc/src/__support/integer_literals.h +++ b/libc/src/__support/integer_literals.h @@ -151,15 +151,12 @@ template struct Parser> { template LIBC_INLINE constexpr T parse_with_prefix(const char *ptr) { using P = Parser; - if (ptr == nullptr) - return T(); - if (ptr[0] == '0') { - if (ptr[1] == 'b') - return P::template parse<2>(ptr + 2); - if (ptr[1] == 'x') - return P::template parse<16>(ptr + 2); - } - return P::template parse<10>(ptr); + if (ptr[0] == '0' && ptr[1] == 'x') + return P::template parse<16>(ptr + 2); + else if (ptr[0] == '0' && ptr[1] == 'b') + return P::template parse<2>(ptr + 2); + else + return P::template parse<10>(ptr); } } // namespace internal @@ -172,16 +169,6 @@ LIBC_INLINE constexpr auto operator""_u256(const char *x) { return internal::parse_with_prefix>(x); } -template LIBC_INLINE constexpr T parse_bigint(const char *ptr) { - if (ptr == nullptr) - return T(); - if (ptr[0] == '-' || ptr[0] == '+') { - auto positive = internal::parse_with_prefix(ptr + 1); - return ptr[0] == '-' ? 
-positive : positive; - } - return internal::parse_with_prefix(ptr); -} - } // namespace LIBC_NAMESPACE #endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h index bb6424b..70a8800 100644 --- a/libc/src/__support/math_extras.h +++ b/libc/src/__support/math_extras.h @@ -10,9 +10,9 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H #define LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H -#include "src/__support/CPP/bit.h" // countl_one, countr_zero -#include "src/__support/CPP/limits.h" // CHAR_BIT, numeric_limits -#include "src/__support/CPP/type_traits.h" // is_unsigned_v, is_constant_evaluated +#include "src/__support/CPP/bit.h" // countl_one, countr_zero +#include "src/__support/CPP/limits.h" // CHAR_BIT, numeric_limits +#include "src/__support/CPP/type_traits.h" // is_unsigned_v #include "src/__support/macros/attributes.h" // LIBC_INLINE namespace LIBC_NAMESPACE { @@ -32,94 +32,199 @@ mask_trailing_ones() { template LIBC_INLINE constexpr cpp::enable_if_t, T> mask_leading_ones() { - return T(~mask_trailing_ones()); + constexpr T MASK(mask_trailing_ones()); + return T(~MASK); // bitwise NOT performs integer promotion. } -// Create a bitmask with the count right-most bits set to 0, and all other bits -// set to 1. Only unsigned types are allowed. -template -LIBC_INLINE constexpr cpp::enable_if_t, T> -mask_trailing_zeros() { - return mask_leading_ones(); +// Add with carry +template struct SumCarry { + T sum; + T carry; +}; + +// This version is always valid for constexpr. +template +LIBC_INLINE constexpr cpp::enable_if_t< + cpp::is_integral_v && cpp::is_unsigned_v, SumCarry> +add_with_carry_const(T a, T b, T carry_in) { + T tmp = a + carry_in; + T sum = b + tmp; + T carry_out = (sum < b) + (tmp < a); + return {sum, carry_out}; } -// Create a bitmask with the count left-most bits set to 0, and all other bits -// set to 1. Only unsigned types are allowed. 
-template -LIBC_INLINE constexpr cpp::enable_if_t, T> -mask_leading_zeros() { - return mask_trailing_ones(); +template +LIBC_INLINE constexpr cpp::enable_if_t< + cpp::is_integral_v && cpp::is_unsigned_v, SumCarry> +add_with_carry(T a, T b, T carry_in) { + return add_with_carry_const(a, b, carry_in); +} + +#if __has_builtin(__builtin_addc) +// https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins + +template <> +LIBC_INLINE constexpr SumCarry +add_with_carry(unsigned char a, unsigned char b, + unsigned char carry_in) { + if (__builtin_is_constant_evaluated()) { + return add_with_carry_const(a, b, carry_in); + } else { + SumCarry result{0, 0}; + result.sum = __builtin_addcb(a, b, carry_in, &result.carry); + return result; + } +} + +template <> +LIBC_INLINE constexpr SumCarry +add_with_carry(unsigned short a, unsigned short b, + unsigned short carry_in) { + if (__builtin_is_constant_evaluated()) { + return add_with_carry_const(a, b, carry_in); + } else { + SumCarry result{0, 0}; + result.sum = __builtin_addcs(a, b, carry_in, &result.carry); + return result; + } +} + +template <> +LIBC_INLINE constexpr SumCarry +add_with_carry(unsigned int a, unsigned int b, + unsigned int carry_in) { + if (__builtin_is_constant_evaluated()) { + return add_with_carry_const(a, b, carry_in); + } else { + SumCarry result{0, 0}; + result.sum = __builtin_addc(a, b, carry_in, &result.carry); + return result; + } +} + +template <> +LIBC_INLINE constexpr SumCarry +add_with_carry(unsigned long a, unsigned long b, + unsigned long carry_in) { + if (__builtin_is_constant_evaluated()) { + return add_with_carry_const(a, b, carry_in); + } else { + SumCarry result{0, 0}; + result.sum = __builtin_addcl(a, b, carry_in, &result.carry); + return result; + } +} + +template <> +LIBC_INLINE constexpr SumCarry +add_with_carry(unsigned long long a, unsigned long long b, + unsigned long long carry_in) { + if (__builtin_is_constant_evaluated()) { + return add_with_carry_const(a, b, carry_in); + } else { + SumCarry result{0, 0}; + result.sum = __builtin_addcll(a, b, carry_in, &result.carry); + return result; + } } -// Returns whether 'a + b' overflows, the result is stored in 'res'. +#endif // __has_builtin(__builtin_addc) + +// Subtract with borrow +template struct DiffBorrow { + T diff; + T borrow; +}; + +// This version is always valid for constexpr. template -[[nodiscard]] LIBC_INLINE constexpr bool add_overflow(T a, T b, T &res) { - return __builtin_add_overflow(a, b, &res); +LIBC_INLINE constexpr cpp::enable_if_t< + cpp::is_integral_v && cpp::is_unsigned_v, DiffBorrow> +sub_with_borrow_const(T a, T b, T borrow_in) { + T tmp = a - b; + T diff = tmp - borrow_in; + T borrow_out = (diff > tmp) + (tmp > a); + return {diff, borrow_out}; } -// Returns whether 'a - b' overflows, the result is stored in 'res'. +// This version is not always valid for constepxr because it's overriden below +// if builtins are available. 
template -[[nodiscard]] LIBC_INLINE constexpr bool sub_overflow(T a, T b, T &res) { - return __builtin_sub_overflow(a, b, &res); +LIBC_INLINE constexpr cpp::enable_if_t< + cpp::is_integral_v && cpp::is_unsigned_v, DiffBorrow> +sub_with_borrow(T a, T b, T borrow_in) { + return sub_with_borrow_const(a, b, borrow_in); } -#define RETURN_IF(TYPE, BUILTIN) \ - if constexpr (cpp::is_same_v) \ - return BUILTIN(a, b, carry_in, carry_out); +#if __has_builtin(__builtin_subc) +// https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins -// Returns the result of 'a + b' taking into account 'carry_in'. -// The carry out is stored in 'carry_out' it not 'nullptr', dropped otherwise. -// We keep the pass by pointer interface for consistency with the intrinsic. -template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> -add_with_carry(T a, T b, T carry_in, T &carry_out) { - if constexpr (!cpp::is_constant_evaluated()) { -#if __has_builtin(__builtin_addcb) - RETURN_IF(unsigned char, __builtin_addcb) -#elif __has_builtin(__builtin_addcs) - RETURN_IF(unsigned short, __builtin_addcs) -#elif __has_builtin(__builtin_addc) - RETURN_IF(unsigned int, __builtin_addc) -#elif __has_builtin(__builtin_addcl) - RETURN_IF(unsigned long, __builtin_addcl) -#elif __has_builtin(__builtin_addcll) - RETURN_IF(unsigned long long, __builtin_addcll) -#endif +template <> +LIBC_INLINE constexpr DiffBorrow +sub_with_borrow(unsigned char a, unsigned char b, + unsigned char borrow_in) { + if (__builtin_is_constant_evaluated()) { + return sub_with_borrow_const(a, b, borrow_in); + } else { + DiffBorrow result{0, 0}; + result.diff = __builtin_subcb(a, b, borrow_in, &result.borrow); + return result; } - T sum; - T carry1 = add_overflow(a, b, sum); - T carry2 = add_overflow(sum, carry_in, sum); - carry_out = carry1 | carry2; - return sum; } -// Returns the result of 'a - b' taking into account 'carry_in'. -// The carry out is stored in 'carry_out' it not 'nullptr', dropped otherwise. -// We keep the pass by pointer interface for consistency with the intrinsic. 
-template -[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> -sub_with_borrow(T a, T b, T carry_in, T &carry_out) { - if constexpr (!cpp::is_constant_evaluated()) { -#if __has_builtin(__builtin_subcb) - RETURN_IF(unsigned char, __builtin_subcb) -#elif __has_builtin(__builtin_subcs) - RETURN_IF(unsigned short, __builtin_subcs) -#elif __has_builtin(__builtin_subc) - RETURN_IF(unsigned int, __builtin_subc) -#elif __has_builtin(__builtin_subcl) - RETURN_IF(unsigned long, __builtin_subcl) -#elif __has_builtin(__builtin_subcll) - RETURN_IF(unsigned long long, __builtin_subcll) -#endif +template <> +LIBC_INLINE constexpr DiffBorrow +sub_with_borrow(unsigned short a, unsigned short b, + unsigned short borrow_in) { + if (__builtin_is_constant_evaluated()) { + return sub_with_borrow_const(a, b, borrow_in); + } else { + DiffBorrow result{0, 0}; + result.diff = __builtin_subcs(a, b, borrow_in, &result.borrow); + return result; + } +} + +template <> +LIBC_INLINE constexpr DiffBorrow +sub_with_borrow(unsigned int a, unsigned int b, + unsigned int borrow_in) { + if (__builtin_is_constant_evaluated()) { + return sub_with_borrow_const(a, b, borrow_in); + } else { + DiffBorrow result{0, 0}; + result.diff = __builtin_subc(a, b, borrow_in, &result.borrow); + return result; + } +} + +template <> +LIBC_INLINE constexpr DiffBorrow +sub_with_borrow(unsigned long a, unsigned long b, + unsigned long borrow_in) { + if (__builtin_is_constant_evaluated()) { + return sub_with_borrow_const(a, b, borrow_in); + } else { + DiffBorrow result{0, 0}; + result.diff = __builtin_subcl(a, b, borrow_in, &result.borrow); + return result; + } +} + +template <> +LIBC_INLINE constexpr DiffBorrow +sub_with_borrow(unsigned long long a, unsigned long long b, + unsigned long long borrow_in) { + if (__builtin_is_constant_evaluated()) { + return sub_with_borrow_const(a, b, borrow_in); + } else { + DiffBorrow result{0, 0}; + result.diff = __builtin_subcll(a, b, borrow_in, &result.borrow); + return result; } - T sub; - T carry1 = sub_overflow(a, b, sub); - T carry2 = sub_overflow(sub, carry_in, sub); - carry_out = carry1 | carry2; - return sub; } -#undef RETURN_IF +#endif // __has_builtin(__builtin_subc) template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> diff --git a/libc/src/__support/number_pair.h b/libc/src/__support/number_pair.h index 2f713fc..ee6667b 100644 --- a/libc/src/__support/number_pair.h +++ b/libc/src/__support/number_pair.h @@ -20,6 +20,17 @@ template struct NumberPair { T hi = T(0); }; +template +cpp::enable_if_t && cpp::is_unsigned_v, + NumberPair> constexpr split(T a) { + constexpr size_t HALF_BIT_WIDTH = sizeof(T) * 4; + constexpr T LOWER_HALF_MASK = (T(1) << HALF_BIT_WIDTH) - T(1); + NumberPair result; + result.lo = a & LOWER_HALF_MASK; + result.hi = a >> HALF_BIT_WIDTH; + return result; +} + } // namespace LIBC_NAMESPACE #endif // LLVM_LIBC_SRC___SUPPORT_NUMBER_PAIR_H diff --git a/libc/test/src/__support/integer_literals_test.cpp b/libc/test/src/__support/integer_literals_test.cpp index cbc906a..5298cf3 100644 --- a/libc/test/src/__support/integer_literals_test.cpp +++ b/libc/test/src/__support/integer_literals_test.cpp @@ -133,24 +133,3 @@ TEST(LlvmLibcIntegerLiteralTest, u256) { U256_MAX, 0xFFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF_u256); } - -TEST(LlvmLibcIntegerLiteralTest, parse_bigint) { - using T = LIBC_NAMESPACE::Int<128>; - struct { - const char *str; - T expected; - } constexpr TEST_CASES[] = { - {"0", 0}, {"-1", -1}, {"+1", 1}, {"-0xFF", -255}, {"-0b11", 
-3}, - }; - for (auto tc : TEST_CASES) { - T actual = LIBC_NAMESPACE::parse_bigint(tc.str); - EXPECT_EQ(actual, tc.expected); - } -} - -TEST(LlvmLibcIntegerLiteralTest, parse_bigint_invalid) { - using T = LIBC_NAMESPACE::Int<128>; - const T expected; // default construction - EXPECT_EQ(LIBC_NAMESPACE::parse_bigint(nullptr), expected); - EXPECT_EQ(LIBC_NAMESPACE::parse_bigint(""), expected); -} diff --git a/libc/test/src/__support/math_extras_test.cpp b/libc/test/src/__support/math_extras_test.cpp index 401e631e..e88b3e1 100644 --- a/libc/test/src/__support/math_extras_test.cpp +++ b/libc/test/src/__support/math_extras_test.cpp @@ -101,61 +101,4 @@ TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypesNoBigInt) { EXPECT_EQ(count_zeros(cpp::numeric_limits::max() >> i), i); } -using UnsignedTypes = testing::TypeList< -#if defined(__SIZEOF_INT128__) - __uint128_t, -#endif - unsigned char, unsigned short, unsigned int, unsigned long, - unsigned long long>; - -TYPED_TEST(LlvmLibcBlockMathExtrasTest, add_overflow, UnsignedTypes) { - constexpr T ZERO = cpp::numeric_limits::min(); - constexpr T ONE(1); - constexpr T MAX = cpp::numeric_limits::max(); - constexpr T BEFORE_MAX = MAX - 1; - - const struct { - T lhs; - T rhs; - T sum; - bool carry; - } TESTS[] = { - {ZERO, ONE, ONE, false}, // 0x00 + 0x01 = 0x01 - {BEFORE_MAX, ONE, MAX, false}, // 0xFE + 0x01 = 0xFF - {MAX, ONE, ZERO, true}, // 0xFF + 0x01 = 0x00 (carry) - {MAX, MAX, BEFORE_MAX, true}, // 0xFF + 0xFF = 0xFE (carry) - }; - for (auto tc : TESTS) { - T sum; - bool carry = add_overflow(tc.lhs, tc.rhs, sum); - EXPECT_EQ(sum, tc.sum); - EXPECT_EQ(carry, tc.carry); - } -} - -TYPED_TEST(LlvmLibcBlockMathExtrasTest, sub_overflow, UnsignedTypes) { - constexpr T ZERO = cpp::numeric_limits::min(); - constexpr T ONE(1); - constexpr T MAX = cpp::numeric_limits::max(); - constexpr T BEFORE_MAX = MAX - 1; - - const struct { - T lhs; - T rhs; - T sub; - bool carry; - } TESTS[] = { - {ONE, ZERO, ONE, false}, // 0x01 - 0x00 = 0x01 - {MAX, MAX, ZERO, false}, // 0xFF - 0xFF = 0x00 - {ZERO, ONE, MAX, true}, // 0x00 - 0x01 = 0xFF (carry) - {BEFORE_MAX, MAX, MAX, true}, // 0xFE - 0xFF = 0xFF (carry) - }; - for (auto tc : TESTS) { - T sub; - bool carry = sub_overflow(tc.lhs, tc.rhs, sub); - EXPECT_EQ(sub, tc.sub); - EXPECT_EQ(carry, tc.carry); - } -} - } // namespace LIBC_NAMESPACE diff --git a/libc/test/src/__support/uint_test.cpp b/libc/test/src/__support/uint_test.cpp index 5696e54..5764324 100644 --- a/libc/test/src/__support/uint_test.cpp +++ b/libc/test/src/__support/uint_test.cpp @@ -8,7 +8,6 @@ #include "src/__support/CPP/optional.h" #include "src/__support/UInt.h" -#include "src/__support/integer_literals.h" // parse_unsigned_bigint #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128 #include "include/llvm-libc-macros/math-macros.h" // HUGE_VALF, HUGE_VALF @@ -16,195 +15,6 @@ namespace LIBC_NAMESPACE { -enum Value { ZERO, ONE, TWO, MIN, MAX }; - -template auto create(Value value) { - switch (value) { - case ZERO: - return T(0); - case ONE: - return T(1); - case TWO: - return T(2); - case MIN: - return T::min(); - case MAX: - return T::max(); - } -} - -using Types = testing::TypeList< // -#ifdef LIBC_TYPES_HAS_INT64 - BigInt<64, false, uint64_t>, // 64-bits unsigned (1 x uint64_t) - BigInt<64, true, uint64_t>, // 64-bits signed (1 x uint64_t) -#endif -#ifdef LIBC_TYPES_HAS_INT128 - BigInt<128, false, __uint128_t>, // 128-bits unsigned (1 x __uint128_t) - BigInt<128, true, __uint128_t>, // 128-bits signed (1 x __uint128_t) -#endif 
- BigInt<16, false, uint16_t>, // 16-bits unsigned (1 x uint16_t) - BigInt<16, true, uint16_t>, // 16-bits signed (1 x uint16_t) - BigInt<64, false, uint16_t>, // 64-bits unsigned (4 x uint16_t) - BigInt<64, true, uint16_t> // 64-bits signed (4 x uint16_t) - >; - -#define ASSERT_SAME(A, B) ASSERT_TRUE((A) == (B)) - -TYPED_TEST(LlvmLibcUIntClassTest, Additions, Types) { - ASSERT_SAME(create(ZERO) + create(ZERO), create(ZERO)); - ASSERT_SAME(create(ONE) + create(ZERO), create(ONE)); - ASSERT_SAME(create(ZERO) + create(ONE), create(ONE)); - ASSERT_SAME(create(ONE) + create(ONE), create(TWO)); - // 2's complement addition works for signed and unsigned types. - // - unsigned : 0xff + 0x01 = 0x00 (255 + 1 = 0) - // - signed : 0xef + 0x01 = 0xf0 (127 + 1 = -128) - ASSERT_SAME(create(MAX) + create(ONE), create(MIN)); -} - -TYPED_TEST(LlvmLibcUIntClassTest, Subtraction, Types) { - ASSERT_SAME(create(ZERO) - create(ZERO), create(ZERO)); - ASSERT_SAME(create(ONE) - create(ONE), create(ZERO)); - ASSERT_SAME(create(ONE) - create(ZERO), create(ONE)); - // 2's complement subtraction works for signed and unsigned types. - // - unsigned : 0x00 - 0x01 = 0xff ( 0 - 1 = 255) - // - signed : 0xf0 - 0x01 = 0xef (-128 - 1 = 127) - ASSERT_SAME(create(MIN) - create(ONE), create(MAX)); -} - -TYPED_TEST(LlvmLibcUIntClassTest, Multiplication, Types) { - ASSERT_SAME(create(ZERO) * create(ZERO), create(ZERO)); - ASSERT_SAME(create(ZERO) * create(ONE), create(ZERO)); - ASSERT_SAME(create(ONE) * create(ZERO), create(ZERO)); - ASSERT_SAME(create(ONE) * create(ONE), create(ONE)); - ASSERT_SAME(create(ONE) * create(TWO), create(TWO)); - ASSERT_SAME(create(TWO) * create(ONE), create(TWO)); - // - unsigned : 0xff x 0xff = 0x01 (mod 0xff) - // - signed : 0xef x 0xef = 0x01 (mod 0xff) - ASSERT_SAME(create(MAX) * create(MAX), create(ONE)); -} - -template void print(const char *msg, T value) { - testing::tlog << msg; - IntegerToString buffer(value); - testing::tlog << buffer.view() << "\n"; -} - -TEST(LlvmLibcUIntClassTest, SignedAddSub) { - // Computations performed by https://www.wolframalpha.com/ - using T = BigInt<128, true, uint32_t>; - const T a = parse_bigint("1927508279017230597"); - const T b = parse_bigint("278789278723478925"); - const T s = parse_bigint("2206297557740709522"); - // Addition - ASSERT_SAME(a + b, s); - ASSERT_SAME(b + a, s); // commutative - // Subtraction - ASSERT_SAME(a - s, -b); - ASSERT_SAME(s - a, b); -} - -TEST(LlvmLibcUIntClassTest, SignedMulDiv) { - // Computations performed by https://www.wolframalpha.com/ - using T = BigInt<128, true, uint16_t>; - struct { - const char *a; - const char *b; - const char *mul; - } const test_cases[] = {{"-4", "3", "-12"}, - {"-3", "-3", "9"}, - {"1927508279017230597", "278789278723478925", - "537368642840747885329125014794668225"}}; - for (auto tc : test_cases) { - const T a = parse_bigint(tc.a); - const T b = parse_bigint(tc.b); - const T mul = parse_bigint(tc.mul); - // Multiplication - ASSERT_SAME(a * b, mul); - ASSERT_SAME(b * a, mul); // commutative - ASSERT_SAME(a * -b, -mul); // sign - ASSERT_SAME(-a * b, -mul); // sign - ASSERT_SAME(-a * -b, mul); // sign - // Division - ASSERT_SAME(mul / a, b); - ASSERT_SAME(mul / b, a); - ASSERT_SAME(-mul / a, -b); // sign - ASSERT_SAME(mul / -a, -b); // sign - ASSERT_SAME(-mul / -a, b); // sign - } -} - -TYPED_TEST(LlvmLibcUIntClassTest, Division, Types) { - ASSERT_SAME(create(ZERO) / create(ONE), create(ZERO)); - ASSERT_SAME(create(MAX) / create(ONE), create(MAX)); - ASSERT_SAME(create(MAX) / create(MAX), 
create(ONE)); - ASSERT_SAME(create(ONE) / create(ONE), create(ONE)); - if constexpr (T::SIGNED) { - // Special case found by fuzzing. - ASSERT_SAME(create(MIN) / create(MIN), create(ONE)); - } - // - unsigned : 0xff / 0x02 = 0x7f - // - signed : 0xef / 0x02 = 0x77 - ASSERT_SAME(create(MAX) / create(TWO), (create(MAX) >> 1)); - - using word_type = typename T::word_type; - const T zero_one_repeated = T::all_ones() / T(0xff); - const word_type pattern = word_type(~0) / word_type(0xff); - for (const word_type part : zero_one_repeated.val) { - if constexpr (T::SIGNED == false) { - EXPECT_EQ(part, pattern); - } - } -} - -TYPED_TEST(LlvmLibcUIntClassTest, is_neg, Types) { - EXPECT_FALSE(create(ZERO).is_neg()); - EXPECT_FALSE(create(ONE).is_neg()); - EXPECT_FALSE(create(TWO).is_neg()); - EXPECT_EQ(create(MIN).is_neg(), T::SIGNED); - EXPECT_FALSE(create(MAX).is_neg()); -} - -TYPED_TEST(LlvmLibcUIntClassTest, Masks, Types) { - if constexpr (!T::SIGNED) { - constexpr size_t BITS = T::BITS; - // mask_trailing_ones - ASSERT_SAME((mask_trailing_ones()), T::zero()); - ASSERT_SAME((mask_trailing_ones()), T::one()); - ASSERT_SAME((mask_trailing_ones()), T::all_ones() >> 1); - ASSERT_SAME((mask_trailing_ones()), T::all_ones()); - // mask_leading_ones - ASSERT_SAME((mask_leading_ones()), T::zero()); - ASSERT_SAME((mask_leading_ones()), T::one() << (BITS - 1)); - ASSERT_SAME((mask_leading_ones()), T::all_ones() - T::one()); - ASSERT_SAME((mask_leading_ones()), T::all_ones()); - // mask_trailing_zeros - ASSERT_SAME((mask_trailing_zeros()), T::all_ones()); - ASSERT_SAME((mask_trailing_zeros()), T::all_ones() - T::one()); - ASSERT_SAME((mask_trailing_zeros()), T::one() << (BITS - 1)); - ASSERT_SAME((mask_trailing_zeros()), T::zero()); - // mask_trailing_zeros - ASSERT_SAME((mask_leading_zeros()), T::all_ones()); - ASSERT_SAME((mask_leading_zeros()), T::all_ones() >> 1); - ASSERT_SAME((mask_leading_zeros()), T::one()); - ASSERT_SAME((mask_leading_zeros()), T::zero()); - } -} - -TYPED_TEST(LlvmLibcUIntClassTest, CountBits, Types) { - if constexpr (!T::SIGNED) { - for (size_t i = 0; i <= T::BITS; ++i) { - const auto l_one = T::all_ones() << i; // 0b111...000 - const auto r_one = T::all_ones() >> i; // 0b000...111 - const int zeros = i; - const int ones = T::BITS - zeros; - ASSERT_EQ(cpp::countr_one(r_one), ones); - ASSERT_EQ(cpp::countl_one(l_one), ones); - ASSERT_EQ(cpp::countr_zero(l_one), zeros); - ASSERT_EQ(cpp::countl_zero(r_one), zeros); - } - } -} - using LL_UInt64 = UInt<64>; // We want to test UInt<128> explicitly. 
So, for // convenience, we use a sugar which does not conflict with the UInt128 type @@ -751,7 +561,7 @@ TEST(LlvmLibcUIntClassTest, FullMulTests) { LL_UInt##Bits a = ~LL_UInt##Bits(0); \ LL_UInt##Bits hi = a.quick_mul_hi(a); \ LL_UInt##Bits trunc = static_cast(a.ful_mul(a) >> Bits); \ - uint64_t overflow = trunc.sub_overflow(hi); \ + uint64_t overflow = trunc.sub(hi); \ EXPECT_EQ(overflow, uint64_t(0)); \ EXPECT_LE(uint64_t(trunc), uint64_t(Error)); \ } while (0) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel index c0d402a8..4f97612 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel @@ -87,7 +87,6 @@ libc_test( srcs = ["uint_test.cpp"], deps = [ "//libc:__support_cpp_optional", - "//libc:__support_integer_literals", "//libc:__support_macros_properties_types", "//libc:__support_uint", "//libc:llvm_libc_macros_math_macros", -- cgit v1.1 From 61efea7142e904e6492e1ce0566ec23d9d221c1e Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 4 Apr 2024 10:12:33 +0100 Subject: [libclc] Fix a couple of issues preventing in-tree builds (#87505) libclc is mentioned in the list of LLVM_ENABLE_PROJECTS but it isn't actually possible to build it in-tree for various reasons. Users currently have to build it via LLVM_ENABLE_EXTERNAL_PROJECTS, which isn't very well documented. We can't properly build in-tree because the current system needs to "see" clang and other tools at CMake configuration time. The general idea is that we could fix this in the future by moving the compilation and linking of bitcode libraries to custom commands, which would remove the dependency on CMake configuration and would allow us to build libclc after clang and other tools are built in-tree. Since that's a bigger change, it is being left for later. Note that with this commit it's *still* not possible to properly build in-tree - this commit just fixes a few little things that are in the way. We are now able to build in-tree in the sense that it can be built as a regular LLVM sub-project, but the tools it uses to compile the libraries are still picked up from a pre-existing installation of LLVM, and not from tools built during the same build as libclc. The things fixed by this commit include: * Its use of CMAKE_SOURCE_DIR (i.e., assuming it was the top-level project) * These have been converted to PROJECT_SOURCE_DIR - should have no consequences for out-of-tree builds. * Its prepare_builtins tool insisting on linking against the dynamic LLVM.so. * This has been turned from an "llvm executable" into an "llvm utility" which links against the static libraries. * It was also missing a link component for the IRReader library. * Assuming an output path for its builtin libraries (dependent on the working directory) * This has been changed to query CMake for the library target's output file. * The spirv-mesa3d and spirv64-mesa3d targets were enabled by default (or when asking to build 'all' libclc targets), when they require llvm-spirv as an external dependency. * They are now only built when the user explicitly asks for them, or when llvm-spirv is available and the user asks for 'all'. 
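
To make the last two points concrete, here is a minimal, self-contained CMake sketch (illustrative only, not part of the patch below; the target and file names such as demo_lib and demo.c are hypothetical) of the two patterns this commit relies on: appending optional targets only when an external tool is found, and using the $<TARGET_FILE:...> generator expression instead of assuming where a target's output file ends up:

    cmake_minimum_required( VERSION 3.20 )
    project( genexpr_demo C )

    # Append optional targets only when the external tool is available.
    find_program( LLVM_SPIRV llvm-spirv )
    set( DEMO_TARGETS_ALL plain-target )
    if( LLVM_SPIRV )
      list( APPEND DEMO_TARGETS_ALL spirv-target )
    endif()
    message( STATUS "Targets that can be built: ${DEMO_TARGETS_ALL}" )

    # Ask CMake for the library's output file instead of hard-coding a path
    # relative to the build directory (assumes a trivial demo.c exists).
    add_library( demo_lib STATIC demo.c )
    add_custom_command( OUTPUT demo.processed
      COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:demo_lib> demo.processed
      DEPENDS demo_lib )
    add_custom_target( process_demo ALL DEPENDS demo.processed )

The generator expression is resolved at build time, so the custom command keeps working regardless of the generator or configuration-specific output layout, which is what lets the patch drop its assumption about where the builtins.link library lands.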
--- libclc/CMakeLists.txt | 102 +++++++++++++++++++++++++++----------------------- 1 file changed, 56 insertions(+), 46 deletions(-) diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 9236f09..2d000cf 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -20,21 +20,6 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS spirv64/lib/SOURCES ) -# List of all targets -set( LIBCLC_TARGETS_ALL - amdgcn-- - amdgcn--amdhsa - clspv-- - clspv64-- - r600-- - nvptx-- - nvptx64-- - nvptx--nvidiacl - nvptx64--nvidiacl - spirv-mesa3d- - spirv64-mesa3d- -) - set( LIBCLC_MIN_LLVM "3.9.0" ) set( LIBCLC_TARGETS_TO_BUILD "all" @@ -51,15 +36,6 @@ if( ${LLVM_PACKAGE_VERSION} VERSION_LESS ${LIBCLC_MIN_LLVM} ) message( FATAL_ERROR "libclc needs at least LLVM ${LIBCLC_MIN_LLVM}" ) endif() -# mesa3d environment is only available since LLVM 4.0 -if( ${LLVM_PACKAGE_VERSION} VERSION_GREATER "3.9.0" ) - set( LIBCLC_TARGETS_ALL ${LIBCLC_TARGETS_ALL} amdgcn-mesa-mesa3d ) -endif() - -if( LIBCLC_TARGETS_TO_BUILD STREQUAL "all" ) - set( LIBCLC_TARGETS_TO_BUILD ${LIBCLC_TARGETS_ALL} ) -endif() - find_program( LLVM_CLANG clang PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) find_program( LLVM_AS llvm-as PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) find_program( LLVM_LINK llvm-link PATHS ${LLVM_TOOLS_BINARY_DIR} NO_DEFAULT_PATH ) @@ -76,15 +52,45 @@ if( NOT LLVM_CLANG OR NOT LLVM_OPT OR NOT LLVM_AS OR NOT LLVM_LINK ) message( FATAL_ERROR "libclc toolchain incomplete!" ) endif() +# List of all targets. Note that some are added dynamically below. +set( LIBCLC_TARGETS_ALL + amdgcn-- + amdgcn--amdhsa + clspv-- + clspv64-- + r600-- + nvptx-- + nvptx64-- + nvptx--nvidiacl + nvptx64--nvidiacl +) + +# mesa3d environment is only available since LLVM 4.0 +if( ${LLVM_PACKAGE_VERSION} VERSION_GREATER "3.9.0" ) + list( APPEND LIBCLC_TARGETS_ALL amdgcn-mesa-mesa3d ) +endif() + +# spirv-mesa3d and spirv64-mesa3d targets can only be built with the (optional) +# llvm-spirv external tool. +if( LLVM_SPIRV ) + list( APPEND LIBCLC_TARGETS_ALL spirv-mesa3d- spirv64-mesa3d- ) +endif() + +if( LIBCLC_TARGETS_TO_BUILD STREQUAL "all" ) + set( LIBCLC_TARGETS_TO_BUILD ${LIBCLC_TARGETS_ALL} ) +endif() + list( SORT LIBCLC_TARGETS_TO_BUILD ) +# Verify that the user hasn't requested mesa3d targets without an available +# llvm-spirv tool. 
if( "spirv-mesa3d-" IN_LIST LIBCLC_TARGETS_TO_BUILD OR "spirv64-mesa3d-" IN_LIST LIBCLC_TARGETS_TO_BUILD ) if( NOT LLVM_SPIRV ) message( FATAL_ERROR "SPIR-V targets requested, but spirv-tools is not installed" ) endif() endif() -set( CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/cmake ) +set( CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake ) set( CMAKE_CLC_COMPILER ${LLVM_CLANG} ) set( CMAKE_CLC_ARCHIVE ${LLVM_LINK} ) set( CMAKE_LLAsm_PREPROCESSOR ${LLVM_CLANG} ) @@ -113,9 +119,10 @@ set(LLVM_LINK_COMPONENTS BitReader BitWriter Core + IRReader Support ) -add_llvm_executable( prepare_builtins utils/prepare-builtins.cpp ) +add_llvm_utility( prepare_builtins utils/prepare-builtins.cpp ) target_compile_definitions( prepare_builtins PRIVATE ${LLVM_VERSION_DEFINE} ) # These were not properly reported in early LLVM and we don't need them target_compile_options( prepare_builtins PRIVATE -fno-rtti -fno-exceptions ) @@ -165,7 +172,7 @@ if( ENABLE_RUNTIME_SUBNORMAL ) endif() find_package( Python3 REQUIRED COMPONENTS Interpreter ) -file( TO_CMAKE_PATH ${CMAKE_SOURCE_DIR}/generic/lib/gen_convert.py script_loc ) +file( TO_CMAKE_PATH ${PROJECT_SOURCE_DIR}/generic/lib/gen_convert.py script_loc ) add_custom_command( OUTPUT convert.cl COMMAND ${Python3_EXECUTABLE} ${script_loc} > convert.cl @@ -210,7 +217,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) foreach( l ${dirs} ${DARCH} ${DARCH}-${OS} ${DARCH}-${VENDOR}-${OS} ) foreach( s "SOURCES" "SOURCES_${LLVM_MAJOR}.${LLVM_MINOR}" ) file( TO_CMAKE_PATH ${l}/lib/${s} file_loc ) - file( TO_CMAKE_PATH ${CMAKE_SOURCE_DIR}/${file_loc} loc ) + file( TO_CMAKE_PATH ${PROJECT_SOURCE_DIR}/${file_loc} loc ) # Prepend the location to give higher priority to # specialized implementation if( EXISTS ${loc} ) @@ -246,7 +253,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) list( APPEND objects ${f} ) list( APPEND rel_files ${dir}/${f} ) # FIXME: This should really go away - file( TO_CMAKE_PATH ${CMAKE_SOURCE_DIR}/${dir}/${f} src_loc ) + file( TO_CMAKE_PATH ${PROJECT_SOURCE_DIR}/${dir}/${f} src_loc ) get_filename_component( fdir ${src_loc} DIRECTORY ) set_source_files_properties( ${dir}/${f} @@ -288,53 +295,56 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) set( opt_flags -O3 ) endif() - add_library( builtins.link.${arch_suffix} STATIC ${rel_files} ) + set( builtins_link_lib_tgt builtins.link.${arch_suffix} ) + add_library( ${builtins_link_lib_tgt} STATIC ${rel_files} ) # Make sure we depend on the pseudo target to prevent # multiple invocations - add_dependencies( builtins.link.${arch_suffix} generate_convert.cl ) - add_dependencies( builtins.link.${arch_suffix} clspv-generate_convert.cl ) + add_dependencies( ${builtins_link_lib_tgt} generate_convert.cl ) + add_dependencies( ${builtins_link_lib_tgt} clspv-generate_convert.cl ) # CMake will turn this include into absolute path - target_include_directories( builtins.link.${arch_suffix} PRIVATE + target_include_directories( ${builtins_link_lib_tgt} PRIVATE "generic/include" ) - target_compile_definitions( builtins.link.${arch_suffix} PRIVATE + target_compile_definitions( ${builtins_link_lib_tgt} PRIVATE "__CLC_INTERNAL" ) string( TOUPPER "-DCLC_${ARCH}" CLC_TARGET_DEFINE ) - target_compile_definitions( builtins.link.${arch_suffix} PRIVATE + target_compile_definitions( ${builtins_link_lib_tgt} PRIVATE ${CLC_TARGET_DEFINE} ) - target_compile_options( builtins.link.${arch_suffix} PRIVATE -target + target_compile_options( ${builtins_link_lib_tgt} PRIVATE -target ${t} ${mcpu} -fno-builtin -nostdlib ${build_flags} ) - set_target_properties( 
builtins.link.${arch_suffix} PROPERTIES + set_target_properties( ${builtins_link_lib_tgt} PROPERTIES LINKER_LANGUAGE CLC ) set( obj_suffix ${arch_suffix}.bc ) + set( builtins_opt_lib_tgt builtins.opt.${obj_suffix} ) # Add opt target - add_custom_command( OUTPUT "builtins.opt.${obj_suffix}" - COMMAND ${LLVM_OPT} ${opt_flags} -o "builtins.opt.${obj_suffix}" "builtins.link.${obj_suffix}" - DEPENDS "builtins.link.${arch_suffix}" ) + add_custom_command( OUTPUT ${builtins_opt_lib_tgt} + COMMAND ${LLVM_OPT} ${opt_flags} -o ${builtins_opt_lib_tgt} + $ + DEPENDS ${builtins_link_lib_tgt} ) add_custom_target( "opt.${obj_suffix}" ALL - DEPENDS "builtins.opt.${obj_suffix}" ) + DEPENDS ${builtins_opt_lib_tgt} ) if( ${ARCH} STREQUAL "spirv" OR ${ARCH} STREQUAL "spirv64" ) set( spv_suffix ${arch_suffix}.spv ) add_custom_command( OUTPUT "${spv_suffix}" - COMMAND ${LLVM_SPIRV} ${spvflags} -o "${spv_suffix}" "builtins.link.${obj_suffix}" - DEPENDS "builtins.link.${arch_suffix}" ) + COMMAND ${LLVM_SPIRV} ${spvflags} -o "${spv_suffix}" ${builtins_opt_lib_tgt} + DEPENDS ${builtins_link_lib_tgt} ) add_custom_target( "prepare-${spv_suffix}" ALL DEPENDS "${spv_suffix}" ) install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${spv_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) else() # Add prepare target add_custom_command( OUTPUT "${obj_suffix}" - COMMAND prepare_builtins -o "${obj_suffix}" "builtins.opt.${obj_suffix}" - DEPENDS "opt.${obj_suffix}" "builtins.opt.${obj_suffix}" prepare_builtins ) + COMMAND prepare_builtins -o "${obj_suffix}" ${builtins_opt_lib_tgt} + DEPENDS "opt.${obj_suffix}" ${builtins_opt_lib_tgt} prepare_builtins ) add_custom_target( "prepare-${obj_suffix}" ALL DEPENDS "${obj_suffix}" ) # nvptx-- targets don't include workitem builtins if( NOT ${t} MATCHES ".*ptx.*--$" ) add_test( NAME external-calls-${obj_suffix} COMMAND ./check_external_calls.sh ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} ${LLVM_TOOLS_BINARY_DIR} - WORKING_DIRECTORY ${CMAKE_SOURCE_DIR} ) + WORKING_DIRECTORY ${PROJECT_SOURCE_DIR} ) endif() install( FILES ${CMAKE_CURRENT_BINARY_DIR}/${obj_suffix} DESTINATION "${CMAKE_INSTALL_DATADIR}/clc" ) -- cgit v1.1 From 3cf539fb046457a444e93cefc87cca10cbd3b807 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 4 Apr 2024 10:14:16 +0100 Subject: [AMDGPU] Combine or remove redundant waitcnts at the end of each MBB (#87539) Call generateWaitcnt unconditionally at the end of SIInsertWaitcnts::insertWaitcntInBlock. Even if we don't need to generate a new waitcnt instruction it has the effect of combining or removing redundant waitcnts that were already present. Tests show various small improvements in waitcnt placement. 
--- llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp | 45 ++++------- .../AMDGPU/GlobalISel/divergent-control-flow.ll | 4 - .../AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll | 2 - .../AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll | 2 - .../AMDGPU/atomic_optimizations_local_pointer.ll | 86 ++++++++++------------ llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll | 2 +- .../CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll | 2 +- .../test/CodeGen/AMDGPU/extract-subvector-16bit.ll | 11 --- llvm/test/CodeGen/AMDGPU/function-args.ll | 1 - .../CodeGen/AMDGPU/lds-global-non-entry-func.ll | 28 +++---- llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll | 8 +- llvm/test/CodeGen/AMDGPU/skip-if-dead.ll | 4 - .../transform-block-with-return-to-epilog.ll | 4 +- 13 files changed, 73 insertions(+), 126 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp index 2762190..bb499c5 100644 --- a/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp +++ b/llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp @@ -708,9 +708,6 @@ public: WaitcntBrackets &ScoreBrackets, MachineInstr *OldWaitcntInstr, bool FlushVmCnt); - bool generateWaitcntBlockEnd(MachineBasicBlock &Block, - WaitcntBrackets &ScoreBrackets, - MachineInstr *OldWaitcntInstr); bool generateWaitcnt(AMDGPU::Waitcnt Wait, MachineBasicBlock::instr_iterator It, MachineBasicBlock &Block, WaitcntBrackets &ScoreBrackets, @@ -1902,31 +1899,6 @@ bool SIInsertWaitcnts::generateWaitcntInstBefore(MachineInstr &MI, OldWaitcntInstr); } -// Add a waitcnt to flush the LOADcnt, SAMPLEcnt and BVHcnt counters at the -// end of the given block if needed. -bool SIInsertWaitcnts::generateWaitcntBlockEnd(MachineBasicBlock &Block, - WaitcntBrackets &ScoreBrackets, - MachineInstr *OldWaitcntInstr) { - AMDGPU::Waitcnt Wait; - - unsigned LoadCntPending = ScoreBrackets.hasPendingEvent(LOAD_CNT); - unsigned SampleCntPending = ScoreBrackets.hasPendingEvent(SAMPLE_CNT); - unsigned BvhCntPending = ScoreBrackets.hasPendingEvent(BVH_CNT); - - if (LoadCntPending == 0 && SampleCntPending == 0 && BvhCntPending == 0) - return false; - - if (LoadCntPending != 0) - Wait.LoadCnt = 0; - if (SampleCntPending != 0) - Wait.SampleCnt = 0; - if (BvhCntPending != 0) - Wait.BvhCnt = 0; - - return generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets, - OldWaitcntInstr); -} - bool SIInsertWaitcnts::generateWaitcnt(AMDGPU::Waitcnt Wait, MachineBasicBlock::instr_iterator It, MachineBasicBlock &Block, @@ -2355,9 +2327,22 @@ bool SIInsertWaitcnts::insertWaitcntInBlock(MachineFunction &MF, ++Iter; } + // Flush the LOADcnt, SAMPLEcnt and BVHcnt counters at the end of the block if + // needed. + AMDGPU::Waitcnt Wait; if (Block.getFirstTerminator() == Block.end() && - isPreheaderToFlush(Block, ScoreBrackets)) - Modified |= generateWaitcntBlockEnd(Block, ScoreBrackets, OldWaitcntInstr); + isPreheaderToFlush(Block, ScoreBrackets)) { + if (ScoreBrackets.hasPendingEvent(LOAD_CNT)) + Wait.LoadCnt = 0; + if (ScoreBrackets.hasPendingEvent(SAMPLE_CNT)) + Wait.SampleCnt = 0; + if (ScoreBrackets.hasPendingEvent(BVH_CNT)) + Wait.BvhCnt = 0; + } + + // Combine or remove any redundant waitcnts at the end of the block. 
+ Modified |= generateWaitcnt(Wait, Block.instr_end(), Block, ScoreBrackets, + OldWaitcntInstr); return Modified; } diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll index c25b0f2..78d9084 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/divergent-control-flow.ll @@ -16,7 +16,6 @@ define i32 @divergent_if_swap_brtarget_order0(i32 %value) { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: .LBB0_2: ; %endif ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %c = icmp ne i32 %value, 0 @@ -44,7 +43,6 @@ define i32 @divergent_if_swap_brtarget_order1(i32 %value) { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: .LBB1_2: ; %endif ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %c = icmp ne i32 %value, 0 @@ -74,7 +72,6 @@ define i32 @divergent_if_nonboolean_condition0(i32 %value) { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: .LBB2_2: ; %endif ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %c = trunc i32 %value to i1 @@ -106,7 +103,6 @@ define i32 @divergent_if_nonboolean_condition1(ptr addrspace(1) %ptr) { ; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: .LBB3_2: ; %endif ; CHECK-NEXT: s_or_b64 exec, exec, s[4:5] -; CHECK-NEXT: s_waitcnt vmcnt(0) ; CHECK-NEXT: s_setpc_b64 s[30:31] entry: %value = load i32, ptr addrspace(1) %ptr diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll index 303dc46..5c22d5b 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.private.ll @@ -131,8 +131,6 @@ define amdgpu_kernel void @is_private_sgpr(ptr %ptr) { ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: .LBB1_2: ; %bb1 -; GFX11-NEXT: s_nop 0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %val = call i1 @llvm.amdgcn.is.private(ptr %ptr) br i1 %val, label %bb0, label %bb1 diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll index 63702d2..e005c38 100644 --- a/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll +++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.is.shared.ll @@ -131,8 +131,6 @@ define amdgpu_kernel void @is_local_sgpr(ptr %ptr) { ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: .LBB1_2: ; %bb1 -; GFX11-NEXT: s_nop 0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm %val = call i1 @llvm.amdgcn.is.shared(ptr %ptr) br i1 %val, label %bb0, label %bb1 diff --git a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll index 352adac..af6f6913 100644 --- a/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll +++ b/llvm/test/CodeGen/AMDGPU/atomic_optimizations_local_pointer.ll @@ -39,9 +39,9 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: 
v_readfirstlane_b32 s4, v1 ; GFX7LESS-NEXT: v_mad_u32_u24 v0, v0, 5, s4 +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -65,11 +65,11 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB0_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v1 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_mad_u32_u24 v0, v0, 5, s4 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -92,11 +92,11 @@ define amdgpu_kernel void @add_i32_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB0_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_mad_u32_u24 v0, v0, 5, s4 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -253,8 +253,8 @@ define amdgpu_kernel void @add_i32_uniform(ptr addrspace(1) %out, i32 %additive) ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1 +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_mul_lo_u32 v0, s6, v0 ; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s4, v0 ; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -504,11 +504,11 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB2_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -544,11 +544,11 @@ define amdgpu_kernel void @add_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB2_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_add_u32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -944,7 +944,6 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 ; GFX7LESS-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2 @@ -952,6 +951,7 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s5 ; GFX7LESS-NEXT: v_add_i32_e32 v0, vcc, s4, v0 ; GFX7LESS-NEXT: v_addc_u32_e32 v1, vcc, v2, v1, vcc +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -974,7 +974,6 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB4_2: ; GFX8-NEXT: 
s_or_b64 exec, exec, s[2:3] -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s2, v0 ; GFX8-NEXT: v_readfirstlane_b32 s3, v1 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1006,7 +1005,6 @@ define amdgpu_kernel void @add_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB4_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s2, v0 ; GFX9-NEXT: v_readfirstlane_b32 s3, v1 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -1219,11 +1217,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive) ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB5_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_readfirstlane_b32 s5, v1 ; GFX8-NEXT: v_mov_b32_e32 v0, s4 ; GFX8-NEXT: v_mov_b32_e32 v1, s5 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_mul_lo_u32 v3, s3, v2 ; GFX8-NEXT: v_mad_u64_u32 v[0:1], s[2:3], s2, v2, v[0:1] ; GFX8-NEXT: s_mov_b32 s7, 0xf000 @@ -1258,11 +1256,11 @@ define amdgpu_kernel void @add_i64_uniform(ptr addrspace(1) %out, i64 %additive) ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB5_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_readfirstlane_b32 s5, v1 ; GFX9-NEXT: v_mov_b32_e32 v0, s4 ; GFX9-NEXT: v_mov_b32_e32 v1, s5 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], s2, v2, v[0:1] ; GFX9-NEXT: s_mov_b32 s7, 0xf000 ; GFX9-NEXT: s_mov_b32 s6, -1 @@ -1530,10 +1528,10 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1 ; GFX7LESS-NEXT: v_mul_u32_u24_e32 v0, 5, v0 ; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -1557,12 +1555,12 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB7_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v1 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, 5, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v0 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -1585,12 +1583,12 @@ define amdgpu_kernel void @sub_i32_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB7_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v1 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, 5, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_sub_u32_e32 v0, s4, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -1751,8 +1749,8 @@ define amdgpu_kernel void @sub_i32_uniform(ptr addrspace(1) %out, i32 %subitive) ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v1 +; GFX7LESS-NEXT: 
s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_mul_lo_u32 v0, s6, v0 ; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 ; GFX7LESS-NEXT: buffer_store_dword v0, off, s[0:3], 0 @@ -2006,11 +2004,11 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB9_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_sub_u32_e32 v0, vcc, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -2046,11 +2044,11 @@ define amdgpu_kernel void @sub_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB9_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_sub_u32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -2446,7 +2444,6 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 ; GFX7LESS-NEXT: v_mul_hi_u32_u24_e32 v1, 5, v2 @@ -2454,6 +2451,7 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s5 ; GFX7LESS-NEXT: v_sub_i32_e32 v0, vcc, s4, v0 ; GFX7LESS-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -2477,7 +2475,6 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB11_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_readfirstlane_b32 s5, v1 ; GFX8-NEXT: v_mul_u32_u24_e32 v0, 5, v2 @@ -2487,6 +2484,7 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_subb_u32_e32 v1, vcc, v2, v1, vcc +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -2509,7 +2507,6 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB11_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_readfirstlane_b32 s5, v1 ; GFX9-NEXT: v_mul_u32_u24_e32 v0, 5, v2 @@ -2519,6 +2516,7 @@ define amdgpu_kernel void @sub_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v2, v1, vcc +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -3081,11 +3079,11 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB14_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: 
s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_and_b32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -3121,11 +3119,11 @@ define amdgpu_kernel void @and_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB14_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_and_b32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -3355,11 +3353,11 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB15_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_or_b32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -3395,11 +3393,11 @@ define amdgpu_kernel void @or_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB15_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_or_b32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -3629,11 +3627,11 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB16_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_xor_b32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -3669,11 +3667,11 @@ define amdgpu_kernel void @xor_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB16_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_xor_b32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -3903,11 +3901,11 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB17_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_max_i32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -3943,11 +3941,11 @@ define amdgpu_kernel void @max_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB17_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_max_i32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt 
lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -4151,7 +4149,6 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 ; GFX7LESS-NEXT: v_bfrev_b32_e32 v1, 1 @@ -4162,6 +4159,7 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 ; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -4182,7 +4180,6 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB18_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_bfrev_b32_e32 v0, 1 ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -4216,7 +4213,6 @@ define amdgpu_kernel void @max_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB18_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_bfrev_b32_e32 v0, 1 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -4419,11 +4415,11 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB19_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_min_i32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -4459,11 +4455,11 @@ define amdgpu_kernel void @min_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB19_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_min_i32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -4667,7 +4663,6 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 ; GFX7LESS-NEXT: v_bfrev_b32_e32 v1, -2 @@ -4678,6 +4673,7 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 ; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -4698,7 +4694,6 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: .LBB20_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_bfrev_b32_e32 v0, -2 ; 
GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -4732,7 +4727,6 @@ define amdgpu_kernel void @min_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB20_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_bfrev_b32_e32 v0, -2 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 @@ -4935,11 +4929,11 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB21_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_max_u32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -4975,11 +4969,11 @@ define amdgpu_kernel void @umax_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB21_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_max_u32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -5183,7 +5177,6 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 ; GFX7LESS-NEXT: v_mov_b32_e32 v1, 0 @@ -5193,6 +5186,7 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX7LESS-NEXT: v_mov_b32_e32 v1, s5 ; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -5214,7 +5208,6 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB22_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_readfirstlane_b32 s5, v1 ; GFX8-NEXT: v_mov_b32_e32 v1, 0 @@ -5226,6 +5219,7 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX8-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -5246,7 +5240,6 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB22_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_readfirstlane_b32 s5, v1 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 @@ -5258,6 +5251,7 @@ define amdgpu_kernel void @umax_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc ; GFX9-NEXT: v_cndmask_b32_e32 v1, 0, v1, vcc +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -5446,11 +5440,11 @@ define amdgpu_kernel void 
@umin_i32_varying(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB23_4: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_min_u32_e32 v0, s4, v1 +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -5486,11 +5480,11 @@ define amdgpu_kernel void @umin_i32_varying(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB23_4: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_min_u32_e32 v0, s4, v1 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dword v0, off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; @@ -5694,7 +5688,6 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x9 ; GFX7LESS-NEXT: s_mov_b32 s3, 0xf000 ; GFX7LESS-NEXT: s_mov_b32 s2, -1 -; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: v_readfirstlane_b32 s4, v0 ; GFX7LESS-NEXT: v_readfirstlane_b32 s5, v1 ; GFX7LESS-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc @@ -5704,6 +5697,7 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX7LESS-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc ; GFX7LESS-NEXT: v_mov_b32_e32 v2, s4 ; GFX7LESS-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX7LESS-NEXT: s_waitcnt lgkmcnt(0) ; GFX7LESS-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX7LESS-NEXT: s_endpgm ; @@ -5725,7 +5719,6 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: .LBB24_2: ; GFX8-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX8-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: v_readfirstlane_b32 s4, v0 ; GFX8-NEXT: v_readfirstlane_b32 s5, v1 ; GFX8-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc @@ -5737,6 +5730,7 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX8-NEXT: s_mov_b32 s3, 0xf000 ; GFX8-NEXT: s_mov_b32 s2, -1 ; GFX8-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX8-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX8-NEXT: s_endpgm ; @@ -5757,7 +5751,6 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: .LBB24_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[2:3] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s4, v0 ; GFX9-NEXT: v_readfirstlane_b32 s5, v1 ; GFX9-NEXT: v_cndmask_b32_e64 v1, 0, -1, vcc @@ -5769,6 +5762,7 @@ define amdgpu_kernel void @umin_i64_constant(ptr addrspace(1) %out) { ; GFX9-NEXT: s_mov_b32 s3, 0xf000 ; GFX9-NEXT: s_mov_b32 s2, -1 ; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: buffer_store_dwordx2 v[0:1], off, s[0:3], 0 ; GFX9-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll index 19a1d2d9..c9076a9 100644 --- a/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll +++ b/llvm/test/CodeGen/AMDGPU/atomicrmw-expand.ll @@ -186,7 +186,7 @@ define float @syncscope_workgroup_rtn(ptr %addr, float %val) #0 { ; GFX90A-NEXT: s_waitcnt lgkmcnt(0) ; GFX90A-NEXT: .LBB1_8: ; %atomicrmw.phi ; GFX90A-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX90A-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; 
GFX90A-NEXT: s_waitcnt vmcnt(0) ; GFX90A-NEXT: v_mov_b32_e32 v0, v3 ; GFX90A-NEXT: s_setpc_b64 s[30:31] ; diff --git a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll index ac50fb8..da609bf 100644 --- a/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll +++ b/llvm/test/CodeGen/AMDGPU/cgp-addressing-modes-gfx908.ll @@ -41,7 +41,7 @@ define amdgpu_kernel void @test_sink_small_offset_global_atomic_fadd_f32(ptr add ; GCN-NEXT: .LBB0_2: ; %endif ; GCN-NEXT: s_or_b64 exec, exec, s[4:5] ; GCN-NEXT: v_mov_b32_e32 v1, 0x3d0000 -; GCN-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GCN-NEXT: s_waitcnt lgkmcnt(0) ; GCN-NEXT: global_store_dword v1, v0, s[0:1] offset:2300 ; GCN-NEXT: s_endpgm entry: diff --git a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll index 069c57e..6dabd8c 100644 --- a/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll +++ b/llvm/test/CodeGen/AMDGPU/extract-subvector-16bit.ll @@ -103,7 +103,6 @@ define <4 x i16> @vec_8xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1) ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: .LBB0_4: ; %exit -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v3 op_sel_hi:[0,0] ; GFX9-NEXT: s_movk_i32 s4, 0x8000 ; GFX9-NEXT: v_or_b32_e32 v1, 0xffff8000, v0 @@ -131,7 +130,6 @@ define <4 x i16> @vec_8xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1) ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB0_4: ; %exit -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v2 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v3 op_sel_hi:[0,0] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -266,7 +264,6 @@ define <4 x i16> @vec_8xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace( ; GFX9-NEXT: global_load_dwordx4 v[2:5], v[0:1], off glc ; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: .LBB1_4: ; %exit -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_pk_ashrrev_i16 v0, 15, v5 op_sel_hi:[0,1] ; GFX9-NEXT: s_movk_i32 s4, 0x8000 ; GFX9-NEXT: v_or_b32_e32 v1, 0xffff8000, v0 @@ -294,7 +291,6 @@ define <4 x i16> @vec_8xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace( ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB1_4: ; %exit -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v4 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v5 op_sel_hi:[0,1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -431,7 +427,6 @@ define <4 x half> @vec_8xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1 ; GFX9-NEXT: .LBB2_4: ; %exit ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3900 ; GFX9-NEXT: v_mov_b32_e32 v1, 0x3d00 -; GFX9-NEXT: s_waitcnt vmcnt(0) ; GFX9-NEXT: v_cmp_ge_f16_e32 vcc, 0.5, v2 ; GFX9-NEXT: v_mov_b32_e32 v5, 0x3800 ; GFX9-NEXT: v_cndmask_b32_e32 v4, v0, v1, vcc @@ -461,7 +456,6 @@ define <4 x half> @vec_8xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace(1 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB2_4: ; %exit ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3d00 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v2 ; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -665,7 +659,6 @@ define <4 x i16> 
@vec_16xi16_extract_4xi16(ptr addrspace(1) %p0, ptr addrspace(1 ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB3_4: ; %exit -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v2 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v3 op_sel_hi:[0,0] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -871,7 +864,6 @@ define <4 x i16> @vec_16xi16_extract_4xi16_2(ptr addrspace(1) %p0, ptr addrspace ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB4_4: ; %exit -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_pk_ashrrev_i16 v0, 15, v4 op_sel_hi:[0,1] ; GFX11-NEXT: v_pk_ashrrev_i16 v1, 15, v5 op_sel_hi:[0,1] ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2) @@ -1081,7 +1073,6 @@ define <4 x half> @vec_16xf16_extract_4xf16(ptr addrspace(1) %p0, ptr addrspace( ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB5_4: ; %exit ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3d00 -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_lshrrev_b32_e32 v1, 16, v2 ; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v2 ; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3) @@ -1432,7 +1423,6 @@ define amdgpu_gfx <8 x i16> @vec_16xi16_extract_8xi16_0(i1 inreg %cond, ptr addr ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB7_4: ; %exit -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmp_gt_u16_e32 vcc_lo, 0x3801, v5 ; GFX11-NEXT: v_mov_b32_e32 v9, 0x3900 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x3d00 @@ -1724,7 +1714,6 @@ define amdgpu_gfx <8 x half> @vec_16xf16_extract_8xf16_0(i1 inreg %cond, ptr add ; GFX11-NEXT: global_load_b128 v[2:5], v[0:1], off glc dlc ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: .LBB8_4: ; %exit -; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_cmp_ge_f16_e32 vcc_lo, 0.5, v5 ; GFX11-NEXT: v_mov_b32_e32 v9, 0x3900 ; GFX11-NEXT: v_mov_b32_e32 v1, 0x3d00 diff --git a/llvm/test/CodeGen/AMDGPU/function-args.ll b/llvm/test/CodeGen/AMDGPU/function-args.ll index db89ad6..3b2f15c 100644 --- a/llvm/test/CodeGen/AMDGPU/function-args.ll +++ b/llvm/test/CodeGen/AMDGPU/function-args.ll @@ -114,7 +114,6 @@ define void @i1_arg_i1_use(i1 %arg) #0 { ; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: .LBB3_2: ; %bb2 ; CIGFX89-NEXT: s_or_b64 exec, exec, s[4:5] -; CIGFX89-NEXT: s_waitcnt vmcnt(0) ; CIGFX89-NEXT: s_setpc_b64 s[30:31] ; ; GFX11-LABEL: i1_arg_i1_use: diff --git a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll index 433a836..3b3e107 100644 --- a/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll +++ b/llvm/test/CodeGen/AMDGPU/lds-global-non-entry-func.ll @@ -33,7 +33,7 @@ define void @func_use_lds_global() { ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-SDAG-NEXT: s_mov_b32 m0, -1 -; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0xc8 ; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -103,7 +103,7 @@ define void @func_use_lds_global_constexpr_cast() { ; GFX8-SDAG-LABEL: func_use_lds_global_constexpr_cast: ; GFX8-SDAG: ; %bb.0: ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0xc8 ; GFX8-SDAG-NEXT: 
s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-SDAG-NEXT: s_trap 2 @@ -171,7 +171,7 @@ define void @func_uses_lds_multi(i1 %cond) { ; GFX8-SDAG-NEXT: s_cbranch_execz .LBB2_2 ; GFX8-SDAG-NEXT: ; %bb.1: ; %bb1 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 1 -; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0xc8 ; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -181,7 +181,7 @@ define void @func_uses_lds_multi(i1 %cond) { ; GFX8-SDAG-NEXT: s_cbranch_execz .LBB2_4 ; GFX8-SDAG-NEXT: ; %bb.3: ; %bb0 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0 -; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0xc8 ; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -189,7 +189,7 @@ define void @func_uses_lds_multi(i1 %cond) { ; GFX8-SDAG-NEXT: .LBB2_4: ; %ret ; GFX8-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 2 -; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0xc8 ; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -379,7 +379,7 @@ define void @func_uses_lds_code_after(ptr addrspace(1) %ptr) { ; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 0 ; GFX8-SDAG-NEXT: s_mov_b32 m0, -1 ; GFX8-SDAG-NEXT: ds_write_b32 v0, v2 -; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[4:5], 0xc8 ; GFX8-SDAG-NEXT: v_mov_b32_e32 v2, 1 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[4:5], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -472,7 +472,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; GFX8-SDAG-NEXT: ; %bb.1: ; %use.bb ; GFX8-SDAG-NEXT: v_mov_b32_e32 v0, 0 ; GFX8-SDAG-NEXT: s_mov_b32 m0, -1 -; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0 +; GFX8-SDAG-NEXT: s_mov_b64 s[6:7], 0xc8 ; GFX8-SDAG-NEXT: ds_write_b32 v0, v0 ; GFX8-SDAG-NEXT: s_load_dwordx2 s[0:1], s[6:7], 0x0 ; GFX8-SDAG-NEXT: s_waitcnt lgkmcnt(0) @@ -481,7 +481,6 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX8-SDAG-NEXT: .LBB4_2: ; %ret ; GFX8-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX8-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX8-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX8-GISEL-LABEL: func_uses_lds_phi_after: @@ -506,7 +505,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX8-GISEL-NEXT: .LBB4_2: ; %ret ; GFX8-GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX8-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX8-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX8-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-SDAG-LABEL: func_uses_lds_phi_after: @@ -527,7 +526,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) ; GFX9-SDAG-NEXT: .LBB4_2: ; %ret ; GFX9-SDAG-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31] ; ; GFX9-GISEL-LABEL: func_uses_lds_phi_after: @@ -548,7 +547,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) ; GFX9-GISEL-NEXT: .LBB4_2: ; %ret ; GFX9-GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31] ; ; SDAG-LABEL: 
func_uses_lds_phi_after: @@ -570,7 +569,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; SDAG-NEXT: s_waitcnt vmcnt(0) ; SDAG-NEXT: .LBB4_3: ; %ret ; SDAG-NEXT: s_or_b64 exec, exec, s[4:5] -; SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; SDAG-NEXT: s_waitcnt lgkmcnt(0) ; SDAG-NEXT: s_setpc_b64 s[30:31] ; SDAG-NEXT: .LBB4_4: ; SDAG-NEXT: s_endpgm @@ -594,7 +593,7 @@ define i32 @func_uses_lds_phi_after(i1 %cond, ptr addrspace(1) %ptr) { ; GISEL-NEXT: s_waitcnt vmcnt(0) ; GISEL-NEXT: .LBB4_3: ; %ret ; GISEL-NEXT: s_or_b64 exec, exec, s[4:5] -; GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0) +; GISEL-NEXT: s_waitcnt lgkmcnt(0) ; GISEL-NEXT: s_setpc_b64 s[30:31] ; GISEL-NEXT: .LBB4_4: ; GISEL-NEXT: s_endpgm @@ -616,6 +615,3 @@ ret: ; CHECK: {{.*}} ; GFX8: {{.*}} ; GFX9: {{.*}} - -!llvm.module.flags = !{!0} -!0 = !{i32 1, !"amdhsa_code_object_version", i32 500} diff --git a/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll b/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll index 5e76dfd..4477f02 100644 --- a/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll +++ b/llvm/test/CodeGen/AMDGPU/local-atomics-fp.ll @@ -157,7 +157,6 @@ define amdgpu_kernel void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) % ; VI-NEXT: .LBB2_2: ; VI-NEXT: s_or_b64 exec, exec, s[6:7] ; VI-NEXT: s_mov_b64 s[6:7], exec -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_readfirstlane_b32 s8, v1 ; VI-NEXT: v_mbcnt_lo_u32_b32 v1, s6, 0 ; VI-NEXT: v_mbcnt_hi_u32_b32 v1, s7, v1 @@ -203,15 +202,14 @@ define amdgpu_kernel void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) % ; VI-NEXT: ; %bb.7: ; VI-NEXT: v_mov_b32_e32 v2, s2 ; VI-NEXT: s_mov_b32 m0, -1 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: ds_add_rtn_f32 v2, v2, v1 ; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: .LBB2_8: ; VI-NEXT: s_or_b64 exec, exec, s[4:5] ; VI-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_readfirstlane_b32 s2, v2 ; VI-NEXT: v_add_f32_e32 v2, s2, v0 +; VI-NEXT: s_waitcnt lgkmcnt(0) ; VI-NEXT: v_mov_b32_e32 v0, s0 ; VI-NEXT: v_mov_b32_e32 v1, s1 ; VI-NEXT: flat_store_dword v[0:1], v2 @@ -240,7 +238,6 @@ define amdgpu_kernel void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) % ; GFX9-NEXT: .LBB2_2: ; GFX9-NEXT: s_or_b64 exec, exec, s[6:7] ; GFX9-NEXT: s_mov_b64 s[6:7], exec -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s8, v1 ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v1, s6, 0 ; GFX9-NEXT: v_mbcnt_hi_u32_b32 v1, s7, v1 @@ -285,16 +282,15 @@ define amdgpu_kernel void @lds_ds_fadd(ptr addrspace(1) %out, ptr addrspace(3) % ; GFX9-NEXT: s_cbranch_execz .LBB2_8 ; GFX9-NEXT: ; %bb.7: ; GFX9-NEXT: v_mov_b32_e32 v2, s2 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: ds_add_rtn_f32 v2, v2, v1 ; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: .LBB2_8: ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5] ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24 -; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: v_readfirstlane_b32 s2, v2 ; GFX9-NEXT: v_mov_b32_e32 v1, 0 ; GFX9-NEXT: v_add_f32_e32 v0, s2, v0 +; GFX9-NEXT: s_waitcnt lgkmcnt(0) ; GFX9-NEXT: global_store_dword v1, v0, s[0:1] ; GFX9-NEXT: s_endpgm ; diff --git a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll index 138dd53..d19ef75 100644 --- a/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll +++ b/llvm/test/CodeGen/AMDGPU/skip-if-dead.ll @@ -1260,8 +1260,6 @@ define amdgpu_ps void @phi_use_def_before_kill(float inreg %x) #0 { ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: 
.LBB11_5: ; %end -; GFX11-NEXT: s_nop 0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; GFX11-NEXT: .LBB11_6: ; GFX11-NEXT: s_mov_b64 exec, 0 @@ -1525,8 +1523,6 @@ define amdgpu_ps void @if_after_kill_block(float %arg, float %arg1, float %arg2, ; GFX11-NEXT: global_store_b32 v[0:1], v0, off dlc ; GFX11-NEXT: s_waitcnt_vscnt null, 0x0 ; GFX11-NEXT: .LBB13_5: ; %UnifiedReturnBlock -; GFX11-NEXT: s_nop 0 -; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS) ; GFX11-NEXT: s_endpgm ; GFX11-NEXT: .LBB13_6: ; GFX11-NEXT: s_mov_b64 exec, 0 diff --git a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll index eef5f57..ecebbb9 100644 --- a/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll +++ b/llvm/test/CodeGen/AMDGPU/transform-block-with-return-to-epilog.ll @@ -32,7 +32,7 @@ define amdgpu_ps float @test_return_to_epilog_into_end_block(i32 inreg %a, float ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: S_WAITCNT_soft 3952 + ; GCN-NEXT: S_WAITCNT 3952 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.3: entry: @@ -79,7 +79,7 @@ define amdgpu_ps float @test_unify_return_to_epilog_into_end_block(i32 inreg %a, ; GCN-NEXT: {{ $}} ; GCN-NEXT: renamable $vgpr0 = V_MOV_B32_e32 0, implicit $exec ; GCN-NEXT: GLOBAL_STORE_DWORD undef renamable $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec :: (volatile store (s32) into `ptr addrspace(1) undef`, addrspace 1) - ; GCN-NEXT: S_WAITCNT_soft 3952 + ; GCN-NEXT: S_WAITCNT 3952 ; GCN-NEXT: {{ $}} ; GCN-NEXT: bb.5: entry: -- cgit v1.1 From 708ce8569067c2aabd3cc669b0db90f23e53b3b0 Mon Sep 17 00:00:00 2001 From: Stephen Tozer Date: Thu, 4 Apr 2024 10:20:14 +0100 Subject: [RemoveDIs][NFC] Use ScopedDbgInfoFormatSetter in more places (#87380) The class `ScopedDbgInfoFormatSetter` was added as a convenient way to temporarily change the debug info format of a function or module, as part of IR printing; since this process is repeated in a number of other places, this patch uses the format-setter class in those places as well. --- llvm/include/llvm/IR/DebugProgramInstruction.h | 19 +++++++++++++++++++ llvm/include/llvm/IR/PassManager.h | 24 +----------------------- llvm/include/llvm/IR/PrintPasses.h | 19 ------------------- llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp | 17 ++++------------- llvm/lib/CodeGen/MIRPrinter.cpp | 20 ++++++-------------- llvm/lib/IR/LegacyPassManager.cpp | 7 +------ llvm/lib/Linker/IRMover.cpp | 21 +++------------------ llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp | 9 ++------- llvm/tools/llvm-dis/llvm-dis.cpp | 6 +----- 9 files changed, 37 insertions(+), 105 deletions(-) diff --git a/llvm/include/llvm/IR/DebugProgramInstruction.h b/llvm/include/llvm/IR/DebugProgramInstruction.h index c947713..9f49874 100644 --- a/llvm/include/llvm/IR/DebugProgramInstruction.h +++ b/llvm/include/llvm/IR/DebugProgramInstruction.h @@ -659,6 +659,25 @@ getDbgRecordRange(DbgMarker *DebugMarker) { DEFINE_ISA_CONVERSION_FUNCTIONS(DbgRecord, LLVMDbgRecordRef) +/// Used to temporarily set the debug info format of a function, module, or +/// basic block for the duration of this object's lifetime, after which the +/// prior state will be restored. 
+template class ScopedDbgInfoFormatSetter { + T &Obj; + bool OldState; + +public: + ScopedDbgInfoFormatSetter(T &Obj, bool NewState) + : Obj(Obj), OldState(Obj.IsNewDbgInfoFormat) { + Obj.setIsNewDbgInfoFormat(NewState); + } + ~ScopedDbgInfoFormatSetter() { Obj.setIsNewDbgInfoFormat(OldState); } +}; + +template +ScopedDbgInfoFormatSetter(T &Obj, + bool NewState) -> ScopedDbgInfoFormatSetter; + } // namespace llvm #endif // LLVM_IR_DEBUGPROGRAMINSTRUCTION_H diff --git a/llvm/include/llvm/IR/PassManager.h b/llvm/include/llvm/IR/PassManager.h index 1084654..d701481 100644 --- a/llvm/include/llvm/IR/PassManager.h +++ b/llvm/include/llvm/IR/PassManager.h @@ -64,23 +64,6 @@ extern llvm::cl::opt UseNewDbgInfoFormat; namespace llvm { -// RemoveDIs: Provide facilities for converting debug-info from one form to -// another, which are no-ops for everything but modules. -template inline bool shouldConvertDbgInfo(IRUnitT &IR) { - return false; -} -template <> inline bool shouldConvertDbgInfo(Module &IR) { - return !IR.IsNewDbgInfoFormat && UseNewDbgInfoFormat; -} -template inline void doConvertDbgInfoToNew(IRUnitT &IR) {} -template <> inline void doConvertDbgInfoToNew(Module &IR) { - IR.convertToNewDbgValues(); -} -template inline void doConvertDebugInfoToOld(IRUnitT &IR) {} -template <> inline void doConvertDebugInfoToOld(Module &IR) { - IR.convertFromNewDbgValues(); -} - // Forward declare the analysis manager template. template class AnalysisManager; @@ -229,9 +212,7 @@ public: // RemoveDIs: if requested, convert debug-info to DbgRecord representation // for duration of these passes. - bool ShouldConvertDbgInfo = shouldConvertDbgInfo(IR); - if (ShouldConvertDbgInfo) - doConvertDbgInfoToNew(IR); + ScopedDbgInfoFormatSetter FormatSetter(IR, UseNewDbgInfoFormat); for (auto &Pass : Passes) { // Check the PassInstrumentation's BeforePass callbacks before running the @@ -255,9 +236,6 @@ public: PA.intersect(std::move(PassPA)); } - if (ShouldConvertDbgInfo) - doConvertDebugInfoToOld(IR); - // Invalidation was handled after each pass in the above loop for the // current unit of IR. Therefore, the remaining analysis results in the // AnalysisManager are preserved. We mark this with a set so that we don't diff --git a/llvm/include/llvm/IR/PrintPasses.h b/llvm/include/llvm/IR/PrintPasses.h index 3803bd0..95b97e7 100644 --- a/llvm/include/llvm/IR/PrintPasses.h +++ b/llvm/include/llvm/IR/PrintPasses.h @@ -78,25 +78,6 @@ std::string doSystemDiff(StringRef Before, StringRef After, StringRef OldLineFormat, StringRef NewLineFormat, StringRef UnchangedLineFormat); -/// Used to temporarily set the debug info format of a function, module, or -/// basic block for the duration of this object's lifetime, after which the -/// prior state will be restored. 
-template class ScopedDbgInfoFormatSetter { - T &Obj; - bool OldState; - -public: - ScopedDbgInfoFormatSetter(T &Obj, bool NewState) - : Obj(Obj), OldState(Obj.IsNewDbgInfoFormat) { - Obj.setIsNewDbgInfoFormat(NewState); - } - ~ScopedDbgInfoFormatSetter() { Obj.setIsNewDbgInfoFormat(OldState); } -}; - -template -ScopedDbgInfoFormatSetter(T &Obj, bool NewState) - -> ScopedDbgInfoFormatSetter; - } // namespace llvm #endif // LLVM_IR_PRINTPASSES_H diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp index de2396f..4f2486c 100644 --- a/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp +++ b/llvm/lib/Bitcode/Writer/BitcodeWriterPass.cpp @@ -21,19 +21,14 @@ using namespace llvm; extern bool WriteNewDbgInfoFormatToBitcode; PreservedAnalyses BitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) { - bool ConvertToOldDbgFormatForWrite = - M.IsNewDbgInfoFormat && !WriteNewDbgInfoFormatToBitcode; - if (ConvertToOldDbgFormatForWrite) - M.convertFromNewDbgValues(); + ScopedDbgInfoFormatSetter FormatSetter(M, M.IsNewDbgInfoFormat && + WriteNewDbgInfoFormatToBitcode); const ModuleSummaryIndex *Index = EmitSummaryIndex ? &(AM.getResult(M)) : nullptr; WriteBitcodeToFile(M, OS, ShouldPreserveUseListOrder, Index, EmitModuleHash); - if (ConvertToOldDbgFormatForWrite) - M.convertToNewDbgValues(); - return PreservedAnalyses::all(); } @@ -57,16 +52,12 @@ namespace { StringRef getPassName() const override { return "Bitcode Writer"; } bool runOnModule(Module &M) override { - bool ConvertToOldDbgFormatForWrite = - M.IsNewDbgInfoFormat && !WriteNewDbgInfoFormatToBitcode; - if (ConvertToOldDbgFormatForWrite) - M.convertFromNewDbgValues(); + ScopedDbgInfoFormatSetter FormatSetter( + M, M.IsNewDbgInfoFormat && WriteNewDbgInfoFormatToBitcode); WriteBitcodeToFile(M, OS, ShouldPreserveUseListOrder, /*Index=*/nullptr, /*EmitModuleHash=*/false); - if (ConvertToOldDbgFormatForWrite) - M.convertToNewDbgValues(); return false; } void getAnalysisUsage(AnalysisUsage &AU) const override { diff --git a/llvm/lib/CodeGen/MIRPrinter.cpp b/llvm/lib/CodeGen/MIRPrinter.cpp index bbc6d39..bf3aee6 100644 --- a/llvm/lib/CodeGen/MIRPrinter.cpp +++ b/llvm/lib/CodeGen/MIRPrinter.cpp @@ -69,6 +69,8 @@ static cl::opt SimplifyMIR( static cl::opt PrintLocations("mir-debug-loc", cl::Hidden, cl::init(true), cl::desc("Print MIR debug-locations")); +extern cl::opt WriteNewDbgInfoFormat; + namespace { /// This structure describes how to print out stack object references. @@ -986,29 +988,19 @@ void MIRFormatter::printIRValue(raw_ostream &OS, const Value &V, } void llvm::printMIR(raw_ostream &OS, const Module &M) { - // RemoveDIs: as there's no textual form for DbgRecords yet, print debug-info - // in dbg.value format. - bool IsNewDbgInfoFormat = M.IsNewDbgInfoFormat; - if (IsNewDbgInfoFormat) - const_cast(M).convertFromNewDbgValues(); + ScopedDbgInfoFormatSetter FormatSetter(const_cast(M), + WriteNewDbgInfoFormat); yaml::Output Out(OS); Out << const_cast(M); - - if (IsNewDbgInfoFormat) - const_cast(M).convertToNewDbgValues(); } void llvm::printMIR(raw_ostream &OS, const MachineFunction &MF) { // RemoveDIs: as there's no textual form for DbgRecords yet, print debug-info // in dbg.value format. 
- bool IsNewDbgInfoFormat = MF.getFunction().IsNewDbgInfoFormat; - if (IsNewDbgInfoFormat) - const_cast(MF.getFunction()).convertFromNewDbgValues(); + ScopedDbgInfoFormatSetter FormatSetter( + const_cast(MF.getFunction()), WriteNewDbgInfoFormat); MIRPrinter Printer(OS); Printer.print(MF); - - if (IsNewDbgInfoFormat) - const_cast(MF.getFunction()).convertToNewDbgValues(); } diff --git a/llvm/lib/IR/LegacyPassManager.cpp b/llvm/lib/IR/LegacyPassManager.cpp index 953f21c..d361bd9 100644 --- a/llvm/lib/IR/LegacyPassManager.cpp +++ b/llvm/lib/IR/LegacyPassManager.cpp @@ -531,9 +531,7 @@ bool PassManagerImpl::run(Module &M) { // RemoveDIs: if a command line flag is given, convert to the // DbgVariableRecord representation of debug-info for the duration of these // passes. - bool shouldConvertDbgInfo = UseNewDbgInfoFormat && !M.IsNewDbgInfoFormat; - if (shouldConvertDbgInfo) - M.convertToNewDbgValues(); + ScopedDbgInfoFormatSetter FormatSetter(M, UseNewDbgInfoFormat); for (ImmutablePass *ImPass : getImmutablePasses()) Changed |= ImPass->doInitialization(M); @@ -547,9 +545,6 @@ bool PassManagerImpl::run(Module &M) { for (ImmutablePass *ImPass : getImmutablePasses()) Changed |= ImPass->doFinalization(M); - if (shouldConvertDbgInfo) - M.convertFromNewDbgValues(); - return Changed; } } // namespace legacy diff --git a/llvm/lib/Linker/IRMover.cpp b/llvm/lib/Linker/IRMover.cpp index a7e6db8..7a5aa0c 100644 --- a/llvm/lib/Linker/IRMover.cpp +++ b/llvm/lib/Linker/IRMover.cpp @@ -1548,25 +1548,10 @@ Error IRLinker::run() { return Err; // Convert source module to match dest for the duration of the link. - bool SrcModuleNewDbgFormat = SrcM->IsNewDbgInfoFormat; - if (DstM.IsNewDbgInfoFormat != SrcM->IsNewDbgInfoFormat) { - if (DstM.IsNewDbgInfoFormat) - SrcM->convertToNewDbgValues(); - else - SrcM->convertFromNewDbgValues(); - } - // Undo debug mode conversion afterwards. - auto Cleanup = make_scope_exit([&]() { - if (SrcModuleNewDbgFormat != SrcM->IsNewDbgInfoFormat) { - if (SrcModuleNewDbgFormat) - SrcM->convertToNewDbgValues(); - else - SrcM->convertFromNewDbgValues(); - } - }); + ScopedDbgInfoFormatSetter FormatSetter(*SrcM, DstM.IsNewDbgInfoFormat); - // Inherit the target data from the source module if the destination module - // doesn't have one already. + // Inherit the target data from the source module if the destination + // module doesn't have one already. if (DstM.getDataLayout().isDefault()) DstM.setDataLayout(SrcM->getDataLayout()); diff --git a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp index 3986359..4df18c8 100644 --- a/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp +++ b/llvm/lib/Transforms/IPO/ThinLTOBitcodeWriter.cpp @@ -583,10 +583,8 @@ llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) { // RemoveDIs: there's no bitcode representation of the DbgVariableRecord // debug-info, convert to dbg.values before writing out. - bool ConvertToOldDbgFormatForWrite = - M.IsNewDbgInfoFormat && !WriteNewDbgInfoFormatToBitcode; - if (ConvertToOldDbgFormatForWrite) - M.convertFromNewDbgValues(); + ScopedDbgInfoFormatSetter FormatSetter(M, M.IsNewDbgInfoFormat && + WriteNewDbgInfoFormatToBitcode); bool Changed = writeThinLTOBitcode( OS, ThinLinkOS, @@ -595,8 +593,5 @@ llvm::ThinLTOBitcodeWriterPass::run(Module &M, ModuleAnalysisManager &AM) { }, M, &AM.getResult(M)); - if (ConvertToOldDbgFormatForWrite) - M.convertToNewDbgValues(); - return Changed ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); } diff --git a/llvm/tools/llvm-dis/llvm-dis.cpp b/llvm/tools/llvm-dis/llvm-dis.cpp index 49154dc..6ad1c995 100644 --- a/llvm/tools/llvm-dis/llvm-dis.cpp +++ b/llvm/tools/llvm-dis/llvm-dis.cpp @@ -258,12 +258,8 @@ int main(int argc, char **argv) { // All that llvm-dis does is write the assembly to a file. if (!DontPrint) { if (M) { - bool ChangeDbgFormat = M->IsNewDbgInfoFormat != WriteNewDbgInfoFormat; - if (ChangeDbgFormat) - M->setIsNewDbgInfoFormat(WriteNewDbgInfoFormat); + ScopedDbgInfoFormatSetter FormatSetter(*M, WriteNewDbgInfoFormat); M->print(Out->os(), Annotator.get(), PreserveAssemblyUseListOrder); - if (ChangeDbgFormat) - M->setIsNewDbgInfoFormat(!WriteNewDbgInfoFormat); } if (Index) Index->print(Out->os()); -- cgit v1.1 From 7a8cf951b3bdc60feac412200ab9661e009d44ae Mon Sep 17 00:00:00 2001 From: Tim Northover Date: Thu, 4 Apr 2024 09:52:48 +0100 Subject: AArch64-Darwin: allow -mcmodel=large with (default) PIC Darwin targets implement -mcmodel=large by forcing all global accesses to use the GOT, instead of the ELF movz/movk sequence. That means it's compatible with PIC so the Clang driver shouldn't reject the option. --- clang/lib/Driver/ToolChains/Clang.cpp | 3 ++- clang/test/Driver/mcmodel.c | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index b7ec7e0..766a9b91 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -5882,7 +5882,8 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, CM = "large"; if (Triple.isAArch64(64)) { Ok = CM == "tiny" || CM == "small" || CM == "large"; - if (CM == "large" && RelocationModel != llvm::Reloc::Static) + if (CM == "large" && !Triple.isOSBinFormatMachO() && + RelocationModel != llvm::Reloc::Static) D.Diag(diag::err_drv_argument_only_allowed_with) << A->getAsString(Args) << "-fno-pic"; } else if (Triple.isLoongArch()) { diff --git a/clang/test/Driver/mcmodel.c b/clang/test/Driver/mcmodel.c index 1eb6ae1..9681c32 100644 --- a/clang/test/Driver/mcmodel.c +++ b/clang/test/Driver/mcmodel.c @@ -11,6 +11,7 @@ // RUN: FileCheck --check-prefix=AIX-MCMEDIUM-OVERRIDE %s < %t.log // RUN: not %clang -### -c -mcmodel=lager %s 2>&1 | FileCheck --check-prefix=INVALID %s // RUN: %clang --target=aarch64 -### -S -mcmodel=large -fno-pic %s 2>&1 | FileCheck --check-prefix=LARGE %s +// RUN: %clang --target=aarch64-apple-macosx -### -S -mcmodel=large %s 2>&1 | FileCheck --check-prefix=LARGE %s // RUN: not %clang --target=aarch64 -### -S -mcmodel=large -fpic %s 2>&1 | FileCheck --check-prefix=AARCH64-PIC-LARGE %s // RUN: not %clang -### -c --target=aarch64 -mcmodel=medium %s 2>&1 | FileCheck --check-prefix=ERR-MEDIUM %s // RUN: not %clang -### -c --target=aarch64 -mcmodel=kernel %s 2>&1 | FileCheck --check-prefix=ERR-KERNEL %s -- cgit v1.1 From cca9115b1c640a307b510821cb84a1f3bb76b969 Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Thu, 4 Apr 2024 12:38:09 +0300 Subject: [lld][AArch64][ELF][PAC] Support AUTH relocations and AUTH ELF marking (#72714) This patch adds lld support for: - Dynamic R_AARCH64_AUTH_* relocations (without including RELR compressed AUTH relocations) as described here: https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst#auth-variant-dynamic-relocations - .note.AARCH64-PAUTH-ABI-tag section as defined here https://github.com/ARM-software/abi-aa/blob/main/pauthabielf64/pauthabielf64.rst#elf-marking Depends on 
#72713 and #85231 --------- Co-authored-by: Peter Collingbourne Co-authored-by: Fangrui Song --- lld/ELF/Arch/AArch64.cpp | 4 +- lld/ELF/Config.h | 3 + lld/ELF/Driver.cpp | 70 +++++++++++++++---- lld/ELF/InputFiles.cpp | 41 +++++------ lld/ELF/InputFiles.h | 1 + lld/ELF/InputSection.cpp | 1 + lld/ELF/Relocations.cpp | 28 ++++++-- lld/ELF/Relocations.h | 1 + lld/ELF/SyntheticSections.cpp | 40 ++++++++--- lld/ELF/Writer.cpp | 3 +- lld/docs/ReleaseNotes.rst | 3 + lld/docs/ld.lld.1 | 5 ++ lld/test/ELF/aarch64-bti-pac-cli-error.s | 15 ++-- lld/test/ELF/aarch64-feature-pauth.s | 114 +++++++++++++++++++++++++++++++ lld/test/ELF/aarch64-reloc-pauth-ro.s | 22 ++++++ lld/test/ELF/aarch64-reloc-pauth.s | 108 +++++++++++++++++++++++++++++ 16 files changed, 403 insertions(+), 56 deletions(-) create mode 100644 lld/test/ELF/aarch64-feature-pauth.s create mode 100644 lld/test/ELF/aarch64-reloc-pauth-ro.s create mode 100644 lld/test/ELF/aarch64-reloc-pauth.s diff --git a/lld/ELF/Arch/AArch64.cpp b/lld/ELF/Arch/AArch64.cpp index 30ccd68..017c17c 100644 --- a/lld/ELF/Arch/AArch64.cpp +++ b/lld/ELF/Arch/AArch64.cpp @@ -113,6 +113,8 @@ RelExpr AArch64::getRelExpr(RelType type, const Symbol &s, case R_AARCH64_MOVW_UABS_G2_NC: case R_AARCH64_MOVW_UABS_G3: return R_ABS; + case R_AARCH64_AUTH_ABS64: + return R_AARCH64_AUTH; case R_AARCH64_TLSDESC_ADR_PAGE21: return R_AARCH64_TLSDESC_PAGE; case R_AARCH64_TLSDESC_LD64_LO12: @@ -204,7 +206,7 @@ bool AArch64::usesOnlyLowPageBits(RelType type) const { } RelType AArch64::getDynRel(RelType type) const { - if (type == R_AARCH64_ABS64) + if (type == R_AARCH64_ABS64 || type == R_AARCH64_AUTH_ABS64) return type; return R_AARCH64_NONE; } diff --git a/lld/ELF/Config.h b/lld/ELF/Config.h index 27274d6..83f293a 100644 --- a/lld/ELF/Config.h +++ b/lld/ELF/Config.h @@ -187,6 +187,7 @@ struct Config { llvm::StringRef cmseOutputLib; StringRef zBtiReport = "none"; StringRef zCetReport = "none"; + StringRef zPauthReport = "none"; bool ltoBBAddrMap; llvm::StringRef ltoBasicBlockSections; std::pair thinLTOObjectSuffixReplace; @@ -499,6 +500,8 @@ struct Ctx { void reset(); llvm::raw_fd_ostream openAuxiliaryFile(llvm::StringRef, std::error_code &); + + ArrayRef aarch64PauthAbiCoreInfo; }; LLVM_LIBRARY_VISIBILITY extern Ctx ctx; diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index b43da77..8dbff7f 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -46,6 +46,7 @@ #include "lld/Common/Strings.h" #include "lld/Common/TargetOptionsCommandFlags.h" #include "lld/Common/Version.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringSwitch.h" @@ -461,6 +462,8 @@ static void checkOptions() { error("-z force-bti only supported on AArch64"); if (config->zBtiReport != "none") error("-z bti-report only supported on AArch64"); + if (config->zPauthReport != "none") + error("-z pauth-report only supported on AArch64"); } if (config->emachine != EM_386 && config->emachine != EM_X86_64 && @@ -1501,7 +1504,8 @@ static void readConfigs(opt::InputArgList &args) { } auto reports = {std::make_pair("bti-report", &config->zBtiReport), - std::make_pair("cet-report", &config->zCetReport)}; + std::make_pair("cet-report", &config->zCetReport), + std::make_pair("pauth-report", &config->zPauthReport)}; for (opt::Arg *arg : args.filtered(OPT_z)) { std::pair option = StringRef(arg->getValue()).split('='); @@ -2599,14 +2603,17 @@ static void redirectSymbols(ArrayRef wrapped) { symtab.wrap(w.sym, w.real, w.wrap); } +static void 
reportMissingFeature(StringRef config, const Twine &report) { + if (config == "error") + error(report); + else if (config == "warning") + warn(report); +} + static void checkAndReportMissingFeature(StringRef config, uint32_t features, uint32_t mask, const Twine &report) { - if (!(features & mask)) { - if (config == "error") - error(report); - else if (config == "warning") - warn(report); - } + if (!(features & mask)) + reportMissingFeature(config, report); } // To enable CET (x86's hardware-assisted control flow enforcement), each @@ -2617,12 +2624,28 @@ static void checkAndReportMissingFeature(StringRef config, uint32_t features, // // This is also the case with AARCH64's BTI and PAC which use the similar // GNU_PROPERTY_AARCH64_FEATURE_1_AND mechanism. -static uint32_t getAndFeatures() { +// +// For AArch64 PAuth-enabled object files, the core info of all of them must +// match. Missing info for some object files with matching info for remaining +// ones can be allowed (see -z pauth-report). +static void readSecurityNotes() { if (config->emachine != EM_386 && config->emachine != EM_X86_64 && config->emachine != EM_AARCH64) - return 0; + return; + + config->andFeatures = -1; + + StringRef referenceFileName; + if (config->emachine == EM_AARCH64) { + auto it = llvm::find_if(ctx.objectFiles, [](const ELFFileBase *f) { + return !f->aarch64PauthAbiCoreInfo.empty(); + }); + if (it != ctx.objectFiles.end()) { + ctx.aarch64PauthAbiCoreInfo = (*it)->aarch64PauthAbiCoreInfo; + referenceFileName = (*it)->getName(); + } + } - uint32_t ret = -1; for (ELFFileBase *f : ctx.objectFiles) { uint32_t features = f->andFeatures; @@ -2658,14 +2681,31 @@ static uint32_t getAndFeatures() { "GNU_PROPERTY_AARCH64_FEATURE_1_PAC property"); features |= GNU_PROPERTY_AARCH64_FEATURE_1_PAC; } - ret &= features; + config->andFeatures &= features; + + if (ctx.aarch64PauthAbiCoreInfo.empty()) + continue; + + if (f->aarch64PauthAbiCoreInfo.empty()) { + reportMissingFeature(config->zPauthReport, + toString(f) + + ": -z pauth-report: file does not have AArch64 " + "PAuth core info while '" + + referenceFileName + "' has one"); + continue; + } + + if (ctx.aarch64PauthAbiCoreInfo != f->aarch64PauthAbiCoreInfo) + errorOrWarn("incompatible values of AArch64 PAuth core info found\n>>> " + + referenceFileName + ": 0x" + + toHex(ctx.aarch64PauthAbiCoreInfo, /*LowerCase=*/true) + + "\n>>> " + toString(f) + ": 0x" + + toHex(f->aarch64PauthAbiCoreInfo, /*LowerCase=*/true)); } // Force enable Shadow Stack. if (config->zShstk) - ret |= GNU_PROPERTY_X86_FEATURE_1_SHSTK; - - return ret; + config->andFeatures |= GNU_PROPERTY_X86_FEATURE_1_SHSTK; } static void initSectionsAndLocalSyms(ELFFileBase *file, bool ignoreComdats) { @@ -2944,7 +2984,7 @@ template void LinkerDriver::link(opt::InputArgList &args) { // Read .note.gnu.property sections from input object files which // contain a hint to tweak linker's and loader's behaviors. - config->andFeatures = getAndFeatures(); + readSecurityNotes(); // The Target instance handles target-specific stuff, such as applying // relocations or writing a PLT section. 
It also contains target-dependent diff --git a/lld/ELF/InputFiles.cpp b/lld/ELF/InputFiles.cpp index 6529ea0..1f49602 100644 --- a/lld/ELF/InputFiles.cpp +++ b/lld/ELF/InputFiles.cpp @@ -926,25 +926,18 @@ void ObjFile::initializeSections(bool ignoreComdats, handleSectionGroup(this->sections, entries); } -// If a source file is compiled with x86 hardware-assisted call flow control -// enabled, the generated object file contains feature flags indicating that -// fact. This function reads the feature flags and returns it. -// -// Essentially we want to read a single 32-bit value in this function, but this -// function is rather complicated because the value is buried deep inside a -// .note.gnu.property section. -// -// The section consists of one or more NOTE records. Each NOTE record consists -// of zero or more type-length-value fields. We want to find a field of a -// certain type. It seems a bit too much to just store a 32-bit value, perhaps -// the ABI is unnecessarily complicated. -template static uint32_t readAndFeatures(const InputSection &sec) { +// Read the following info from the .note.gnu.property section and write it to +// the corresponding fields in `ObjFile`: +// - Feature flags (32 bits) representing x86 or AArch64 features for +// hardware-assisted call flow control; +// - AArch64 PAuth ABI core info (16 bytes). +template +void readGnuProperty(const InputSection &sec, ObjFile &f) { using Elf_Nhdr = typename ELFT::Nhdr; using Elf_Note = typename ELFT::Note; - uint32_t featuresSet = 0; ArrayRef data = sec.content(); - auto reportFatal = [&](const uint8_t *place, const char *msg) { + auto reportFatal = [&](const uint8_t *place, const Twine &msg) { fatal(toString(sec.file) + ":(" + sec.name + "+0x" + Twine::utohexstr(place - sec.content().data()) + "): " + msg); }; @@ -983,7 +976,19 @@ template static uint32_t readAndFeatures(const InputSection &sec) { // accumulate the bits set. if (size < 4) reportFatal(place, "FEATURE_1_AND entry is too short"); - featuresSet |= read32(desc.data()); + f.andFeatures |= read32(desc.data()); + } else if (config->emachine == EM_AARCH64 && + type == GNU_PROPERTY_AARCH64_FEATURE_PAUTH) { + if (!f.aarch64PauthAbiCoreInfo.empty()) { + reportFatal(data.data(), + "multiple GNU_PROPERTY_AARCH64_FEATURE_PAUTH entries are " + "not supported"); + } else if (size != 16) { + reportFatal(data.data(), "GNU_PROPERTY_AARCH64_FEATURE_PAUTH entry " + "is invalid: expected 16 bytes, but got " + + Twine(size)); + } + f.aarch64PauthAbiCoreInfo = desc; } // Padding is present in the note descriptor, if necessary. @@ -993,8 +998,6 @@ template static uint32_t readAndFeatures(const InputSection &sec) { // Go to next NOTE record to look for more FEATURE_1_AND descriptions. data = data.slice(nhdr->getSize(sec.addralign)); } - - return featuresSet; } template @@ -1051,7 +1054,7 @@ InputSectionBase *ObjFile::createInputSection(uint32_t idx, // .note.gnu.property containing a single AND'ed bitmap, we discard an input // file's .note.gnu.property section. if (name == ".note.gnu.property") { - this->andFeatures = readAndFeatures(InputSection(*this, sec, name)); + readGnuProperty(InputSection(*this, sec, name), *this); return &InputSection::discarded; } diff --git a/lld/ELF/InputFiles.h b/lld/ELF/InputFiles.h index 9519759..834b3b6 100644 --- a/lld/ELF/InputFiles.h +++ b/lld/ELF/InputFiles.h @@ -230,6 +230,7 @@ protected: public: uint32_t andFeatures = 0; bool hasCommonSyms = false; + ArrayRef aarch64PauthAbiCoreInfo; }; // .o file. 
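Aside, not part of the patch: the 16-byte GNU_PROPERTY_AARCH64_FEATURE_PAUTH descriptor that readGnuProperty() stores verbatim in aarch64PauthAbiCoreInfo encodes two little-endian 64-bit values, platform then version, per the pauthabielf64 ELF-marking document linked in the commit message and the test inputs added later in this patch. A minimal standalone C++ sketch of decoding it follows, assuming a little-endian host; the struct and helper names are illustrative only, not lld API.

// Illustrative sketch only -- not lld code. Splits the validated 16-byte
// PAuth core info descriptor into its platform and version words.
#include <cstdint>
#include <cstring>

struct PauthCoreInfo {
  uint64_t platform = 0;
  uint64_t version = 0;
};

// `desc` must point at a descriptor already checked to be exactly 16 bytes,
// as readGnuProperty() does; a little-endian host is assumed for brevity.
inline PauthCoreInfo decodePauthCoreInfo(const uint8_t *desc) {
  PauthCoreInfo info;
  std::memcpy(&info.platform, desc, 8);    // bytes 0..7: platform
  std::memcpy(&info.version, desc + 8, 8); // bytes 8..15: version
  return info;
}

For the abi-tag1.s input added below (.quad 42 for platform, .quad 1 for version), this yields the pair that llvm-readelf reports as "platform 0x2a (unknown), version 0x1".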
diff --git a/lld/ELF/InputSection.cpp b/lld/ELF/InputSection.cpp index 4f88313..c06816b 100644 --- a/lld/ELF/InputSection.cpp +++ b/lld/ELF/InputSection.cpp @@ -676,6 +676,7 @@ uint64_t InputSectionBase::getRelocTargetVA(const InputFile *file, RelType type, case R_DTPREL: case R_RELAX_TLS_LD_TO_LE_ABS: case R_RELAX_GOT_PC_NOPIC: + case R_AARCH64_AUTH: case R_RISCV_ADD: case R_RISCV_LEB128: return sym.getVA(a); diff --git a/lld/ELF/Relocations.cpp b/lld/ELF/Relocations.cpp index 92f2e20..5527434 100644 --- a/lld/ELF/Relocations.cpp +++ b/lld/ELF/Relocations.cpp @@ -995,7 +995,8 @@ bool RelocationScanner::isStaticLinkTimeConstant(RelExpr e, RelType type, if (e == R_GOT || e == R_PLT) return target->usesOnlyLowPageBits(type) || !config->isPic; - if (sym.isPreemptible) + // R_AARCH64_AUTH_ABS64 requires a dynamic relocation. + if (sym.isPreemptible || e == R_AARCH64_AUTH) return false; if (!config->isPic) return true; @@ -1141,12 +1142,26 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, (rel == target->symbolicRel && !sym.isPreemptible)) { addRelativeReloc(*sec, offset, sym, addend, expr, type); return; - } else if (rel != 0) { + } + if (rel != 0) { if (config->emachine == EM_MIPS && rel == target->symbolicRel) rel = target->relativeRel; std::lock_guard lock(relocMutex); - sec->getPartition().relaDyn->addSymbolReloc(rel, *sec, offset, sym, - addend, type); + Partition &part = sec->getPartition(); + if (config->emachine == EM_AARCH64 && type == R_AARCH64_AUTH_ABS64) { + // For a preemptible symbol, we can't use a relative relocation. For an + // undefined symbol, we can't compute offset at link-time and use a + // relative relocation. Use a symbolic relocation instead. + if (sym.isPreemptible) { + part.relaDyn->addSymbolReloc(type, *sec, offset, sym, addend, type); + } else { + part.relaDyn->addReloc({R_AARCH64_AUTH_RELATIVE, sec, offset, + DynamicReloc::AddendOnlyWithTargetVA, sym, + addend, R_ABS}); + } + return; + } + part.relaDyn->addSymbolReloc(rel, *sec, offset, sym, addend, type); // MIPS ABI turns using of GOT and dynamic relocations inside out. // While regular ABI uses dynamic relocations to fill up GOT entries @@ -1171,7 +1186,10 @@ void RelocationScanner::processAux(RelExpr expr, RelType type, uint64_t offset, // When producing an executable, we can perform copy relocations (for // STT_OBJECT) and canonical PLT (for STT_FUNC) if sym is defined by a DSO. - if (!config->shared && sym.isShared()) { + // Copy relocations/canonical PLT entries are unsupported for + // R_AARCH64_AUTH_ABS64. 
+ if (!config->shared && sym.isShared() && + !(config->emachine == EM_AARCH64 && type == R_AARCH64_AUTH_ABS64)) { if (!canDefineSymbolInExecutable(sym)) { errorOrWarn("cannot preempt symbol: " + toString(sym) + getLocation(*sec, sym, offset)); diff --git a/lld/ELF/Relocations.h b/lld/ELF/Relocations.h index 7eb8a811..b7b9c09 100644 --- a/lld/ELF/Relocations.h +++ b/lld/ELF/Relocations.h @@ -87,6 +87,7 @@ enum RelExpr { R_AARCH64_PAGE_PC, R_AARCH64_RELAX_TLS_GD_TO_IE_PAGE_PC, R_AARCH64_TLSDESC_PAGE, + R_AARCH64_AUTH, R_ARM_PCA, R_ARM_SBREL, R_MIPS_GOTREL, diff --git a/lld/ELF/SyntheticSections.cpp b/lld/ELF/SyntheticSections.cpp index d4dc713..4427a12 100644 --- a/lld/ELF/SyntheticSections.cpp +++ b/lld/ELF/SyntheticSections.cpp @@ -314,22 +314,42 @@ GnuPropertySection::GnuPropertySection() config->wordsize, ".note.gnu.property") {} void GnuPropertySection::writeTo(uint8_t *buf) { + write32(buf, 4); // Name size + write32(buf + 4, getSize() - 16); // Content size + write32(buf + 8, NT_GNU_PROPERTY_TYPE_0); // Type + memcpy(buf + 12, "GNU", 4); // Name string + uint32_t featureAndType = config->emachine == EM_AARCH64 ? GNU_PROPERTY_AARCH64_FEATURE_1_AND : GNU_PROPERTY_X86_FEATURE_1_AND; - write32(buf, 4); // Name size - write32(buf + 4, config->is64 ? 16 : 12); // Content size - write32(buf + 8, NT_GNU_PROPERTY_TYPE_0); // Type - memcpy(buf + 12, "GNU", 4); // Name string - write32(buf + 16, featureAndType); // Feature type - write32(buf + 20, 4); // Feature size - write32(buf + 24, config->andFeatures); // Feature flags - if (config->is64) - write32(buf + 28, 0); // Padding + unsigned offset = 16; + if (config->andFeatures != 0) { + write32(buf + offset + 0, featureAndType); // Feature type + write32(buf + offset + 4, 4); // Feature size + write32(buf + offset + 8, config->andFeatures); // Feature flags + if (config->is64) + write32(buf + offset + 12, 0); // Padding + offset += 16; + } + + if (!ctx.aarch64PauthAbiCoreInfo.empty()) { + write32(buf + offset + 0, GNU_PROPERTY_AARCH64_FEATURE_PAUTH); + write32(buf + offset + 4, ctx.aarch64PauthAbiCoreInfo.size()); + memcpy(buf + offset + 8, ctx.aarch64PauthAbiCoreInfo.data(), + ctx.aarch64PauthAbiCoreInfo.size()); + } } -size_t GnuPropertySection::getSize() const { return config->is64 ? 32 : 28; } +size_t GnuPropertySection::getSize() const { + uint32_t contentSize = 0; + if (config->andFeatures != 0) + contentSize += config->is64 ? 
16 : 12; + if (!ctx.aarch64PauthAbiCoreInfo.empty()) + contentSize += 4 + 4 + ctx.aarch64PauthAbiCoreInfo.size(); + assert(contentSize != 0); + return contentSize + 16; +} BuildIdSection::BuildIdSection() : SyntheticSection(SHF_ALLOC, SHT_NOTE, 4, ".note.gnu.build-id"), diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 40d617b..fc9084f 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -24,6 +24,7 @@ #include "lld/Common/CommonLinkerContext.h" #include "lld/Common/Filesystem.h" #include "lld/Common/Strings.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringMap.h" #include "llvm/Support/BLAKE3.h" #include "llvm/Support/Parallel.h" @@ -564,7 +565,7 @@ template void elf::createSyntheticSections() { in.iplt = std::make_unique(); add(*in.iplt); - if (config->andFeatures) + if (config->andFeatures || !ctx.aarch64PauthAbiCoreInfo.empty()) add(*make()); // .note.GNU-stack is always added when we are creating a re-linkable diff --git a/lld/docs/ReleaseNotes.rst b/lld/docs/ReleaseNotes.rst index 97ed060..bf0c8e5 100644 --- a/lld/docs/ReleaseNotes.rst +++ b/lld/docs/ReleaseNotes.rst @@ -29,6 +29,9 @@ ELF Improvements * ``--compress-sections =[none|zlib|zstd]`` is added to compress matched output sections without the ``SHF_ALLOC`` flag. (`#84855 `_) +* ``GNU_PROPERTY_AARCH64_FEATURE_PAUTH`` notes, ``R_AARCH64_AUTH_ABS64`` and + ``R_AARCH64_AUTH_RELATIVE`` relocations are now supported. + (`#72714 `_) Breaking changes ---------------- diff --git a/lld/docs/ld.lld.1 b/lld/docs/ld.lld.1 index 65e50e3..e031673 100644 --- a/lld/docs/ld.lld.1 +++ b/lld/docs/ld.lld.1 @@ -762,6 +762,11 @@ Specify how to report the missing GNU_PROPERTY_X86_FEATURE_1_IBT or GNU_PROPERTY .Cm none is the default, linker will not report the missing property otherwise will be reported as a warning or an error. .Pp +.It Cm pauth-report Ns = Ns Ar [none|warning|error] +Specify how to report the missing GNU_PROPERTY_AARCH64_FEATURE_PAUTH property. +.Cm none +is the default, linker will not report the missing property otherwise will be reported as a warning or an error. +.Pp .It Cm force-bti Force enable AArch64 BTI instruction in PLT, warn if Input ELF file does not have GNU_PROPERTY_AARCH64_FEATURE_1_BTI property. 
.Pp diff --git a/lld/test/ELF/aarch64-bti-pac-cli-error.s b/lld/test/ELF/aarch64-bti-pac-cli-error.s index b8ab1a2..703c0aa 100644 --- a/lld/test/ELF/aarch64-bti-pac-cli-error.s +++ b/lld/test/ELF/aarch64-bti-pac-cli-error.s @@ -1,17 +1,22 @@ # REQUIRES: x86 # RUN: llvm-mc --triple=x86_64-pc-linux --filetype=obj -o %t.o %s -# RUN: not ld.lld -z pac-plt -z force-bti -z bti-report=error %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -z pac-plt -z force-bti -z bti-report=error \ +# RUN: -z pauth-report=error %t.o -o /dev/null 2>&1 | FileCheck %s +# RUN: not ld.lld -z pac-plt -z force-bti -z bti-report=warning \ +# RUN: -z pauth-report=warning %t.o -o /dev/null 2>&1 | FileCheck %s # -## Check that we error if -z pac-plt, -z force-bti and -z bti-report=error are used when target is not -## aarch64 +## Check that we error if -z pac-plt, -z force-bti are present and +## -z bti-report and -z pauth-report are not none when target is not aarch64 # CHECK: error: -z pac-plt only supported on AArch64 # CHECK-NEXT: error: -z force-bti only supported on AArch64 # CHECK-NEXT: error: -z bti-report only supported on AArch64 +# CHECK-NEXT: error: -z pauth-report only supported on AArch64 -# RUN: not ld.lld -z bti-report=something %t.o -o /dev/null 2>&1 | \ -# RUN: FileCheck --check-prefix=REPORT_INVALID %s +# RUN: not ld.lld -z bti-report=something -z pauth-report=something \ +# RUN: %t.o -o /dev/null 2>&1 | FileCheck --check-prefix=REPORT_INVALID %s # REPORT_INVALID: error: -z bti-report= parameter something is not recognized +# REPORT_INVALID: error: -z pauth-report= parameter something is not recognized # REPORT_INVALID-EMPTY: .globl start diff --git a/lld/test/ELF/aarch64-feature-pauth.s b/lld/test/ELF/aarch64-feature-pauth.s new file mode 100644 index 0000000..699a650 --- /dev/null +++ b/lld/test/ELF/aarch64-feature-pauth.s @@ -0,0 +1,114 @@ +# REQUIRES: aarch64 + +# RUN: rm -rf %t && split-file %s %t && cd %t + +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag1.s -o tag1.o +# RUN: cp tag1.o tag1a.o +# RUN: ld.lld -shared tag1.o tag1a.o -o tagok.so +# RUN: llvm-readelf -n tagok.so | FileCheck --check-prefix OK %s + +# OK: AArch64 PAuth ABI core info: platform 0x2a (unknown), version 0x1 + +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag2.s -o tag2.o +# RUN: not ld.lld tag1.o tag1a.o tag2.o -o /dev/null 2>&1 | FileCheck --check-prefix ERR1 %s + +# ERR1: error: incompatible values of AArch64 PAuth core info found +# ERR1-NEXT: >>> tag1.o: 0x2a000000000000000{{1|2}}00000000000000 +# ERR1-NEXT: >>> tag2.o: 0x2a000000000000000{{1|2}}00000000000000 + +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag-short.s -o short.o +# RUN: not ld.lld short.o -o /dev/null 2>&1 | FileCheck --check-prefix ERR2 %s + +# ERR2: error: short.o:(.note.gnu.property+0x0): GNU_PROPERTY_AARCH64_FEATURE_PAUTH entry is invalid: expected 16 bytes, but got 12 + +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag-long.s -o long.o +# RUN: not ld.lld long.o -o /dev/null 2>&1 | FileCheck --check-prefix ERR3 %s + +# ERR3: error: long.o:(.note.gnu.property+0x0): GNU_PROPERTY_AARCH64_FEATURE_PAUTH entry is invalid: expected 16 bytes, but got 24 + +# RUN: llvm-mc -filetype=obj -triple=aarch64-linux-gnu abi-tag-multiple.s -o multiple.o +# RUN: not ld.lld multiple.o -o /dev/null 2>&1 | FileCheck --check-prefix ERR4 %s +# ERR4: error: multiple.o:(.note.gnu.property+0x0): multiple GNU_PROPERTY_AARCH64_FEATURE_PAUTH entries are not supported + +# RUN: llvm-mc -filetype=obj 
-triple=aarch64-linux-gnu no-info.s -o noinfo1.o +# RUN: cp noinfo1.o noinfo2.o +# RUN: not ld.lld -z pauth-report=error noinfo1.o tag1.o noinfo2.o -o /dev/null 2>&1 | FileCheck --check-prefix ERR5 %s +# RUN: ld.lld -z pauth-report=warning noinfo1.o tag1.o noinfo2.o -o /dev/null 2>&1 | FileCheck --check-prefix WARN %s +# RUN: ld.lld -z pauth-report=none noinfo1.o tag1.o noinfo2.o --fatal-warnings -o /dev/null + +# ERR5: error: noinfo1.o: -z pauth-report: file does not have AArch64 PAuth core info while 'tag1.o' has one +# ERR5-NEXT: error: noinfo2.o: -z pauth-report: file does not have AArch64 PAuth core info while 'tag1.o' has one +# WARN: warning: noinfo1.o: -z pauth-report: file does not have AArch64 PAuth core info while 'tag1.o' has one +# WARN-NEXT: warning: noinfo2.o: -z pauth-report: file does not have AArch64 PAuth core info while 'tag1.o' has one + +#--- abi-tag-short.s + +.section ".note.gnu.property", "a" +.long 4 +.long 20 +.long 5 +.asciz "GNU" +.long 0xc0000001 +.long 12 +.quad 2 +.long 31 + +#--- abi-tag-long.s + +.section ".note.gnu.property", "a" +.long 4 +.long 32 +.long 5 +.asciz "GNU" +.long 0xc0000001 +.long 24 +.quad 2 +.quad 31 +.quad 0 + +#--- abi-tag-multiple.s + +.section ".note.gnu.property", "a" +.long 4 +.long 48 +.long 5 +.asciz "GNU" +.long 0xc0000001 +.long 16 +.quad 42 // platform +.quad 1 // version +.long 0xc0000001 +.long 16 +.quad 42 // platform +.quad 1 // version + +#--- abi-tag1.s + +.section ".note.gnu.property", "a" +.long 4 +.long 24 +.long 5 +.asciz "GNU" +.long 0xc0000001 +.long 16 +.quad 42 // platform +.quad 1 // version + +#--- abi-tag2.s + +.section ".note.gnu.property", "a" +.long 4 +.long 24 +.long 5 +.asciz "GNU" +.long 0xc0000001 +.long 16 +.quad 42 // platform +.quad 2 // version + +#--- no-info.s + +## define _start to avoid missing entry warning and use --fatal-warnings to assert no diagnostic +## allow multiple definitions of _start for simplicity +.weak _start; +_start: diff --git a/lld/test/ELF/aarch64-reloc-pauth-ro.s b/lld/test/ELF/aarch64-reloc-pauth-ro.s new file mode 100644 index 0000000..1be78ba --- /dev/null +++ b/lld/test/ELF/aarch64-reloc-pauth-ro.s @@ -0,0 +1,22 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=aarch64 %p/Inputs/shared2.s -o %t.so.o +# RUN: ld.lld -shared %t.so.o -soname=so -o %t.so +# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t.o +# RUN: not ld.lld -pie %t.o %t.so -o %t2 2>&1 | FileCheck -DFILE=%t %s --implicit-check-not=error: + +# CHECK: error: relocation R_AARCH64_AUTH_ABS64 cannot be used against symbol 'zed2'; recompile with -fPIC +# CHECK-NEXT: >>> defined in [[FILE]].so +# CHECK-NEXT: >>> referenced by [[FILE]].o:(.ro+0x0) + +# CHECK: error: relocation R_AARCH64_AUTH_ABS64 cannot be used against symbol 'bar2'; recompile with -fPIC +# CHECK: error: relocation R_AARCH64_AUTH_ABS64 cannot be used against local symbol; recompile with -fPIC + +foo: +.type foo, @function + +.section .ro, "a" +.p2align 3 +.quad zed2@AUTH(da,42) +.quad bar2@AUTH(ia,42) +.quad foo@AUTH(ia,42) diff --git a/lld/test/ELF/aarch64-reloc-pauth.s b/lld/test/ELF/aarch64-reloc-pauth.s new file mode 100644 index 0000000..b603d8f --- /dev/null +++ b/lld/test/ELF/aarch64-reloc-pauth.s @@ -0,0 +1,108 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -filetype=obj -triple=aarch64 %p/Inputs/shared2.s -o %t.a.o +# RUN: ld.lld -shared %t.a.o -soname=so -o %t.a.so +# RUN: llvm-mc -filetype=obj -triple=aarch64 %s -o %t.o + +# RUN: ld.lld -pie %t.o %t.a.so -o %t +# RUN: llvm-readobj -r %t | FileCheck --check-prefix=UNPACKED %s 
+ +# UNPACKED: Section ({{.+}}) .rela.dyn { +# UNPACKED-NEXT: 0x30470 R_AARCH64_AUTH_RELATIVE - 0x1 +# UNPACKED-NEXT: 0x30478 R_AARCH64_AUTH_RELATIVE - 0x30472 +# UNPACKED-NEXT: 0x30480 R_AARCH64_AUTH_RELATIVE - 0xFFFFFFFFFFFFFFFD +# UNPACKED-NEXT: 0x30488 R_AARCH64_AUTH_RELATIVE - 0x12345678 +# UNPACKED-NEXT: 0x30490 R_AARCH64_AUTH_RELATIVE - 0x123456789A +# UNPACKED-NEXT: 0x30498 R_AARCH64_AUTH_RELATIVE - 0xFFFFFFEDCBA98766 +# UNPACKED-NEXT: 0x304A0 R_AARCH64_AUTH_RELATIVE - 0x8003046F +# UNPACKED-NEXT: 0x304B9 R_AARCH64_AUTH_RELATIVE - 0x4 +# UNPACKED-NEXT: 0x304C2 R_AARCH64_AUTH_RELATIVE - 0x30475 +# UNPACKED-NEXT: 0x304A8 R_AARCH64_AUTH_ABS64 zed2 0x1111 +# UNPACKED-NEXT: 0x304B0 R_AARCH64_AUTH_ABS64 bar2 0x0 +# UNPACKED-NEXT: } + +# RUN: ld.lld %t.o %t.a.so -o %t.nopie +# RUN: llvm-readobj -r %t.nopie | FileCheck --check-prefix=NOPIE %s + +# NOPIE: Section ({{.+}}) .rela.dyn { +# NOPIE: 0x230460 R_AARCH64_AUTH_RELATIVE - 0x200001 +# NOPIE-NEXT: 0x230468 R_AARCH64_AUTH_RELATIVE - 0x230462 +# NOPIE-NEXT: 0x230470 R_AARCH64_AUTH_RELATIVE - 0x1FFFFD +# NOPIE-NEXT: 0x230478 R_AARCH64_AUTH_RELATIVE - 0x12545678 +# NOPIE-NEXT: 0x230480 R_AARCH64_AUTH_RELATIVE - 0x123476789A +# NOPIE-NEXT: 0x230488 R_AARCH64_AUTH_RELATIVE - 0xFFFFFFEDCBC98766 +# NOPIE-NEXT: 0x230490 R_AARCH64_AUTH_RELATIVE - 0x8023045F +# NOPIE-NEXT: 0x2304A9 R_AARCH64_AUTH_RELATIVE - 0x200004 +# NOPIE-NEXT: 0x2304B2 R_AARCH64_AUTH_RELATIVE - 0x230465 +# NOPIE-NEXT: 0x230498 R_AARCH64_AUTH_ABS64 zed2 0x1111 +# NOPIE-NEXT: 0x2304A0 R_AARCH64_AUTH_ABS64 bar2 0x0 +# NOPIE-NEXT: } + +# RUN: ld.lld -pie %t.o %t.a.so -o %t.pie +# RUN: llvm-readelf -S -d -r -x .test %t.pie | FileCheck --check-prefixes=PIE,HEX %s + +# PIE: Section Headers: +# PIE-NEXT: Name Type Address Off Size ES Flg Lk Inf Al +# PIE: .rela.dyn RELA {{0*}}[[#%x,ADDR1:]] +# PIE-SAME: {{0*}}[[#ADDR1]] 000108 18 A 1 0 8 + +# PIE: Relocation section '.rela.dyn' at offset 0x[[#ADDR1]] contains 11 entries: +# PIE-NEXT: Offset Info Type Symbol's Value Symbol's Name + Addend +# PIE-NEXT: 0000000000030470 0000000000000411 R_AARCH64_AUTH_RELATIVE 1 +# PIE-NEXT: 0000000000030478 0000000000000411 R_AARCH64_AUTH_RELATIVE 30472 +# PIE-NEXT: 0000000000030480 0000000000000411 R_AARCH64_AUTH_RELATIVE fffffffffffffffd +# PIE-NEXT: 0000000000030488 0000000000000411 R_AARCH64_AUTH_RELATIVE 12345678 +# PIE-NEXT: 0000000000030490 0000000000000411 R_AARCH64_AUTH_RELATIVE 123456789a +# PIE-NEXT: 0000000000030498 0000000000000411 R_AARCH64_AUTH_RELATIVE ffffffedcba98766 +# PIE-NEXT: 00000000000304a0 0000000000000411 R_AARCH64_AUTH_RELATIVE 8003046f +# PIE-NEXT: 00000000000304b9 0000000000000411 R_AARCH64_AUTH_RELATIVE 4 +# PIE-NEXT: 00000000000304c2 0000000000000411 R_AARCH64_AUTH_RELATIVE 30475 +# PIE-NEXT: 00000000000304a8 0000000100000244 R_AARCH64_AUTH_ABS64 0000000000000000 zed2 + 1111 +# PIE-NEXT: 00000000000304b0 0000000200000244 R_AARCH64_AUTH_ABS64 0000000000000000 bar2 + 0 + +# HEX: Hex dump of section '.test': +# HEX-NEXT: 0x00030470 00000000 2a000020 00000000 2b000000 +## ^^^^ Discr = 42 +## ^^ Key (bits 5..6) = DA +## ^^^^ Discr = 43 +## ^^ Key (bits 5..6) = IA +# HEX-NEXT: 0x00030480 00000000 2c000080 00000000 2d000020 +## ^^^^ Discr = 44 +## ^^ Key (bits 5..6) = IA +## ^^ Addr diversity (bit 7) = true +## ^^^^ Discr = 45 +## ^^ Key (bits 5..6) = DA +# HEX-NEXT: 0x00030490 00000000 2e000020 00000000 2f000020 +## ^^^^ Discr = 46 +## ^^ Key (bits 5..6) = DA +## ^^^^ Discr = 47 +## ^^ Key (bits 5..6) = DA +# HEX-NEXT: 0x000304a0 00000000 30000020 00000000 31000020 +## ^^^^ 
Discr = 48 +## ^^ Key (bits 5..6) = DA +## ^^^^ Discr = 49 +## ^^ Key (bits 5..6) = DA +# HEX-NEXT: 0x000304b0 00000000 32000000 77000000 00330000 +## ^^^^ Discr = 50 +## ^^ Key (bits 5..6) = IA +## ^^^^ Discr = 51 +# HEX-NEXT: 0x000304c0 20770000 00003400 0020{{\ }} +## ^^ Key (bits 5..6) = DA +## ^^^^ Discr = 52 +## ^^ Key (bits 5..6) = DA + +.section .test, "aw" +.p2align 3 +.quad (__ehdr_start + 1)@AUTH(da,42) +.quad (.test + 2)@AUTH(ia,43) +.quad (__ehdr_start - 3)@AUTH(ia,44,addr) +.quad (__ehdr_start + 0x12345678)@AUTH(da,45) +.quad (__ehdr_start + 0x123456789A)@AUTH(da,46) +.quad (__ehdr_start - 0x123456789A)@AUTH(da,47) +.quad (.test + 0x7FFFFFFF)@AUTH(da,48) +.quad (zed2 + 0x1111)@AUTH(da,49) +.quad bar2@AUTH(ia,50) +.byte 0x77 +.quad (__ehdr_start + 4)@AUTH(da,51) +.byte 0x77 +.quad (.test + 5)@AUTH(da,52) -- cgit v1.1 From 71c3f5d617aa132418e87403c8be3cdcd102ab18 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Thu, 4 Apr 2024 11:41:27 +0200 Subject: [reland][libc] Refactor `BigInt` (#87613) This is a reland of #86137 with a fix for platforms / compiler that do not support trivially constructible int128 types. --- libc/fuzzing/CMakeLists.txt | 1 + libc/fuzzing/__support/CMakeLists.txt | 7 + libc/fuzzing/__support/uint_fuzz.cpp | 70 ++ libc/src/__support/FPUtil/dyadic_float.h | 6 +- libc/src/__support/UInt.h | 1129 ++++++++++---------- libc/src/__support/float_to_string.h | 7 +- libc/src/__support/integer_literals.h | 25 +- libc/src/__support/math_extras.h | 249 ++--- libc/src/__support/number_pair.h | 11 - libc/test/src/__support/integer_literals_test.cpp | 21 + libc/test/src/__support/math_extras_test.cpp | 57 + libc/test/src/__support/uint_test.cpp | 192 +++- .../libc/test/src/__support/BUILD.bazel | 1 + 13 files changed, 1014 insertions(+), 762 deletions(-) create mode 100644 libc/fuzzing/__support/CMakeLists.txt create mode 100644 libc/fuzzing/__support/uint_fuzz.cpp diff --git a/libc/fuzzing/CMakeLists.txt b/libc/fuzzing/CMakeLists.txt index 8248768..816691b 100644 --- a/libc/fuzzing/CMakeLists.txt +++ b/libc/fuzzing/CMakeLists.txt @@ -1,6 +1,7 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=fuzzer") add_custom_target(libc-fuzzer) +add_subdirectory(__support) # TODO(#85680): Re-enable math fuzzing after headers are sorted out # add_subdirectory(math) add_subdirectory(stdlib) diff --git a/libc/fuzzing/__support/CMakeLists.txt b/libc/fuzzing/__support/CMakeLists.txt new file mode 100644 index 0000000..278e914 --- /dev/null +++ b/libc/fuzzing/__support/CMakeLists.txt @@ -0,0 +1,7 @@ +add_libc_fuzzer( + uint_fuzz + SRCS + uint_fuzz.cpp + DEPENDS + libc.src.__support.uint +) diff --git a/libc/fuzzing/__support/uint_fuzz.cpp b/libc/fuzzing/__support/uint_fuzz.cpp new file mode 100644 index 0000000..f48f00d --- /dev/null +++ b/libc/fuzzing/__support/uint_fuzz.cpp @@ -0,0 +1,70 @@ +#include "src/__support/CPP/bit.h" +#include "src/__support/UInt.h" +#include "src/string/memory_utils/inline_memcpy.h" + +using namespace LIBC_NAMESPACE; + +// Helper function when using gdb / lldb to set a breakpoint and inspect values. 
+template void debug_and_trap(const char *msg, T a, T b) { + __builtin_trap(); +} + +#define DEBUG_AND_TRAP() + +#define TEST_BINOP(OP) \ + if ((a OP b) != (static_cast(BigInt(a) OP BigInt(b)))) \ + debug_and_trap(#OP, a, b); + +#define TEST_SHIFTOP(OP) \ + if ((a OP b) != (static_cast(BigInt(a) OP b))) \ + debug_and_trap(#OP, a, b); + +#define TEST_FUNCTION(FUN) \ + if (FUN(a) != FUN(BigInt(a))) \ + debug_and_trap(#FUN, a, b); + +// Test that basic arithmetic operations of BigInt behave like their scalar +// counterparts. +template void run_tests(T a, T b) { + TEST_BINOP(+) + TEST_BINOP(-) + TEST_BINOP(*) + if (b != 0) + TEST_BINOP(/) + if (b >= 0 && b < cpp::numeric_limits::digits) { + TEST_SHIFTOP(<<) + TEST_SHIFTOP(>>) + } + if constexpr (!BigInt::SIGNED) { + TEST_FUNCTION(cpp::has_single_bit) + TEST_FUNCTION(cpp::countr_zero) + TEST_FUNCTION(cpp::countl_zero) + TEST_FUNCTION(cpp::countl_one) + TEST_FUNCTION(cpp::countr_one) + } +} + +// Reads a T from libfuzzer data. +template T read(const uint8_t *data, size_t &remainder) { + T out = 0; + constexpr size_t T_SIZE = sizeof(T); + const size_t copy_size = remainder < T_SIZE ? remainder : T_SIZE; + inline_memcpy(&out, data, copy_size); + remainder -= copy_size; + return out; +} + +template +void run_tests(const uint8_t *data, size_t size) { + const auto a = read(data, size); + const auto b = read(data, size); + run_tests(a, b); +} + +extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) { + // unsigned + run_tests>(data, size); + // signed + run_tests>(data, size); + return 0; +} diff --git a/libc/src/__support/FPUtil/dyadic_float.h b/libc/src/__support/FPUtil/dyadic_float.h index 73fd738..e0c205f 100644 --- a/libc/src/__support/FPUtil/dyadic_float.h +++ b/libc/src/__support/FPUtil/dyadic_float.h @@ -58,9 +58,9 @@ template struct DyadicFloat { // significant bit. LIBC_INLINE constexpr DyadicFloat &normalize() { if (!mantissa.is_zero()) { - int shift_length = static_cast(mantissa.clz()); + int shift_length = cpp::countl_zero(mantissa); exponent -= shift_length; - mantissa.shift_left(static_cast(shift_length)); + mantissa <<= static_cast(shift_length); } return *this; } @@ -233,7 +233,7 @@ LIBC_INLINE constexpr DyadicFloat quick_add(DyadicFloat a, result.sign = a.sign; result.exponent = a.exponent; result.mantissa = a.mantissa; - if (result.mantissa.add(b.mantissa)) { + if (result.mantissa.add_overflow(b.mantissa)) { // Mantissa addition overflow. 
result.shift_right(1); result.mantissa.val[DyadicFloat::MantissaType::WORD_COUNT - 1] |= diff --git a/libc/src/__support/UInt.h b/libc/src/__support/UInt.h index 282efdb..c1e55ce 100644 --- a/libc/src/__support/UInt.h +++ b/libc/src/__support/UInt.h @@ -14,10 +14,11 @@ #include "src/__support/CPP/limits.h" #include "src/__support/CPP/optional.h" #include "src/__support/CPP/type_traits.h" -#include "src/__support/macros/attributes.h" // LIBC_INLINE -#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/attributes.h" // LIBC_INLINE +#include "src/__support/macros/optimization.h" // LIBC_UNLIKELY +#include "src/__support/macros/properties/compiler.h" // LIBC_COMPILER_IS_CLANG #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128, LIBC_TYPES_HAS_INT64 -#include "src/__support/math_extras.h" // SumCarry, DiffBorrow +#include "src/__support/math_extras.h" // add_with_carry, sub_with_borrow #include "src/__support/number_pair.h" #include // For size_t @@ -25,71 +26,324 @@ namespace LIBC_NAMESPACE { -namespace internal { -template struct half_width; +namespace multiword { -template <> struct half_width : cpp::type_identity {}; -template <> struct half_width : cpp::type_identity {}; +// A type trait mapping unsigned integers to their half-width unsigned +// counterparts. +template struct half_width; template <> struct half_width : cpp::type_identity {}; +template <> struct half_width : cpp::type_identity {}; +#ifdef LIBC_TYPES_HAS_INT64 +template <> struct half_width : cpp::type_identity {}; #ifdef LIBC_TYPES_HAS_INT128 template <> struct half_width<__uint128_t> : cpp::type_identity {}; #endif // LIBC_TYPES_HAS_INT128 - +#endif // LIBC_TYPES_HAS_INT64 template using half_width_t = typename half_width::type; -template constexpr NumberPair full_mul(T a, T b) { - NumberPair pa = split(a); - NumberPair pb = split(b); - NumberPair prod; +// An array of two elements that can be used in multiword operations. +template struct DoubleWide final : cpp::array { + using UP = cpp::array; + using UP::UP; + LIBC_INLINE constexpr DoubleWide(T lo, T hi) : UP({lo, hi}) {} +}; + +// Converts an unsigned value into a DoubleWide>. +template LIBC_INLINE constexpr auto split(T value) { + static_assert(cpp::is_unsigned_v); + using half_type = half_width_t; + return DoubleWide( + half_type(value), + half_type(value >> cpp::numeric_limits::digits)); +} + +// The low part of a DoubleWide value. +template LIBC_INLINE constexpr T lo(const DoubleWide &value) { + return value[0]; +} +// The high part of a DoubleWide value. +template LIBC_INLINE constexpr T hi(const DoubleWide &value) { + return value[1]; +} +// The low part of an unsigned value. +template LIBC_INLINE constexpr half_width_t lo(T value) { + return lo(split(value)); +} +// The high part of an unsigned value. +template LIBC_INLINE constexpr half_width_t hi(T value) { + return hi(split(value)); +} + +// Returns 'a' times 'b' in a DoubleWide. Cannot overflow by construction. 
+template +LIBC_INLINE constexpr DoubleWide mul2(word a, word b) { + if constexpr (cpp::is_same_v) { + return split(uint16_t(a) * uint16_t(b)); + } else if constexpr (cpp::is_same_v) { + return split(uint32_t(a) * uint32_t(b)); + } +#ifdef LIBC_TYPES_HAS_INT64 + else if constexpr (cpp::is_same_v) { + return split(uint64_t(a) * uint64_t(b)); + } +#endif +#ifdef LIBC_TYPES_HAS_INT128 + else if constexpr (cpp::is_same_v) { + return split<__uint128_t>(__uint128_t(a) * __uint128_t(b)); + } +#endif + else { + using half_word = half_width_t; + const auto shiftl = [](word value) -> word { + return value << cpp::numeric_limits::digits; + }; + const auto shiftr = [](word value) -> word { + return value >> cpp::numeric_limits::digits; + }; + // Here we do a one digit multiplication where 'a' and 'b' are of type + // word. We split 'a' and 'b' into half words and perform the classic long + // multiplication with 'a' and 'b' being two-digit numbers. + + // a a_hi a_lo + // x b => x b_hi b_lo + // ---- ----------- + // c result + // We convert 'lo' and 'hi' from 'half_word' to 'word' so multiplication + // doesn't overflow. + const word a_lo = lo(a); + const word b_lo = lo(b); + const word a_hi = hi(a); + const word b_hi = hi(b); + const word step1 = b_lo * a_lo; // no overflow; + const word step2 = b_lo * a_hi; // no overflow; + const word step3 = b_hi * a_lo; // no overflow; + const word step4 = b_hi * a_hi; // no overflow; + word lo_digit = step1; + word hi_digit = step4; + const word no_carry = 0; + word carry; + word _; // unused carry variable. + lo_digit = add_with_carry(lo_digit, shiftl(step2), no_carry, carry); + hi_digit = add_with_carry(hi_digit, shiftr(step2), carry, _); + lo_digit = add_with_carry(lo_digit, shiftl(step3), no_carry, carry); + hi_digit = add_with_carry(hi_digit, shiftr(step3), carry, _); + return DoubleWide(lo_digit, hi_digit); + } +} + +// In-place 'dst op= rhs' with operation with carry propagation. Returns carry. +template +LIBC_INLINE constexpr word inplace_binop(Function op_with_carry, + cpp::array &dst, + const cpp::array &rhs) { + static_assert(N >= M); + word carry_out = 0; + for (size_t i = 0; i < N; ++i) { + const bool has_rhs_value = i < M; + const word rhs_value = has_rhs_value ? rhs[i] : 0; + const word carry_in = carry_out; + dst[i] = op_with_carry(dst[i], rhs_value, carry_in, carry_out); + // stop early when rhs is over and no carry is to be propagated. + if (!has_rhs_value && carry_out == 0) + break; + } + return carry_out; +} - prod.lo = pa.lo * pb.lo; // exact - prod.hi = pa.hi * pb.hi; // exact - NumberPair lo_hi = split(pa.lo * pb.hi); // exact - NumberPair hi_lo = split(pa.hi * pb.lo); // exact +// In-place addition. Returns carry. +template +LIBC_INLINE constexpr word add_with_carry(cpp::array &dst, + const cpp::array &rhs) { + return inplace_binop(LIBC_NAMESPACE::add_with_carry, dst, rhs); +} + +// In-place subtraction. Returns borrow. +template +LIBC_INLINE constexpr word sub_with_borrow(cpp::array &dst, + const cpp::array &rhs) { + return inplace_binop(LIBC_NAMESPACE::sub_with_borrow, dst, rhs); +} + +// In-place multiply-add. Returns carry. +// i.e., 'dst += b * c' +template +LIBC_INLINE constexpr word mul_add_with_carry(cpp::array &dst, word b, + word c) { + return add_with_carry(dst, mul2(b, c)); +} - constexpr size_t HALF_BIT_WIDTH = sizeof(T) * CHAR_BIT / 2; +// An array of two elements serving as an accumulator during multiword +// computations. 
+template struct Accumulator final : cpp::array { + using UP = cpp::array; + LIBC_INLINE constexpr Accumulator() : UP({0, 0}) {} + LIBC_INLINE constexpr T advance(T carry_in) { + auto result = UP::front(); + UP::front() = UP::back(); + UP::back() = carry_in; + return result; + } + LIBC_INLINE constexpr T sum() const { return UP::front(); } + LIBC_INLINE constexpr T carry() const { return UP::back(); } +}; - auto r1 = add_with_carry(prod.lo, lo_hi.lo << HALF_BIT_WIDTH, T(0)); - prod.lo = r1.sum; - prod.hi = add_with_carry(prod.hi, lo_hi.hi, r1.carry).sum; +// In-place multiplication by a single word. Returns carry. +template +LIBC_INLINE constexpr word scalar_multiply_with_carry(cpp::array &dst, + word x) { + Accumulator acc; + for (auto &val : dst) { + const word carry = mul_add_with_carry(acc, val, x); + val = acc.advance(carry); + } + return acc.carry(); +} - auto r2 = add_with_carry(prod.lo, hi_lo.lo << HALF_BIT_WIDTH, T(0)); - prod.lo = r2.sum; - prod.hi = add_with_carry(prod.hi, hi_lo.hi, r2.carry).sum; +// Multiplication of 'lhs' by 'rhs' into 'dst'. Returns carry. +// This function is safe to use for signed numbers. +// https://stackoverflow.com/a/20793834 +// https://pages.cs.wisc.edu/%7Emarkhill/cs354/Fall2008/beyond354/int.mult.html +template +LIBC_INLINE constexpr word multiply_with_carry(cpp::array &dst, + const cpp::array &lhs, + const cpp::array &rhs) { + static_assert(O >= M + N); + Accumulator acc; + for (size_t i = 0; i < O; ++i) { + const size_t lower_idx = i < N ? 0 : i - N + 1; + const size_t upper_idx = i < M ? i : M - 1; + word carry = 0; + for (size_t j = lower_idx; j <= upper_idx; ++j) + carry += mul_add_with_carry(acc, lhs[j], rhs[i - j]); + dst[i] = acc.advance(carry); + } + return acc.carry(); +} - return prod; +template +LIBC_INLINE constexpr void quick_mul_hi(cpp::array &dst, + const cpp::array &lhs, + const cpp::array &rhs) { + Accumulator acc; + word carry = 0; + // First round of accumulation for those at N - 1 in the full product. + for (size_t i = 0; i < N; ++i) + carry += mul_add_with_carry(acc, lhs[i], rhs[N - 1 - i]); + for (size_t i = N; i < 2 * N - 1; ++i) { + acc.advance(carry); + carry = 0; + for (size_t j = i - N + 1; j < N; ++j) + carry += mul_add_with_carry(acc, lhs[j], rhs[i - j]); + dst[i - N] = acc.sum(); + } + dst.back() = acc.carry(); } -template <> -LIBC_INLINE constexpr NumberPair full_mul(uint32_t a, - uint32_t b) { - uint64_t prod = uint64_t(a) * uint64_t(b); - NumberPair result; - result.lo = uint32_t(prod); - result.hi = uint32_t(prod >> 32); - return result; +template +LIBC_INLINE constexpr bool is_negative(cpp::array &array) { + using signed_word = cpp::make_signed_t; + return cpp::bit_cast(array.back()) < 0; } +// An enum for the shift function below. +enum Direction { LEFT, RIGHT }; + +// A bitwise shift on an array of elements. +// TODO: Make the result UB when 'offset' is greater or equal to the number of +// bits in 'array'. This will allow for better code performance. 
+template +LIBC_INLINE constexpr cpp::array shift(cpp::array array, + size_t offset) { + static_assert(direction == LEFT || direction == RIGHT); + constexpr size_t WORD_BITS = cpp::numeric_limits::digits; + constexpr size_t TOTAL_BITS = N * WORD_BITS; + if (LIBC_UNLIKELY(offset == 0)) + return array; + if (LIBC_UNLIKELY(offset >= TOTAL_BITS)) + return {}; #ifdef LIBC_TYPES_HAS_INT128 -template <> -LIBC_INLINE constexpr NumberPair full_mul(uint64_t a, - uint64_t b) { - __uint128_t prod = __uint128_t(a) * __uint128_t(b); - NumberPair result; - result.lo = uint64_t(prod); - result.hi = uint64_t(prod >> 64); - return result; + if constexpr (TOTAL_BITS == 128) { + using type = cpp::conditional_t; + auto tmp = cpp::bit_cast(array); + if constexpr (direction == LEFT) + tmp <<= offset; + else + tmp >>= offset; + return cpp::bit_cast>(tmp); + } +#endif + const bool is_neg = is_signed && is_negative(array); + constexpr auto at = [](size_t index) -> int { + // reverse iteration when direction == LEFT. + if constexpr (direction == LEFT) + return int(N) - int(index) - 1; + return int(index); + }; + const auto safe_get_at = [&](size_t index) -> word { + // return appropriate value when accessing out of bound elements. + const int i = at(index); + if (i < 0) + return 0; + if (i >= int(N)) + return is_neg ? -1 : 0; + return array[i]; + }; + const size_t index_offset = offset / WORD_BITS; + const size_t bit_offset = offset % WORD_BITS; +#ifdef LIBC_COMPILER_IS_CLANG + __builtin_assume(index_offset < N); +#endif + cpp::array out = {}; + for (size_t index = 0; index < N; ++index) { + const word part1 = safe_get_at(index + index_offset); + const word part2 = safe_get_at(index + index_offset + 1); + word &dst = out[at(index)]; + if (bit_offset == 0) + dst = part1; // no crosstalk between parts. + else if constexpr (direction == LEFT) + dst = (part1 << bit_offset) | (part2 >> (WORD_BITS - bit_offset)); + else + dst = (part1 >> bit_offset) | (part2 << (WORD_BITS - bit_offset)); + } + return out; } -#endif // LIBC_TYPES_HAS_INT128 -} // namespace internal +#define DECLARE_COUNTBIT(NAME, INDEX_EXPR) \ + template \ + LIBC_INLINE constexpr int NAME(const cpp::array &val) { \ + int bit_count = 0; \ + for (size_t i = 0; i < N; ++i) { \ + const int word_count = cpp::NAME(val[INDEX_EXPR]); \ + bit_count += word_count; \ + if (word_count != cpp::numeric_limits::digits) \ + break; \ + } \ + return bit_count; \ + } + +DECLARE_COUNTBIT(countr_zero, i) // iterating forward +DECLARE_COUNTBIT(countr_one, i) // iterating forward +DECLARE_COUNTBIT(countl_zero, N - i - 1) // iterating backward +DECLARE_COUNTBIT(countl_one, N - i - 1) // iterating backward + +} // namespace multiword template struct BigInt { +private: static_assert(cpp::is_integral_v && cpp::is_unsigned_v, "WordType must be unsigned integer."); + struct Division { + BigInt quotient; + BigInt remainder; + }; + +public: using word_type = WordType; + using unsigned_type = BigInt; + using signed_type = BigInt; + LIBC_INLINE_VAR static constexpr bool SIGNED = Signed; LIBC_INLINE_VAR static constexpr size_t BITS = Bits; LIBC_INLINE_VAR @@ -100,10 +354,7 @@ struct BigInt { LIBC_INLINE_VAR static constexpr size_t WORD_COUNT = Bits / WORD_SIZE; - using unsigned_type = BigInt; - using signed_type = BigInt; - - cpp::array val{}; + cpp::array val{}; // zero initialized. 
LIBC_INLINE constexpr BigInt() = default; @@ -112,76 +363,67 @@ struct BigInt { template LIBC_INLINE constexpr BigInt( const BigInt &other) { - if (OtherBits >= Bits) { + if (OtherBits >= Bits) { // truncate for (size_t i = 0; i < WORD_COUNT; ++i) val[i] = other[i]; - } else { + } else { // zero or sign extend size_t i = 0; for (; i < OtherBits / WORD_SIZE; ++i) val[i] = other[i]; - WordType sign = 0; - if constexpr (Signed && OtherSigned) { - sign = static_cast( - -static_cast>(other.is_neg())); - } - for (; i < WORD_COUNT; ++i) - val[i] = sign; + extend(i, Signed && other.is_neg()); } } // Construct a BigInt from a C array. - template = 0> - LIBC_INLINE constexpr BigInt(const WordType (&nums)[N]) { - size_t min_wordcount = N < WORD_COUNT ? N : WORD_COUNT; - size_t i = 0; - for (; i < min_wordcount; ++i) + template LIBC_INLINE constexpr BigInt(const WordType (&nums)[N]) { + static_assert(N == WORD_COUNT); + for (size_t i = 0; i < WORD_COUNT; ++i) val[i] = nums[i]; + } - // If nums doesn't completely fill val, then fill the rest with zeroes. - for (; i < WORD_COUNT; ++i) - val[i] = 0; + LIBC_INLINE constexpr explicit BigInt( + const cpp::array &words) { + val = words; } // Initialize the first word to |v| and the rest to 0. template >> LIBC_INLINE constexpr BigInt(T v) { - val[0] = static_cast(v); - - if constexpr (WORD_COUNT == 1) - return; - - if constexpr (Bits < sizeof(T) * CHAR_BIT) { - for (int i = 1; i < WORD_COUNT; ++i) { - v >>= WORD_SIZE; - val[i] = static_cast(v); + constexpr size_t T_SIZE = sizeof(T) * CHAR_BIT; + const bool is_neg = Signed && (v < 0); + for (size_t i = 0; i < WORD_COUNT; ++i) { + if (v == 0) { + extend(i, is_neg); + return; } - return; - } - - size_t i = 1; - - if constexpr (WORD_SIZE < sizeof(T) * CHAR_BIT) - for (; i < sizeof(T) * CHAR_BIT / WORD_SIZE; ++i) { + val[i] = static_cast(v); + if constexpr (T_SIZE > WORD_SIZE) v >>= WORD_SIZE; - val[i] = static_cast(v); - } - - WordType sign = (Signed && (v < 0)) ? ~WordType(0) : WordType(0); - for (; i < WORD_COUNT; ++i) { - val[i] = sign; + else + v = 0; } } + LIBC_INLINE constexpr BigInt &operator=(const BigInt &other) = default; - LIBC_INLINE constexpr explicit BigInt( - const cpp::array &words) { - for (size_t i = 0; i < WORD_COUNT; ++i) - val[i] = words[i]; + // constants + LIBC_INLINE static constexpr BigInt zero() { return BigInt(); } + LIBC_INLINE static constexpr BigInt one() { return BigInt(1); } + LIBC_INLINE static constexpr BigInt all_ones() { return ~zero(); } + LIBC_INLINE static constexpr BigInt min() { + BigInt out; + if constexpr (SIGNED) + out.set_msb(); + return out; + } + LIBC_INLINE static constexpr BigInt max() { + BigInt out = all_ones(); + if constexpr (SIGNED) + out.clear_msb(); + return out; } // TODO: Reuse the Sign type. - LIBC_INLINE constexpr bool is_neg() const { - return val.back() >> (WORD_SIZE - 1); - } + LIBC_INLINE constexpr bool is_neg() const { return SIGNED && get_msb(); } template LIBC_INLINE constexpr explicit operator T() const { return to(); @@ -191,200 +433,100 @@ struct BigInt { LIBC_INLINE constexpr cpp::enable_if_t< cpp::is_integral_v && !cpp::is_same_v, T> to() const { + constexpr size_t T_SIZE = sizeof(T) * CHAR_BIT; T lo = static_cast(val[0]); - - constexpr size_t T_BITS = sizeof(T) * CHAR_BIT; - - if constexpr (T_BITS <= WORD_SIZE) + if constexpr (T_SIZE <= WORD_SIZE) return lo; - constexpr size_t MAX_COUNT = - T_BITS > Bits ? WORD_COUNT : T_BITS / WORD_SIZE; + T_SIZE > Bits ? 
WORD_COUNT : T_SIZE / WORD_SIZE; for (size_t i = 1; i < MAX_COUNT; ++i) lo += static_cast(val[i]) << (WORD_SIZE * i); - - if constexpr (Signed && (T_BITS > Bits)) { + if constexpr (Signed && (T_SIZE > Bits)) { // Extend sign for negative numbers. constexpr T MASK = (~T(0) << Bits); if (is_neg()) lo |= MASK; } - return lo; } LIBC_INLINE constexpr explicit operator bool() const { return !is_zero(); } - LIBC_INLINE constexpr BigInt &operator=(const BigInt &other) = default; - LIBC_INLINE constexpr bool is_zero() const { - for (size_t i = 0; i < WORD_COUNT; ++i) { - if (val[i] != 0) + for (auto part : val) + if (part != 0) return false; - } return true; } - // Add x to this number and store the result in this number. + // Add 'rhs' to this number and store the result in this number. // Returns the carry value produced by the addition operation. - LIBC_INLINE constexpr WordType add(const BigInt &x) { - SumCarry s{0, 0}; - for (size_t i = 0; i < WORD_COUNT; ++i) { - s = add_with_carry(val[i], x.val[i], s.carry); - val[i] = s.sum; - } - return s.carry; + LIBC_INLINE constexpr WordType add_overflow(const BigInt &rhs) { + return multiword::add_with_carry(val, rhs.val); } LIBC_INLINE constexpr BigInt operator+(const BigInt &other) const { - BigInt result; - SumCarry s{0, 0}; - for (size_t i = 0; i < WORD_COUNT; ++i) { - s = add_with_carry(val[i], other.val[i], s.carry); - result.val[i] = s.sum; - } + BigInt result = *this; + result.add_overflow(other); return result; } // This will only apply when initializing a variable from constant values, so // it will always use the constexpr version of add_with_carry. LIBC_INLINE constexpr BigInt operator+(BigInt &&other) const { - BigInt result; - SumCarry s{0, 0}; - for (size_t i = 0; i < WORD_COUNT; ++i) { - s = add_with_carry(val[i], other.val[i], s.carry); - result.val[i] = s.sum; - } - return result; + // We use addition commutativity to reuse 'other' and prevent allocation. + other.add_overflow(*this); // Returned carry value is ignored. + return other; } LIBC_INLINE constexpr BigInt &operator+=(const BigInt &other) { - add(other); // Returned carry value is ignored. + add_overflow(other); // Returned carry value is ignored. return *this; } - // Subtract x to this number and store the result in this number. + // Subtract 'rhs' to this number and store the result in this number. // Returns the carry value produced by the subtraction operation. - LIBC_INLINE constexpr WordType sub(const BigInt &x) { - DiffBorrow d{0, 0}; - for (size_t i = 0; i < WORD_COUNT; ++i) { - d = sub_with_borrow(val[i], x.val[i], d.borrow); - val[i] = d.diff; - } - return d.borrow; + LIBC_INLINE constexpr WordType sub_overflow(const BigInt &rhs) { + return multiword::sub_with_borrow(val, rhs.val); } LIBC_INLINE constexpr BigInt operator-(const BigInt &other) const { - BigInt result; - DiffBorrow d{0, 0}; - for (size_t i = 0; i < WORD_COUNT; ++i) { - d = sub_with_borrow(val[i], other.val[i], d.borrow); - result.val[i] = d.diff; - } + BigInt result = *this; + result.sub_overflow(other); // Returned carry value is ignored. return result; } LIBC_INLINE constexpr BigInt operator-(BigInt &&other) const { - BigInt result; - DiffBorrow d{0, 0}; - for (size_t i = 0; i < WORD_COUNT; ++i) { - d = sub_with_borrow(val[i], other.val[i], d.borrow); - result.val[i] = d.diff; - } + BigInt result = *this; + result.sub_overflow(other); // Returned carry value is ignored. 
return result; } LIBC_INLINE constexpr BigInt &operator-=(const BigInt &other) { // TODO(lntue): Set overflow flag / errno when carry is true. - sub(other); + sub_overflow(other); // Returned carry value is ignored. return *this; } - // Multiply this number with x and store the result in this number. It is - // implemented using the long multiplication algorithm by splitting the - // 64-bit words of this number and |x| in to 32-bit halves but peforming - // the operations using 64-bit numbers. This ensures that we don't lose the - // carry bits. - // Returns the carry value produced by the multiplication operation. + // Multiply this number with x and store the result in this number. LIBC_INLINE constexpr WordType mul(WordType x) { - BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); - for (size_t i = 0; i < WORD_COUNT; ++i) { - NumberPair prod = internal::full_mul(val[i], x); - BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); - const WordType carry = partial_sum.add(tmp); - val[i] = partial_sum.val[0]; - partial_sum.val[0] = partial_sum.val[1]; - partial_sum.val[1] = carry; - } - return partial_sum.val[1]; - } - - LIBC_INLINE constexpr BigInt operator*(const BigInt &other) const { - if constexpr (Signed) { - BigInt a(*this); - BigInt b(other); - const bool a_neg = a.is_neg(); - const bool b_neg = b.is_neg(); - if (a_neg) - a = -a; - if (b_neg) - b = -b; - BigInt prod = a * b; - if (a_neg != b_neg) - prod = -prod; - return static_cast>(prod); - } else { - if constexpr (WORD_COUNT == 1) { - return {val[0] * other.val[0]}; - } else { - BigInt result(0); - BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); - WordType carry = 0; - for (size_t i = 0; i < WORD_COUNT; ++i) { - for (size_t j = 0; j <= i; j++) { - NumberPair prod = - internal::full_mul(val[j], other.val[i - j]); - BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); - carry += partial_sum.add(tmp); - } - result.val[i] = partial_sum.val[0]; - partial_sum.val[0] = partial_sum.val[1]; - partial_sum.val[1] = carry; - carry = 0; - } - return result; - } - } + return multiword::scalar_multiply_with_carry(val, x); } - // Return the full product, only unsigned for now. + // Return the full product. template - LIBC_INLINE constexpr BigInt + LIBC_INLINE constexpr auto ful_mul(const BigInt &other) const { - BigInt result(0); - BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); - WordType carry = 0; - constexpr size_t OTHER_WORDCOUNT = - BigInt::WORD_COUNT; - for (size_t i = 0; i <= WORD_COUNT + OTHER_WORDCOUNT - 2; ++i) { - const size_t lower_idx = - i < OTHER_WORDCOUNT ? 0 : i - OTHER_WORDCOUNT + 1; - const size_t upper_idx = i < WORD_COUNT ? i : WORD_COUNT - 1; - for (size_t j = lower_idx; j <= upper_idx; ++j) { - NumberPair prod = - internal::full_mul(val[j], other.val[i - j]); - BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); - carry += partial_sum.add(tmp); - } - result.val[i] = partial_sum.val[0]; - partial_sum.val[0] = partial_sum.val[1]; - partial_sum.val[1] = carry; - carry = 0; - } - result.val[WORD_COUNT + OTHER_WORDCOUNT - 1] = partial_sum.val[0]; + BigInt result; + multiword::multiply_with_carry(result.val, val, other.val); return result; } + LIBC_INLINE constexpr BigInt operator*(const BigInt &other) const { + // Perform full mul and truncate. + return BigInt(ful_mul(other)); + } + // Fast hi part of the full product. 
The normal product `operator*` returns // `Bits` least significant bits of the full product, while this function will // approximate `Bits` most significant bits of the full product with errors @@ -407,39 +549,17 @@ struct BigInt { // 256 4 16 10 3 // 512 8 64 36 7 LIBC_INLINE constexpr BigInt quick_mul_hi(const BigInt &other) const { - BigInt result(0); - BigInt<2 * WORD_SIZE, Signed, WordType> partial_sum(0); - WordType carry = 0; - // First round of accumulation for those at WORD_COUNT - 1 in the full - // product. - for (size_t i = 0; i < WORD_COUNT; ++i) { - NumberPair prod = - internal::full_mul(val[i], other.val[WORD_COUNT - 1 - i]); - BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); - carry += partial_sum.add(tmp); - } - for (size_t i = WORD_COUNT; i < 2 * WORD_COUNT - 1; ++i) { - partial_sum.val[0] = partial_sum.val[1]; - partial_sum.val[1] = carry; - carry = 0; - for (size_t j = i - WORD_COUNT + 1; j < WORD_COUNT; ++j) { - NumberPair prod = - internal::full_mul(val[j], other.val[i - j]); - BigInt<2 * WORD_SIZE, Signed, WordType> tmp({prod.lo, prod.hi}); - carry += partial_sum.add(tmp); - } - result.val[i - WORD_COUNT] = partial_sum.val[0]; - } - result.val[WORD_COUNT - 1] = partial_sum.val[1]; + BigInt result; + multiword::quick_mul_hi(result.val, val, other.val); return result; } - // pow takes a power and sets this to its starting value to that power. Zero - // to the zeroth power returns 1. + // BigInt(x).pow_n(n) computes x ^ n. + // Note 0 ^ 0 == 1. LIBC_INLINE constexpr void pow_n(uint64_t power) { - BigInt result = 1; + static_assert(!Signed); + BigInt result = one(); BigInt cur_power = *this; - while (power > 0) { if ((power % 2) > 0) result *= cur_power; @@ -449,38 +569,23 @@ struct BigInt { *this = result; } - // TODO: Make division work correctly for signed integers. - - // div takes another BigInt of the same size and divides this by it. The value - // of this will be set to the quotient, and the return value is the remainder. - LIBC_INLINE constexpr cpp::optional div(const BigInt &other) { - BigInt remainder(0); - if (*this < other) { - remainder = *this; - *this = BigInt(0); - return remainder; - } - if (other == 1) { - return remainder; - } - if (other == 0) { + // Performs inplace signed / unsigned division. Returns remainder if not + // dividing by zero. + // For signed numbers it behaves like C++ signed integer division. + // That is by truncating the fractionnal part + // https://stackoverflow.com/a/3602857 + LIBC_INLINE constexpr cpp::optional div(const BigInt ÷r) { + if (LIBC_UNLIKELY(divider.is_zero())) return cpp::nullopt; - } - - BigInt quotient(0); - BigInt subtractor = other; - int cur_bit = static_cast(subtractor.clz() - this->clz()); - subtractor.shift_left(cur_bit); - - for (; cur_bit >= 0 && *this > 0; --cur_bit, subtractor.shift_right(1)) { - if (*this >= subtractor) { - this->sub(subtractor); - quotient = quotient | (BigInt(1) << cur_bit); - } - } - remainder = *this; - *this = quotient; - return remainder; + if (LIBC_UNLIKELY(divider == BigInt::one())) + return BigInt::zero(); + Division result; + if constexpr (SIGNED) + result = divide_signed(*this, divider); + else + result = divide_unsigned(*this, divider); + *this = result.quotient; + return result.remainder; } // Efficiently perform BigInt / (x * 2^e), where x is a half-word-size @@ -496,19 +601,16 @@ struct BigInt { // computation of each step is now properly contained within WordType. // And finally we perform some extra alignment steps for the remaining bits. 
LIBC_INLINE constexpr cpp::optional - div_uint_half_times_pow_2(internal::half_width_t x, size_t e) { - BigInt remainder(0); - - if (x == 0) { + div_uint_half_times_pow_2(multiword::half_width_t x, size_t e) { + BigInt remainder; + if (x == 0) return cpp::nullopt; - } if (e >= Bits) { remainder = *this; - *this = BigInt(0); + *this = BigInt(); return remainder; } - - BigInt quotient(0); + BigInt quotient; WordType x_word = static_cast(x); constexpr size_t LOG2_WORD_SIZE = cpp::bit_width(WORD_SIZE) - 1; constexpr size_t HALF_WORD_SIZE = WORD_SIZE >> 1; @@ -633,189 +735,22 @@ struct BigInt { return *this; } - // TODO: remove and use cpp::countl_zero below. - [[nodiscard]] LIBC_INLINE constexpr int clz() const { - constexpr int word_digits = cpp::numeric_limits::digits; - int leading_zeroes = 0; - for (auto i = val.size(); i > 0;) { - --i; - const int zeroes = cpp::countl_zero(val[i]); - leading_zeroes += zeroes; - if (zeroes != word_digits) - break; - } - return leading_zeroes; - } - - // TODO: remove and use cpp::countr_zero below. - [[nodiscard]] LIBC_INLINE constexpr int ctz() const { - constexpr int word_digits = cpp::numeric_limits::digits; - int trailing_zeroes = 0; - for (auto word : val) { - const int zeroes = cpp::countr_zero(word); - trailing_zeroes += zeroes; - if (zeroes != word_digits) - break; - } - return trailing_zeroes; - } - - LIBC_INLINE constexpr void shift_left(size_t s) { - if constexpr (Bits == WORD_SIZE) { - // Use native types if possible. - if (s >= WORD_SIZE) { - val[0] = 0; - return; - } - val[0] <<= s; - return; - } - if constexpr ((Bits == 64) && (WORD_SIZE == 32)) { - // Use builtin 64 bits for 32-bit base type if available; - if (s >= 64) { - val[0] = 0; - val[1] = 0; - return; - } - uint64_t tmp = uint64__t(val[0]) + (uint64_t(val[1]) << 62); - tmp <<= s; - val[0] = uint32_t(tmp); - val[1] = uint32_t(tmp >> 32); - return; - } -#ifdef LIBC_TYPES_HAS_INT128 - if constexpr ((Bits == 128) && (WORD_SIZE == 64)) { - // Use builtin 128 bits if available; - if (s >= 128) { - val[0] = 0; - val[1] = 0; - return; - } - __uint128_t tmp = __uint128_t(val[0]) + (__uint128_t(val[1]) << 64); - tmp <<= s; - val[0] = uint64_t(tmp); - val[1] = uint64_t(tmp >> 64); - return; - } -#endif // LIBC_TYPES_HAS_INT128 - if (LIBC_UNLIKELY(s == 0)) - return; - - const size_t drop = s / WORD_SIZE; // Number of words to drop - const size_t shift = s % WORD_SIZE; // Bits to shift in the remaining words. 
- size_t i = WORD_COUNT; - - if (drop < WORD_COUNT) { - i = WORD_COUNT - 1; - if (shift > 0) { - for (size_t j = WORD_COUNT - 1 - drop; j > 0; --i, --j) { - val[i] = (val[j] << shift) | (val[j - 1] >> (WORD_SIZE - shift)); - } - val[i] = val[0] << shift; - } else { - for (size_t j = WORD_COUNT - 1 - drop; j > 0; --i, --j) { - val[i] = val[j]; - } - val[i] = val[0]; - } - } - - for (size_t j = 0; j < i; ++j) { - val[j] = 0; - } + LIBC_INLINE constexpr BigInt &operator<<=(size_t s) { + val = multiword::shift(val, s); + return *this; } LIBC_INLINE constexpr BigInt operator<<(size_t s) const { - BigInt result(*this); - result.shift_left(s); - return result; + return BigInt(multiword::shift(val, s)); } - LIBC_INLINE constexpr BigInt &operator<<=(size_t s) { - shift_left(s); + LIBC_INLINE constexpr BigInt &operator>>=(size_t s) { + val = multiword::shift(val, s); return *this; } - LIBC_INLINE constexpr void shift_right(size_t s) { - if constexpr ((Bits == 64) && (WORD_SIZE == 32)) { - // Use builtin 64 bits if available; - if (s >= 64) { - val[0] = 0; - val[1] = 0; - return; - } - uint64_t tmp = uint64_t(val[0]) + (uint64_t(val[1]) << 32); - if constexpr (Signed) { - tmp = static_cast(static_cast(tmp) >> s); - } else { - tmp >>= s; - } - val[0] = uint32_t(tmp); - val[1] = uint32_t(tmp >> 32); - return; - } -#ifdef LIBC_TYPES_HAS_INT128 - if constexpr ((Bits == 128) && (WORD_SIZE == 64)) { - // Use builtin 128 bits if available; - if (s >= 128) { - val[0] = 0; - val[1] = 0; - return; - } - __uint128_t tmp = __uint128_t(val[0]) + (__uint128_t(val[1]) << 64); - if constexpr (Signed) { - tmp = static_cast<__uint128_t>(static_cast<__int128_t>(tmp) >> s); - } else { - tmp >>= s; - } - val[0] = uint64_t(tmp); - val[1] = uint64_t(tmp >> 64); - return; - } -#endif // LIBC_TYPES_HAS_INT128 - - if (LIBC_UNLIKELY(s == 0)) - return; - const size_t drop = s / WORD_SIZE; // Number of words to drop - const size_t shift = s % WORD_SIZE; // Bit shift in the remaining words. - - size_t i = 0; - WordType sign = Signed ? is_neg() : 0; - - if (drop < WORD_COUNT) { - if (shift > 0) { - for (size_t j = drop; j < WORD_COUNT - 1; ++i, ++j) { - val[i] = (val[j] >> shift) | (val[j + 1] << (WORD_SIZE - shift)); - } - if constexpr (Signed) { - val[i] = static_cast( - static_cast>(val[WORD_COUNT - 1]) >> - shift); - } else { - val[i] = val[WORD_COUNT - 1] >> shift; - } - ++i; - } else { - for (size_t j = drop; j < WORD_COUNT; ++i, ++j) { - val[i] = val[j]; - } - } - } - - for (; i < WORD_COUNT; ++i) { - val[i] = sign; - } - } - LIBC_INLINE constexpr BigInt operator>>(size_t s) const { - BigInt result(*this); - result.shift_right(s); - return result; - } - - LIBC_INLINE constexpr BigInt &operator>>=(size_t s) { - shift_right(s); - return *this; + return BigInt(multiword::shift(val, s)); } #define DEFINE_BINOP(OP) \ @@ -833,10 +768,9 @@ struct BigInt { return lhs; \ } - DEFINE_BINOP(&) - DEFINE_BINOP(|) - DEFINE_BINOP(^) - + DEFINE_BINOP(&) // & and &= + DEFINE_BINOP(|) // | and |= + DEFINE_BINOP(^) // ^ and ^= #undef DEFINE_BINOP LIBC_INLINE constexpr BigInt operator~() const { @@ -847,8 +781,8 @@ struct BigInt { } LIBC_INLINE constexpr BigInt operator-() const { - BigInt result = ~(*this); - result.add(BigInt(1)); + BigInt result(*this); + result.negate(); return result; } @@ -865,24 +799,6 @@ struct BigInt { return !(lhs == rhs); } -private: - LIBC_INLINE friend constexpr int cmp(const BigInt &lhs, const BigInt &rhs) { - const auto compare = [](WordType a, WordType b) { - return a == b ? 0 : a > b ? 
1 : -1; - }; - if constexpr (Signed) { - const bool lhs_is_neg = lhs.is_neg(); - const bool rhs_is_neg = rhs.is_neg(); - if (lhs_is_neg != rhs_is_neg) - return rhs_is_neg ? 1 : -1; - } - for (size_t i = WORD_COUNT; i-- > 0;) - if (auto cmp = compare(lhs[i], rhs[i]); cmp != 0) - return cmp; - return 0; - } - -public: LIBC_INLINE friend constexpr bool operator>(const BigInt &lhs, const BigInt &rhs) { return cmp(lhs, rhs) > 0; @@ -901,24 +817,24 @@ public: } LIBC_INLINE constexpr BigInt &operator++() { - add(BigInt(1)); + increment(); return *this; } LIBC_INLINE constexpr BigInt operator++(int) { BigInt oldval(*this); - add(BigInt(1)); + increment(); return oldval; } LIBC_INLINE constexpr BigInt &operator--() { - sub(BigInt(1)); + decrement(); return *this; } LIBC_INLINE constexpr BigInt operator--(int) { BigInt oldval(*this); - sub(BigInt(1)); + decrement(); return oldval; } @@ -930,9 +846,117 @@ public: // Return the i-th word of the number. LIBC_INLINE constexpr WordType &operator[](size_t i) { return val[i]; } - LIBC_INLINE WordType *data() { return val; } +private: + LIBC_INLINE friend constexpr int cmp(const BigInt &lhs, const BigInt &rhs) { + constexpr auto compare = [](WordType a, WordType b) { + return a == b ? 0 : a > b ? 1 : -1; + }; + if constexpr (Signed) { + const bool lhs_is_neg = lhs.is_neg(); + const bool rhs_is_neg = rhs.is_neg(); + if (lhs_is_neg != rhs_is_neg) + return rhs_is_neg ? 1 : -1; + } + for (size_t i = WORD_COUNT; i-- > 0;) + if (auto cmp = compare(lhs[i], rhs[i]); cmp != 0) + return cmp; + return 0; + } + + LIBC_INLINE constexpr void bitwise_not() { + for (auto &part : val) + part = ~part; + } + + LIBC_INLINE constexpr void negate() { + bitwise_not(); + increment(); + } - LIBC_INLINE const WordType *data() const { return val; } + LIBC_INLINE constexpr void increment() { + multiword::add_with_carry(val, cpp::array{1}); + } + + LIBC_INLINE constexpr void decrement() { + multiword::add_with_carry(val, cpp::array{1}); + } + + LIBC_INLINE constexpr void extend(size_t index, bool is_neg) { + const WordType value = is_neg ? cpp::numeric_limits::max() + : cpp::numeric_limits::min(); + for (size_t i = index; i < WORD_COUNT; ++i) + val[i] = value; + } + + LIBC_INLINE constexpr bool get_msb() const { + return val.back() >> (WORD_SIZE - 1); + } + + LIBC_INLINE constexpr void set_msb() { + val.back() |= mask_leading_ones(); + } + + LIBC_INLINE constexpr void clear_msb() { + val.back() &= mask_trailing_ones(); + } + + LIBC_INLINE constexpr void set_bit(size_t i) { + const size_t word_index = i / WORD_SIZE; + val[word_index] |= WordType(1) << (i % WORD_SIZE); + } + + LIBC_INLINE constexpr static Division divide_unsigned(const BigInt ÷nd, + const BigInt ÷r) { + BigInt remainder = dividend; + BigInt quotient; + if (remainder >= divider) { + BigInt subtractor = divider; + int cur_bit = multiword::countl_zero(subtractor.val) - + multiword::countl_zero(remainder.val); + subtractor <<= cur_bit; + for (; cur_bit >= 0 && remainder > 0; --cur_bit, subtractor >>= 1) { + if (remainder < subtractor) + continue; + remainder -= subtractor; + quotient.set_bit(cur_bit); + } + } + return Division{quotient, remainder}; + } + + LIBC_INLINE constexpr static Division divide_signed(const BigInt ÷nd, + const BigInt ÷r) { + // Special case because it is not possible to negate the min value of a + // signed integer. + if (dividend == min() && divider == min()) + return Division{one(), zero()}; + // 1. Convert the dividend and divisor to unsigned representation. 
+ unsigned_type udividend(dividend); + unsigned_type udivider(divider); + // 2. Negate the dividend if it's negative, and similarly for the divisor. + const bool dividend_is_neg = dividend.is_neg(); + const bool divider_is_neg = divider.is_neg(); + if (dividend_is_neg) + udividend.negate(); + if (divider_is_neg) + udivider.negate(); + // 3. Use unsigned multiword division algorithm. + const auto unsigned_result = divide_unsigned(udividend, udivider); + // 4. Convert the quotient and remainder to signed representation. + Division result; + result.quotient = signed_type(unsigned_result.quotient); + result.remainder = signed_type(unsigned_result.remainder); + // 5. Negate the quotient if the dividend and divisor had opposite signs. + if (dividend_is_neg != divider_is_neg) + result.quotient.negate(); + // 6. Negate the remainder if the dividend was negative. + if (dividend_is_neg) + result.remainder.negate(); + return result; + } + + friend signed_type; + friend unsigned_type; }; namespace internal { @@ -962,10 +986,8 @@ using Int = BigInt>; // Provides limits of U/Int<128>. template <> class cpp::numeric_limits> { public: - LIBC_INLINE static constexpr UInt<128> max() { - return UInt<128>({0xffff'ffff'ffff'ffff, 0xffff'ffff'ffff'ffff}); - } - LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>(0); } + LIBC_INLINE static constexpr UInt<128> max() { return UInt<128>::max(); } + LIBC_INLINE static constexpr UInt<128> min() { return UInt<128>::min(); } // Meant to match std::numeric_limits interface. // NOLINTNEXTLINE(readability-identifier-naming) LIBC_INLINE_VAR static constexpr int digits = 128; @@ -973,12 +995,8 @@ public: template <> class cpp::numeric_limits> { public: - LIBC_INLINE static constexpr Int<128> max() { - return Int<128>({0xffff'ffff'ffff'ffff, 0x7fff'ffff'ffff'ffff}); - } - LIBC_INLINE static constexpr Int<128> min() { - return Int<128>({0, 0x8000'0000'0000'0000}); - } + LIBC_INLINE static constexpr Int<128> max() { return Int<128>::max(); } + LIBC_INLINE static constexpr Int<128> min() { return Int<128>::min(); } // Meant to match std::numeric_limits interface. // NOLINTNEXTLINE(readability-identifier-naming) LIBC_INLINE_VAR static constexpr int digits = 128; @@ -1112,30 +1130,28 @@ has_single_bit(T value) { template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countr_zero(const T &value) { - return value.ctz(); + return multiword::countr_zero(value.val); } // Specialization of cpp::countl_zero ('bit.h') for BigInt. template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countl_zero(const T &value) { - return value.clz(); + return multiword::countl_zero(value.val); } // Specialization of cpp::countl_one ('bit.h') for BigInt. template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countl_one(T value) { - // TODO : Implement a faster version not involving operator~. - return cpp::countl_zero(~value); + return multiword::countl_one(value.val); } // Specialization of cpp::countr_one ('bit.h') for BigInt. template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> countr_one(T value) { - // TODO : Implement a faster version not involving operator~. - return cpp::countr_zero(~value); + return multiword::countr_one(value.val); } // Specialization of cpp::bit_width ('bit.h') for BigInt. 
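For intuition about what the multiword helpers referenced above compute, here is a small standalone C++20 sketch of a word-by-word leading-zero count over a little-endian array of words, mirroring the per-word scan of the `clz()` member being removed earlier in this diff. The function name and the fixed `uint32_t` word type are illustrative assumptions; the real logic lives behind the `multiword::` helpers, which are not shown here.

#include <array>
#include <bit>
#include <cstddef>
#include <cstdint>
#include <iostream>

// Count leading zeros of a multiword integer stored little-endian
// (val[0] is the least significant word). Illustrative sketch only.
template <std::size_t N>
constexpr int multiword_countl_zero(const std::array<uint32_t, N> &val) {
  constexpr int WORD_BITS = 32;
  int leading = 0;
  for (std::size_t i = N; i-- > 0;) {      // scan from the most significant word
    const int z = std::countl_zero(val[i]);
    leading += z;
    if (z != WORD_BITS)                    // stop at the first non-zero word
      break;
  }
  return leading;
}

int main() {
  // 96-bit value 0x00000001'00000000'00000000 has 31 leading zeros.
  std::array<uint32_t, 3> v{0, 0, 1};
  std::cout << multiword_countl_zero(v) << "\n"; // prints 31
}
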
@@ -1182,65 +1198,59 @@ rotr(T value, int rotate) { template LIBC_INLINE constexpr cpp::enable_if_t, T> mask_trailing_ones() { - static_assert(!T::SIGNED); - if (count == 0) - return T(); - constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); - static_assert(count <= T_BITS && "Invalid bit index"); - using word_type = typename T::word_type; - T out; - constexpr int CHUNK_INDEX_CONTAINING_BIT = - static_cast(count / T::WORD_SIZE); - int index = 0; - for (auto &word : out.val) { - if (index < CHUNK_INDEX_CONTAINING_BIT) - word = -1; - else if (index > CHUNK_INDEX_CONTAINING_BIT) - word = 0; - else - word = mask_trailing_ones(); - ++index; - } + static_assert(!T::SIGNED && count <= T::BITS); + if (count == T::BITS) + return T::all_ones(); + constexpr size_t QUOTIENT = count / T::WORD_SIZE; + constexpr size_t REMAINDER = count % T::WORD_SIZE; + T out; // zero initialized + for (size_t i = 0; i <= QUOTIENT; ++i) + out[i] = i < QUOTIENT + ? -1 + : mask_trailing_ones(); return out; } // Specialization of mask_leading_ones ('math_extras.h') for BigInt. template LIBC_INLINE constexpr cpp::enable_if_t, T> mask_leading_ones() { - static_assert(!T::SIGNED); - if (count == 0) - return T(); - constexpr unsigned T_BITS = CHAR_BIT * sizeof(T); - static_assert(count <= T_BITS && "Invalid bit index"); - using word_type = typename T::word_type; - T out; - constexpr int CHUNK_INDEX_CONTAINING_BIT = - static_cast((T::BITS - count - 1ULL) / T::WORD_SIZE); - int index = 0; - for (auto &word : out.val) { - if (index < CHUNK_INDEX_CONTAINING_BIT) - word = 0; - else if (index > CHUNK_INDEX_CONTAINING_BIT) - word = -1; - else - word = mask_leading_ones(); - ++index; - } + static_assert(!T::SIGNED && count <= T::BITS); + if (count == T::BITS) + return T::all_ones(); + constexpr size_t QUOTIENT = (T::BITS - count - 1U) / T::WORD_SIZE; + constexpr size_t REMAINDER = count % T::WORD_SIZE; + T out; // zero initialized + for (size_t i = QUOTIENT; i < T::WORD_COUNT; ++i) + out[i] = i > QUOTIENT + ? -1 + : mask_leading_ones(); return out; } +// Specialization of mask_trailing_zeros ('math_extras.h') for BigInt. +template +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_trailing_zeros() { + return mask_leading_ones(); +} + +// Specialization of mask_leading_zeros ('math_extras.h') for BigInt. +template +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_leading_zeros() { + return mask_trailing_ones(); +} + // Specialization of count_zeros ('math_extras.h') for BigInt. template -[[nodiscard]] -LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> count_zeros(T value) { return cpp::popcount(~value); } // Specialization of first_leading_zero ('math_extras.h') for BigInt. template -[[nodiscard]] -LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> first_leading_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : cpp::countl_one(value) + 1; @@ -1248,16 +1258,14 @@ first_leading_zero(T value) { // Specialization of first_leading_one ('math_extras.h') for BigInt. template -[[nodiscard]] -LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> first_leading_one(T value) { return first_leading_zero(~value); } // Specialization of first_trailing_zero ('math_extras.h') for BigInt. 
template -[[nodiscard]] -LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> first_trailing_zero(T value) { return value == cpp::numeric_limits::max() ? 0 : cpp::countr_zero(~value) + 1; @@ -1265,8 +1273,7 @@ first_trailing_zero(T value) { // Specialization of first_trailing_one ('math_extras.h') for BigInt. template -[[nodiscard]] -LIBC_INLINE constexpr cpp::enable_if_t, int> +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> first_trailing_one(T value) { return value == cpp::numeric_limits::max() ? 0 : cpp::countr_zero(value) + 1; diff --git a/libc/src/__support/float_to_string.h b/libc/src/__support/float_to_string.h index 1287c3e..4c59cfd 100644 --- a/libc/src/__support/float_to_string.h +++ b/libc/src/__support/float_to_string.h @@ -689,7 +689,7 @@ template <> class FloatToString { wide_int float_as_int = mantissa; - float_as_int.shift_left(exponent); + float_as_int <<= exponent; int_block_index = 0; while (float_as_int > 0) { @@ -708,10 +708,11 @@ template <> class FloatToString { const int SHIFT_AMOUNT = FLOAT_AS_INT_WIDTH + exponent; static_assert(EXTRA_INT_WIDTH >= sizeof(long double) * 8); - float_as_fixed.shift_left(SHIFT_AMOUNT); + float_as_fixed <<= SHIFT_AMOUNT; // If there are still digits above the decimal point, handle those. - if (float_as_fixed.clz() < static_cast(EXTRA_INT_WIDTH)) { + if (cpp::countl_zero(float_as_fixed) < + static_cast(EXTRA_INT_WIDTH)) { UInt above_decimal_point = float_as_fixed >> FLOAT_AS_INT_WIDTH; diff --git a/libc/src/__support/integer_literals.h b/libc/src/__support/integer_literals.h index de1f88f..e99799c 100644 --- a/libc/src/__support/integer_literals.h +++ b/libc/src/__support/integer_literals.h @@ -151,12 +151,15 @@ template struct Parser> { template LIBC_INLINE constexpr T parse_with_prefix(const char *ptr) { using P = Parser; - if (ptr[0] == '0' && ptr[1] == 'x') - return P::template parse<16>(ptr + 2); - else if (ptr[0] == '0' && ptr[1] == 'b') - return P::template parse<2>(ptr + 2); - else - return P::template parse<10>(ptr); + if (ptr == nullptr) + return T(); + if (ptr[0] == '0') { + if (ptr[1] == 'b') + return P::template parse<2>(ptr + 2); + if (ptr[1] == 'x') + return P::template parse<16>(ptr + 2); + } + return P::template parse<10>(ptr); } } // namespace internal @@ -169,6 +172,16 @@ LIBC_INLINE constexpr auto operator""_u256(const char *x) { return internal::parse_with_prefix>(x); } +template LIBC_INLINE constexpr T parse_bigint(const char *ptr) { + if (ptr == nullptr) + return T(); + if (ptr[0] == '-' || ptr[0] == '+') { + auto positive = internal::parse_with_prefix(ptr + 1); + return ptr[0] == '-' ? 
-positive : positive; + } + return internal::parse_with_prefix(ptr); +} + } // namespace LIBC_NAMESPACE #endif // LLVM_LIBC_SRC___SUPPORT_INTEGER_LITERALS_H diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h index 70a8800..bb6424b 100644 --- a/libc/src/__support/math_extras.h +++ b/libc/src/__support/math_extras.h @@ -10,9 +10,9 @@ #ifndef LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H #define LLVM_LIBC_SRC___SUPPORT_MATH_EXTRAS_H -#include "src/__support/CPP/bit.h" // countl_one, countr_zero -#include "src/__support/CPP/limits.h" // CHAR_BIT, numeric_limits -#include "src/__support/CPP/type_traits.h" // is_unsigned_v +#include "src/__support/CPP/bit.h" // countl_one, countr_zero +#include "src/__support/CPP/limits.h" // CHAR_BIT, numeric_limits +#include "src/__support/CPP/type_traits.h" // is_unsigned_v, is_constant_evaluated #include "src/__support/macros/attributes.h" // LIBC_INLINE namespace LIBC_NAMESPACE { @@ -32,199 +32,94 @@ mask_trailing_ones() { template LIBC_INLINE constexpr cpp::enable_if_t, T> mask_leading_ones() { - constexpr T MASK(mask_trailing_ones()); - return T(~MASK); // bitwise NOT performs integer promotion. + return T(~mask_trailing_ones()); } -// Add with carry -template struct SumCarry { - T sum; - T carry; -}; - -// This version is always valid for constexpr. -template -LIBC_INLINE constexpr cpp::enable_if_t< - cpp::is_integral_v && cpp::is_unsigned_v, SumCarry> -add_with_carry_const(T a, T b, T carry_in) { - T tmp = a + carry_in; - T sum = b + tmp; - T carry_out = (sum < b) + (tmp < a); - return {sum, carry_out}; -} - -template -LIBC_INLINE constexpr cpp::enable_if_t< - cpp::is_integral_v && cpp::is_unsigned_v, SumCarry> -add_with_carry(T a, T b, T carry_in) { - return add_with_carry_const(a, b, carry_in); -} - -#if __has_builtin(__builtin_addc) -// https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins - -template <> -LIBC_INLINE constexpr SumCarry -add_with_carry(unsigned char a, unsigned char b, - unsigned char carry_in) { - if (__builtin_is_constant_evaluated()) { - return add_with_carry_const(a, b, carry_in); - } else { - SumCarry result{0, 0}; - result.sum = __builtin_addcb(a, b, carry_in, &result.carry); - return result; - } -} - -template <> -LIBC_INLINE constexpr SumCarry -add_with_carry(unsigned short a, unsigned short b, - unsigned short carry_in) { - if (__builtin_is_constant_evaluated()) { - return add_with_carry_const(a, b, carry_in); - } else { - SumCarry result{0, 0}; - result.sum = __builtin_addcs(a, b, carry_in, &result.carry); - return result; - } -} - -template <> -LIBC_INLINE constexpr SumCarry -add_with_carry(unsigned int a, unsigned int b, - unsigned int carry_in) { - if (__builtin_is_constant_evaluated()) { - return add_with_carry_const(a, b, carry_in); - } else { - SumCarry result{0, 0}; - result.sum = __builtin_addc(a, b, carry_in, &result.carry); - return result; - } -} - -template <> -LIBC_INLINE constexpr SumCarry -add_with_carry(unsigned long a, unsigned long b, - unsigned long carry_in) { - if (__builtin_is_constant_evaluated()) { - return add_with_carry_const(a, b, carry_in); - } else { - SumCarry result{0, 0}; - result.sum = __builtin_addcl(a, b, carry_in, &result.carry); - return result; - } +// Create a bitmask with the count right-most bits set to 0, and all other bits +// set to 1. Only unsigned types are allowed. 
+template +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_trailing_zeros() { + return mask_leading_ones(); } -template <> -LIBC_INLINE constexpr SumCarry -add_with_carry(unsigned long long a, unsigned long long b, - unsigned long long carry_in) { - if (__builtin_is_constant_evaluated()) { - return add_with_carry_const(a, b, carry_in); - } else { - SumCarry result{0, 0}; - result.sum = __builtin_addcll(a, b, carry_in, &result.carry); - return result; - } +// Create a bitmask with the count left-most bits set to 0, and all other bits +// set to 1. Only unsigned types are allowed. +template +LIBC_INLINE constexpr cpp::enable_if_t, T> +mask_leading_zeros() { + return mask_trailing_ones(); } -#endif // __has_builtin(__builtin_addc) - -// Subtract with borrow -template struct DiffBorrow { - T diff; - T borrow; -}; - -// This version is always valid for constexpr. +// Returns whether 'a + b' overflows, the result is stored in 'res'. template -LIBC_INLINE constexpr cpp::enable_if_t< - cpp::is_integral_v && cpp::is_unsigned_v, DiffBorrow> -sub_with_borrow_const(T a, T b, T borrow_in) { - T tmp = a - b; - T diff = tmp - borrow_in; - T borrow_out = (diff > tmp) + (tmp > a); - return {diff, borrow_out}; +[[nodiscard]] LIBC_INLINE constexpr bool add_overflow(T a, T b, T &res) { + return __builtin_add_overflow(a, b, &res); } -// This version is not always valid for constepxr because it's overriden below -// if builtins are available. +// Returns whether 'a - b' overflows, the result is stored in 'res'. template -LIBC_INLINE constexpr cpp::enable_if_t< - cpp::is_integral_v && cpp::is_unsigned_v, DiffBorrow> -sub_with_borrow(T a, T b, T borrow_in) { - return sub_with_borrow_const(a, b, borrow_in); +[[nodiscard]] LIBC_INLINE constexpr bool sub_overflow(T a, T b, T &res) { + return __builtin_sub_overflow(a, b, &res); } -#if __has_builtin(__builtin_subc) -// https://clang.llvm.org/docs/LanguageExtensions.html#multiprecision-arithmetic-builtins - -template <> -LIBC_INLINE constexpr DiffBorrow -sub_with_borrow(unsigned char a, unsigned char b, - unsigned char borrow_in) { - if (__builtin_is_constant_evaluated()) { - return sub_with_borrow_const(a, b, borrow_in); - } else { - DiffBorrow result{0, 0}; - result.diff = __builtin_subcb(a, b, borrow_in, &result.borrow); - return result; - } -} +#define RETURN_IF(TYPE, BUILTIN) \ + if constexpr (cpp::is_same_v) \ + return BUILTIN(a, b, carry_in, carry_out); -template <> -LIBC_INLINE constexpr DiffBorrow -sub_with_borrow(unsigned short a, unsigned short b, - unsigned short borrow_in) { - if (__builtin_is_constant_evaluated()) { - return sub_with_borrow_const(a, b, borrow_in); - } else { - DiffBorrow result{0, 0}; - result.diff = __builtin_subcs(a, b, borrow_in, &result.borrow); - return result; - } -} - -template <> -LIBC_INLINE constexpr DiffBorrow -sub_with_borrow(unsigned int a, unsigned int b, - unsigned int borrow_in) { - if (__builtin_is_constant_evaluated()) { - return sub_with_borrow_const(a, b, borrow_in); - } else { - DiffBorrow result{0, 0}; - result.diff = __builtin_subc(a, b, borrow_in, &result.borrow); - return result; - } -} - -template <> -LIBC_INLINE constexpr DiffBorrow -sub_with_borrow(unsigned long a, unsigned long b, - unsigned long borrow_in) { - if (__builtin_is_constant_evaluated()) { - return sub_with_borrow_const(a, b, borrow_in); - } else { - DiffBorrow result{0, 0}; - result.diff = __builtin_subcl(a, b, borrow_in, &result.borrow); - return result; +// Returns the result of 'a + b' taking into account 'carry_in'. 
+// The carry out is stored in 'carry_out' it not 'nullptr', dropped otherwise. +// We keep the pass by pointer interface for consistency with the intrinsic. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +add_with_carry(T a, T b, T carry_in, T &carry_out) { + if constexpr (!cpp::is_constant_evaluated()) { +#if __has_builtin(__builtin_addcb) + RETURN_IF(unsigned char, __builtin_addcb) +#elif __has_builtin(__builtin_addcs) + RETURN_IF(unsigned short, __builtin_addcs) +#elif __has_builtin(__builtin_addc) + RETURN_IF(unsigned int, __builtin_addc) +#elif __has_builtin(__builtin_addcl) + RETURN_IF(unsigned long, __builtin_addcl) +#elif __has_builtin(__builtin_addcll) + RETURN_IF(unsigned long long, __builtin_addcll) +#endif } + T sum; + T carry1 = add_overflow(a, b, sum); + T carry2 = add_overflow(sum, carry_in, sum); + carry_out = carry1 | carry2; + return sum; } -template <> -LIBC_INLINE constexpr DiffBorrow -sub_with_borrow(unsigned long long a, unsigned long long b, - unsigned long long borrow_in) { - if (__builtin_is_constant_evaluated()) { - return sub_with_borrow_const(a, b, borrow_in); - } else { - DiffBorrow result{0, 0}; - result.diff = __builtin_subcll(a, b, borrow_in, &result.borrow); - return result; +// Returns the result of 'a - b' taking into account 'carry_in'. +// The carry out is stored in 'carry_out' it not 'nullptr', dropped otherwise. +// We keep the pass by pointer interface for consistency with the intrinsic. +template +[[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, T> +sub_with_borrow(T a, T b, T carry_in, T &carry_out) { + if constexpr (!cpp::is_constant_evaluated()) { +#if __has_builtin(__builtin_subcb) + RETURN_IF(unsigned char, __builtin_subcb) +#elif __has_builtin(__builtin_subcs) + RETURN_IF(unsigned short, __builtin_subcs) +#elif __has_builtin(__builtin_subc) + RETURN_IF(unsigned int, __builtin_subc) +#elif __has_builtin(__builtin_subcl) + RETURN_IF(unsigned long, __builtin_subcl) +#elif __has_builtin(__builtin_subcll) + RETURN_IF(unsigned long long, __builtin_subcll) +#endif } + T sub; + T carry1 = sub_overflow(a, b, sub); + T carry2 = sub_overflow(sub, carry_in, sub); + carry_out = carry1 | carry2; + return sub; } -#endif // __has_builtin(__builtin_subc) +#undef RETURN_IF template [[nodiscard]] LIBC_INLINE constexpr cpp::enable_if_t, int> diff --git a/libc/src/__support/number_pair.h b/libc/src/__support/number_pair.h index ee6667b..2f713fc 100644 --- a/libc/src/__support/number_pair.h +++ b/libc/src/__support/number_pair.h @@ -20,17 +20,6 @@ template struct NumberPair { T hi = T(0); }; -template -cpp::enable_if_t && cpp::is_unsigned_v, - NumberPair> constexpr split(T a) { - constexpr size_t HALF_BIT_WIDTH = sizeof(T) * 4; - constexpr T LOWER_HALF_MASK = (T(1) << HALF_BIT_WIDTH) - T(1); - NumberPair result; - result.lo = a & LOWER_HALF_MASK; - result.hi = a >> HALF_BIT_WIDTH; - return result; -} - } // namespace LIBC_NAMESPACE #endif // LLVM_LIBC_SRC___SUPPORT_NUMBER_PAIR_H diff --git a/libc/test/src/__support/integer_literals_test.cpp b/libc/test/src/__support/integer_literals_test.cpp index 5298cf3..cbc906a 100644 --- a/libc/test/src/__support/integer_literals_test.cpp +++ b/libc/test/src/__support/integer_literals_test.cpp @@ -133,3 +133,24 @@ TEST(LlvmLibcIntegerLiteralTest, u256) { U256_MAX, 0xFFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF'FFFFFFFF_u256); } + +TEST(LlvmLibcIntegerLiteralTest, parse_bigint) { + using T = LIBC_NAMESPACE::Int<128>; + struct { + const char *str; + T expected; + } constexpr 
TEST_CASES[] = { + {"0", 0}, {"-1", -1}, {"+1", 1}, {"-0xFF", -255}, {"-0b11", -3}, + }; + for (auto tc : TEST_CASES) { + T actual = LIBC_NAMESPACE::parse_bigint(tc.str); + EXPECT_EQ(actual, tc.expected); + } +} + +TEST(LlvmLibcIntegerLiteralTest, parse_bigint_invalid) { + using T = LIBC_NAMESPACE::Int<128>; + const T expected; // default construction + EXPECT_EQ(LIBC_NAMESPACE::parse_bigint(nullptr), expected); + EXPECT_EQ(LIBC_NAMESPACE::parse_bigint(""), expected); +} diff --git a/libc/test/src/__support/math_extras_test.cpp b/libc/test/src/__support/math_extras_test.cpp index e88b3e1..401e631e 100644 --- a/libc/test/src/__support/math_extras_test.cpp +++ b/libc/test/src/__support/math_extras_test.cpp @@ -101,4 +101,61 @@ TYPED_TEST(LlvmLibcBitTest, CountZeros, UnsignedTypesNoBigInt) { EXPECT_EQ(count_zeros(cpp::numeric_limits::max() >> i), i); } +using UnsignedTypes = testing::TypeList< +#if defined(__SIZEOF_INT128__) + __uint128_t, +#endif + unsigned char, unsigned short, unsigned int, unsigned long, + unsigned long long>; + +TYPED_TEST(LlvmLibcBlockMathExtrasTest, add_overflow, UnsignedTypes) { + constexpr T ZERO = cpp::numeric_limits::min(); + constexpr T ONE(1); + constexpr T MAX = cpp::numeric_limits::max(); + constexpr T BEFORE_MAX = MAX - 1; + + const struct { + T lhs; + T rhs; + T sum; + bool carry; + } TESTS[] = { + {ZERO, ONE, ONE, false}, // 0x00 + 0x01 = 0x01 + {BEFORE_MAX, ONE, MAX, false}, // 0xFE + 0x01 = 0xFF + {MAX, ONE, ZERO, true}, // 0xFF + 0x01 = 0x00 (carry) + {MAX, MAX, BEFORE_MAX, true}, // 0xFF + 0xFF = 0xFE (carry) + }; + for (auto tc : TESTS) { + T sum; + bool carry = add_overflow(tc.lhs, tc.rhs, sum); + EXPECT_EQ(sum, tc.sum); + EXPECT_EQ(carry, tc.carry); + } +} + +TYPED_TEST(LlvmLibcBlockMathExtrasTest, sub_overflow, UnsignedTypes) { + constexpr T ZERO = cpp::numeric_limits::min(); + constexpr T ONE(1); + constexpr T MAX = cpp::numeric_limits::max(); + constexpr T BEFORE_MAX = MAX - 1; + + const struct { + T lhs; + T rhs; + T sub; + bool carry; + } TESTS[] = { + {ONE, ZERO, ONE, false}, // 0x01 - 0x00 = 0x01 + {MAX, MAX, ZERO, false}, // 0xFF - 0xFF = 0x00 + {ZERO, ONE, MAX, true}, // 0x00 - 0x01 = 0xFF (carry) + {BEFORE_MAX, MAX, MAX, true}, // 0xFE - 0xFF = 0xFF (carry) + }; + for (auto tc : TESTS) { + T sub; + bool carry = sub_overflow(tc.lhs, tc.rhs, sub); + EXPECT_EQ(sub, tc.sub); + EXPECT_EQ(carry, tc.carry); + } +} + } // namespace LIBC_NAMESPACE diff --git a/libc/test/src/__support/uint_test.cpp b/libc/test/src/__support/uint_test.cpp index 5764324..5696e54 100644 --- a/libc/test/src/__support/uint_test.cpp +++ b/libc/test/src/__support/uint_test.cpp @@ -8,6 +8,7 @@ #include "src/__support/CPP/optional.h" #include "src/__support/UInt.h" +#include "src/__support/integer_literals.h" // parse_unsigned_bigint #include "src/__support/macros/properties/types.h" // LIBC_TYPES_HAS_INT128 #include "include/llvm-libc-macros/math-macros.h" // HUGE_VALF, HUGE_VALF @@ -15,6 +16,195 @@ namespace LIBC_NAMESPACE { +enum Value { ZERO, ONE, TWO, MIN, MAX }; + +template auto create(Value value) { + switch (value) { + case ZERO: + return T(0); + case ONE: + return T(1); + case TWO: + return T(2); + case MIN: + return T::min(); + case MAX: + return T::max(); + } +} + +using Types = testing::TypeList< // +#ifdef LIBC_TYPES_HAS_INT64 + BigInt<64, false, uint64_t>, // 64-bits unsigned (1 x uint64_t) + BigInt<64, true, uint64_t>, // 64-bits signed (1 x uint64_t) +#endif +#ifdef LIBC_TYPES_HAS_INT128 + BigInt<128, false, __uint128_t>, // 128-bits unsigned (1 x __uint128_t) 
+ BigInt<128, true, __uint128_t>, // 128-bits signed (1 x __uint128_t) +#endif + BigInt<16, false, uint16_t>, // 16-bits unsigned (1 x uint16_t) + BigInt<16, true, uint16_t>, // 16-bits signed (1 x uint16_t) + BigInt<64, false, uint16_t>, // 64-bits unsigned (4 x uint16_t) + BigInt<64, true, uint16_t> // 64-bits signed (4 x uint16_t) + >; + +#define ASSERT_SAME(A, B) ASSERT_TRUE((A) == (B)) + +TYPED_TEST(LlvmLibcUIntClassTest, Additions, Types) { + ASSERT_SAME(create(ZERO) + create(ZERO), create(ZERO)); + ASSERT_SAME(create(ONE) + create(ZERO), create(ONE)); + ASSERT_SAME(create(ZERO) + create(ONE), create(ONE)); + ASSERT_SAME(create(ONE) + create(ONE), create(TWO)); + // 2's complement addition works for signed and unsigned types. + // - unsigned : 0xff + 0x01 = 0x00 (255 + 1 = 0) + // - signed : 0xef + 0x01 = 0xf0 (127 + 1 = -128) + ASSERT_SAME(create(MAX) + create(ONE), create(MIN)); +} + +TYPED_TEST(LlvmLibcUIntClassTest, Subtraction, Types) { + ASSERT_SAME(create(ZERO) - create(ZERO), create(ZERO)); + ASSERT_SAME(create(ONE) - create(ONE), create(ZERO)); + ASSERT_SAME(create(ONE) - create(ZERO), create(ONE)); + // 2's complement subtraction works for signed and unsigned types. + // - unsigned : 0x00 - 0x01 = 0xff ( 0 - 1 = 255) + // - signed : 0xf0 - 0x01 = 0xef (-128 - 1 = 127) + ASSERT_SAME(create(MIN) - create(ONE), create(MAX)); +} + +TYPED_TEST(LlvmLibcUIntClassTest, Multiplication, Types) { + ASSERT_SAME(create(ZERO) * create(ZERO), create(ZERO)); + ASSERT_SAME(create(ZERO) * create(ONE), create(ZERO)); + ASSERT_SAME(create(ONE) * create(ZERO), create(ZERO)); + ASSERT_SAME(create(ONE) * create(ONE), create(ONE)); + ASSERT_SAME(create(ONE) * create(TWO), create(TWO)); + ASSERT_SAME(create(TWO) * create(ONE), create(TWO)); + // - unsigned : 0xff x 0xff = 0x01 (mod 0xff) + // - signed : 0xef x 0xef = 0x01 (mod 0xff) + ASSERT_SAME(create(MAX) * create(MAX), create(ONE)); +} + +template void print(const char *msg, T value) { + testing::tlog << msg; + IntegerToString buffer(value); + testing::tlog << buffer.view() << "\n"; +} + +TEST(LlvmLibcUIntClassTest, SignedAddSub) { + // Computations performed by https://www.wolframalpha.com/ + using T = BigInt<128, true, uint32_t>; + const T a = parse_bigint("1927508279017230597"); + const T b = parse_bigint("278789278723478925"); + const T s = parse_bigint("2206297557740709522"); + // Addition + ASSERT_SAME(a + b, s); + ASSERT_SAME(b + a, s); // commutative + // Subtraction + ASSERT_SAME(a - s, -b); + ASSERT_SAME(s - a, b); +} + +TEST(LlvmLibcUIntClassTest, SignedMulDiv) { + // Computations performed by https://www.wolframalpha.com/ + using T = BigInt<128, true, uint16_t>; + struct { + const char *a; + const char *b; + const char *mul; + } const test_cases[] = {{"-4", "3", "-12"}, + {"-3", "-3", "9"}, + {"1927508279017230597", "278789278723478925", + "537368642840747885329125014794668225"}}; + for (auto tc : test_cases) { + const T a = parse_bigint(tc.a); + const T b = parse_bigint(tc.b); + const T mul = parse_bigint(tc.mul); + // Multiplication + ASSERT_SAME(a * b, mul); + ASSERT_SAME(b * a, mul); // commutative + ASSERT_SAME(a * -b, -mul); // sign + ASSERT_SAME(-a * b, -mul); // sign + ASSERT_SAME(-a * -b, mul); // sign + // Division + ASSERT_SAME(mul / a, b); + ASSERT_SAME(mul / b, a); + ASSERT_SAME(-mul / a, -b); // sign + ASSERT_SAME(mul / -a, -b); // sign + ASSERT_SAME(-mul / -a, b); // sign + } +} + +TYPED_TEST(LlvmLibcUIntClassTest, Division, Types) { + ASSERT_SAME(create(ZERO) / create(ONE), create(ZERO)); + ASSERT_SAME(create(MAX) / 
create(ONE), create(MAX)); + ASSERT_SAME(create(MAX) / create(MAX), create(ONE)); + ASSERT_SAME(create(ONE) / create(ONE), create(ONE)); + if constexpr (T::SIGNED) { + // Special case found by fuzzing. + ASSERT_SAME(create(MIN) / create(MIN), create(ONE)); + } + // - unsigned : 0xff / 0x02 = 0x7f + // - signed : 0xef / 0x02 = 0x77 + ASSERT_SAME(create(MAX) / create(TWO), (create(MAX) >> 1)); + + using word_type = typename T::word_type; + const T zero_one_repeated = T::all_ones() / T(0xff); + const word_type pattern = word_type(~0) / word_type(0xff); + for (const word_type part : zero_one_repeated.val) { + if constexpr (T::SIGNED == false) { + EXPECT_EQ(part, pattern); + } + } +} + +TYPED_TEST(LlvmLibcUIntClassTest, is_neg, Types) { + EXPECT_FALSE(create(ZERO).is_neg()); + EXPECT_FALSE(create(ONE).is_neg()); + EXPECT_FALSE(create(TWO).is_neg()); + EXPECT_EQ(create(MIN).is_neg(), T::SIGNED); + EXPECT_FALSE(create(MAX).is_neg()); +} + +TYPED_TEST(LlvmLibcUIntClassTest, Masks, Types) { + if constexpr (!T::SIGNED) { + constexpr size_t BITS = T::BITS; + // mask_trailing_ones + ASSERT_SAME((mask_trailing_ones()), T::zero()); + ASSERT_SAME((mask_trailing_ones()), T::one()); + ASSERT_SAME((mask_trailing_ones()), T::all_ones() >> 1); + ASSERT_SAME((mask_trailing_ones()), T::all_ones()); + // mask_leading_ones + ASSERT_SAME((mask_leading_ones()), T::zero()); + ASSERT_SAME((mask_leading_ones()), T::one() << (BITS - 1)); + ASSERT_SAME((mask_leading_ones()), T::all_ones() - T::one()); + ASSERT_SAME((mask_leading_ones()), T::all_ones()); + // mask_trailing_zeros + ASSERT_SAME((mask_trailing_zeros()), T::all_ones()); + ASSERT_SAME((mask_trailing_zeros()), T::all_ones() - T::one()); + ASSERT_SAME((mask_trailing_zeros()), T::one() << (BITS - 1)); + ASSERT_SAME((mask_trailing_zeros()), T::zero()); + // mask_trailing_zeros + ASSERT_SAME((mask_leading_zeros()), T::all_ones()); + ASSERT_SAME((mask_leading_zeros()), T::all_ones() >> 1); + ASSERT_SAME((mask_leading_zeros()), T::one()); + ASSERT_SAME((mask_leading_zeros()), T::zero()); + } +} + +TYPED_TEST(LlvmLibcUIntClassTest, CountBits, Types) { + if constexpr (!T::SIGNED) { + for (size_t i = 0; i <= T::BITS; ++i) { + const auto l_one = T::all_ones() << i; // 0b111...000 + const auto r_one = T::all_ones() >> i; // 0b000...111 + const int zeros = i; + const int ones = T::BITS - zeros; + ASSERT_EQ(cpp::countr_one(r_one), ones); + ASSERT_EQ(cpp::countl_one(l_one), ones); + ASSERT_EQ(cpp::countr_zero(l_one), zeros); + ASSERT_EQ(cpp::countl_zero(r_one), zeros); + } + } +} + using LL_UInt64 = UInt<64>; // We want to test UInt<128> explicitly. 
So, for // convenience, we use a sugar which does not conflict with the UInt128 type @@ -561,7 +751,7 @@ TEST(LlvmLibcUIntClassTest, FullMulTests) { LL_UInt##Bits a = ~LL_UInt##Bits(0); \ LL_UInt##Bits hi = a.quick_mul_hi(a); \ LL_UInt##Bits trunc = static_cast(a.ful_mul(a) >> Bits); \ - uint64_t overflow = trunc.sub(hi); \ + uint64_t overflow = trunc.sub_overflow(hi); \ EXPECT_EQ(overflow, uint64_t(0)); \ EXPECT_LE(uint64_t(trunc), uint64_t(Error)); \ } while (0) diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel index 4f97612..c0d402a8 100644 --- a/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/test/src/__support/BUILD.bazel @@ -87,6 +87,7 @@ libc_test( srcs = ["uint_test.cpp"], deps = [ "//libc:__support_cpp_optional", + "//libc:__support_integer_literals", "//libc:__support_macros_properties_types", "//libc:__support_uint", "//libc:llvm_libc_macros_math_macros", -- cgit v1.1 From 5334b31e7c41174a418afbfe132bd0a86a47a22e Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Thu, 4 Apr 2024 10:46:45 +0100 Subject: [mlir][OpenMP][NFC] Use SmallVectorImpl for function arguments (#86978) --- .../LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 646d0ed..cacf2c3 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -804,13 +804,13 @@ convertOmpTaskgroupOp(omp::TaskgroupOp tgOp, llvm::IRBuilderBase &builder, /// Allocate space for privatized reduction variables. template -static void -allocByValReductionVars(T loop, llvm::IRBuilderBase &builder, - LLVM::ModuleTranslation &moduleTranslation, - llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, - SmallVector &reductionDecls, - SmallVector &privateReductionVariables, - DenseMap &reductionVariableMap) { +static void allocByValReductionVars( + T loop, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + llvm::OpenMPIRBuilder::InsertPointTy &allocaIP, + SmallVectorImpl &reductionDecls, + SmallVectorImpl &privateReductionVariables, + DenseMap &reductionVariableMap) { llvm::IRBuilderBase::InsertPointGuard guard(builder); builder.restoreIP(allocaIP); auto args = -- cgit v1.1 From ed5fe66370cb0ea88913458d71959407dc7b1394 Mon Sep 17 00:00:00 2001 From: Stephen Tozer Date: Thu, 4 Apr 2024 10:53:36 +0100 Subject: [RemoveDIs][BC] Reject intrinsic->record upgrades for old-format modules (#87494) Fixes issue noted at: https://github.com/llvm/llvm-project/pull/86274 When loading bitcode lazily, we may request debug intrinsics be upgraded to debug records during the module parsing phase; later on we perform this upgrade when materializing the module functions. If we change the module's debug info format between parsing and materializing however, then the requested upgrade is no longer correct and leads to an assertion. This patch fixes the issue by adding an extra check in the autoupgrader to see if the upgrade is no longer suitable, and either exit-out or fall back to the correct intrinsic->intrinsic upgrade if one is required. 
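To make that concrete, here is a minimal self-contained C++ sketch of the decision flow. The `Module` and `DbgIntrinsicCall` types and the helper names are hypothetical stand-ins for illustration, not the actual LLVM API; the sketch only models re-checking the module's debug-info format before honouring a previously requested intrinsic-to-record upgrade, and falling back to the normal intrinsic upgrade path otherwise.

#include <iostream>
#include <string>

// Hypothetical stand-ins for the real LLVM types (assumption, illustration only).
struct Module {
  bool IsNewDbgInfoFormat = true; // may be flipped between parsing and materializing
};

struct DbgIntrinsicCall {
  Module *Parent;
  std::string Name; // e.g. "dbg.value"
  Module *getModule() const { return Parent; }
};

// Models converting a dbg.* intrinsic call into a debug record (new format).
void upgradeToDbgRecord(const DbgIntrinsicCall &CI) {
  std::cout << "converted " << CI.Name << " to a debug record\n";
}

// Models the ordinary intrinsic-to-intrinsic upgrade path (old format).
void upgradeIntrinsicNormally(const DbgIntrinsicCall &CI) {
  std::cout << "kept " << CI.Name << " as an intrinsic call\n";
}

// The guard: an upgrade requested at parse time is only honoured if the
// module is still in the new debug-info format when we materialize.
void upgradeDebugIntrinsic(const DbgIntrinsicCall &CI) {
  if (!CI.getModule()->IsNewDbgInfoFormat) {
    upgradeIntrinsicNormally(CI); // fall back to the regular upgrade path
    return;
  }
  upgradeToDbgRecord(CI);
}

int main() {
  Module M;
  DbgIntrinsicCall CI{&M, "dbg.value"};
  upgradeDebugIntrinsic(CI); // converted: module is still in the new format

  M.IsNewDbgInfoFormat = false; // format changed between parse and materialize
  upgradeDebugIntrinsic(CI);    // falls back to the intrinsic upgrade
}
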
--- llvm/include/llvm/IR/AutoUpgrade.h | 3 ++- llvm/lib/IR/AutoUpgrade.cpp | 39 ++++++++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/llvm/include/llvm/IR/AutoUpgrade.h b/llvm/include/llvm/IR/AutoUpgrade.h index 152f781..97c3e4d 100644 --- a/llvm/include/llvm/IR/AutoUpgrade.h +++ b/llvm/include/llvm/IR/AutoUpgrade.h @@ -36,7 +36,8 @@ namespace llvm { /// for upgrading, and returns true if it requires upgrading. It may return /// null in NewFn if the all calls to the original intrinsic function /// should be transformed to non-function-call instructions. - bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn); + bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, + bool CanUpgradeDebugIntrinsicsToRecords = true); /// This is the complement to the above, replacing a specific call to an /// intrinsic function with a call to the specified new function. diff --git a/llvm/lib/IR/AutoUpgrade.cpp b/llvm/lib/IR/AutoUpgrade.cpp index a44f6af..0f8c984 100644 --- a/llvm/lib/IR/AutoUpgrade.cpp +++ b/llvm/lib/IR/AutoUpgrade.cpp @@ -983,7 +983,8 @@ static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) { return Intrinsic::not_intrinsic; } -static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn) { +static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, + bool CanUpgradeDebugIntrinsicsToRecords) { assert(F && "Illegal to upgrade a non-existent Function."); StringRef Name = F->getName(); @@ -1057,7 +1058,8 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn) { case 'd': if (Name.consume_front("dbg.")) { // Mark debug intrinsics for upgrade to new debug format. - if (F->getParent()->IsNewDbgInfoFormat) { + if (CanUpgradeDebugIntrinsicsToRecords && + F->getParent()->IsNewDbgInfoFormat) { if (Name == "addr" || Name == "value" || Name == "assign" || Name == "declare" || Name == "label") { // There's no function to replace these with. @@ -1413,9 +1415,11 @@ static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn) { return false; } -bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn) { +bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn, + bool CanUpgradeDebugIntrinsicsToRecords) { NewFn = nullptr; - bool Upgraded = upgradeIntrinsicFunction1(F, NewFn); + bool Upgraded = + upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords); assert(F != NewFn && "Intrinsic function upgraded to the same function"); // Upgrade intrinsic attributes. This does not change the function. @@ -2412,6 +2416,7 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Builder.SetInsertPoint(CI->getParent(), CI->getIterator()); if (!NewFn) { + bool FallthroughToDefaultUpgrade = false; // Get the Function's name. StringRef Name = F->getName(); @@ -4262,16 +4267,30 @@ void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) { Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder); } else if (IsAMDGCN) { Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder); - } else if (IsDbg && CI->getModule()->IsNewDbgInfoFormat) { - upgradeDbgIntrinsicToDbgRecord(Name, CI); + } else if (IsDbg) { + // We might have decided we don't want the new format after all between + // first requesting the upgrade and now; skip the conversion if that is + // the case, and check here to see if the intrinsic needs to be upgraded + // normally. 
+ if (!CI->getModule()->IsNewDbgInfoFormat) { + bool NeedsUpgrade = + upgradeIntrinsicFunction1(CI->getCalledFunction(), NewFn, false); + if (!NeedsUpgrade) + return; + FallthroughToDefaultUpgrade = true; + } else { + upgradeDbgIntrinsicToDbgRecord(Name, CI); + } } else { llvm_unreachable("Unknown function for CallBase upgrade."); } - if (Rep) - CI->replaceAllUsesWith(Rep); - CI->eraseFromParent(); - return; + if (!FallthroughToDefaultUpgrade) { + if (Rep) + CI->replaceAllUsesWith(Rep); + CI->eraseFromParent(); + return; + } } const auto &DefaultCase = [&]() -> void { -- cgit v1.1 From 918542d2ce550c0ac3c7e4d753318265f277631b Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Thu, 4 Apr 2024 10:55:24 +0100 Subject: ELFRelocs/AArch64: update canonical reference URL (NFC) (#86955) Update the URL of the reference to be used for AArch64.def, and add some comments. The canonical aaelf64 document can be found at: https://github.com/ARM-software/abi-aa/blob/main/aaelf64/aaelf64.rst --- .../llvm/BinaryFormat/ELFRelocs/AArch64.def | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def index 5fb3fa4..cb05db8 100644 --- a/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def +++ b/llvm/include/llvm/BinaryFormat/ELFRelocs/AArch64.def @@ -1,18 +1,19 @@ - #ifndef ELF_RELOC #error "ELF_RELOC must be defined" #endif -// Based on ABI release 1.1-beta, dated 6 November 2013. NB: The cover page of -// this document, IHI0056C_beta_aaelf64.pdf, on infocenter.arm.com, still -// labels this as release 1.0. +// Based on released ABI: https://github.com/ARM-software/abi-aa, aaelf64. +// ELF64 +// Null relocation: also 0x100 for ELF64 ELF_RELOC(R_AARCH64_NONE, 0) +// Data relocations ELF_RELOC(R_AARCH64_ABS64, 0x101) ELF_RELOC(R_AARCH64_ABS32, 0x102) ELF_RELOC(R_AARCH64_ABS16, 0x103) ELF_RELOC(R_AARCH64_PREL64, 0x104) ELF_RELOC(R_AARCH64_PREL32, 0x105) ELF_RELOC(R_AARCH64_PREL16, 0x106) +// Static AArch64 relocations ELF_RELOC(R_AARCH64_MOVW_UABS_G0, 0x107) ELF_RELOC(R_AARCH64_MOVW_UABS_G0_NC, 0x108) ELF_RELOC(R_AARCH64_MOVW_UABS_G1, 0x109) @@ -60,11 +61,13 @@ ELF_RELOC(R_AARCH64_LD64_GOT_LO12_NC, 0x138) ELF_RELOC(R_AARCH64_LD64_GOTPAGE_LO15, 0x139) ELF_RELOC(R_AARCH64_PLT32, 0x13a) ELF_RELOC(R_AARCH64_GOTPCREL32, 0x13b) +// General dynamic TLS relocations ELF_RELOC(R_AARCH64_TLSGD_ADR_PREL21, 0x200) ELF_RELOC(R_AARCH64_TLSGD_ADR_PAGE21, 0x201) ELF_RELOC(R_AARCH64_TLSGD_ADD_LO12_NC, 0x202) ELF_RELOC(R_AARCH64_TLSGD_MOVW_G1, 0x203) ELF_RELOC(R_AARCH64_TLSGD_MOVW_G0_NC, 0x204) +// Local dynamic TLS relocations ELF_RELOC(R_AARCH64_TLSLD_ADR_PREL21, 0x205) ELF_RELOC(R_AARCH64_TLSLD_ADR_PAGE21, 0x206) ELF_RELOC(R_AARCH64_TLSLD_ADD_LO12_NC, 0x207) @@ -92,6 +95,7 @@ ELF_RELOC(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC, 0x21c) ELF_RELOC(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21, 0x21d) ELF_RELOC(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC, 0x21e) ELF_RELOC(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19, 0x21f) +// Local exec TLS relocations ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G2, 0x220) ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G1, 0x221) ELF_RELOC(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC, 0x222) @@ -108,6 +112,7 @@ ELF_RELOC(R_AARCH64_TLSLE_LDST32_TPREL_LO12, 0x22c) ELF_RELOC(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC, 0x22d) ELF_RELOC(R_AARCH64_TLSLE_LDST64_TPREL_LO12, 0x22e) ELF_RELOC(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC, 0x22f) +// TLS descriptor relocations ELF_RELOC(R_AARCH64_TLSDESC_LD_PREL19, 0x230) 
ELF_RELOC(R_AARCH64_TLSDESC_ADR_PREL21, 0x231) ELF_RELOC(R_AARCH64_TLSDESC_ADR_PAGE21, 0x232) @@ -122,8 +127,7 @@ ELF_RELOC(R_AARCH64_TLSLE_LDST128_TPREL_LO12, 0x23a) ELF_RELOC(R_AARCH64_TLSLE_LDST128_TPREL_LO12_NC, 0x23b) ELF_RELOC(R_AARCH64_TLSLD_LDST128_DTPREL_LO12, 0x23c) ELF_RELOC(R_AARCH64_TLSLD_LDST128_DTPREL_LO12_NC, 0x23d) -ELF_RELOC(R_AARCH64_AUTH_ABS64, 0x244) -// Dynamic relocations start +// Dynamic relocations ELF_RELOC(R_AARCH64_COPY, 0x400) ELF_RELOC(R_AARCH64_GLOB_DAT, 0x401) ELF_RELOC(R_AARCH64_JUMP_SLOT, 0x402) @@ -136,8 +140,12 @@ ELF_RELOC(R_AARCH64_TLS_DTPREL64, 0x405) ELF_RELOC(R_AARCH64_TLS_TPREL64, 0x406) ELF_RELOC(R_AARCH64_TLSDESC, 0x407) ELF_RELOC(R_AARCH64_IRELATIVE, 0x408) +// PAuthABI static and dynamic relocations: defined in pauthabielf64, +// https://github.com/ARM-software/abi-aa +ELF_RELOC(R_AARCH64_AUTH_ABS64, 0x244) ELF_RELOC(R_AARCH64_AUTH_RELATIVE, 0x411) +// ELF32 // ELF_RELOC(R_AARCH64_P32_NONE, 0) ELF_RELOC(R_AARCH64_P32_ABS32, 0x001) ELF_RELOC(R_AARCH64_P32_ABS16, 0x002) @@ -216,7 +224,7 @@ ELF_RELOC(R_AARCH64_P32_TLSDESC_ADR_PAGE21, 0x07c) ELF_RELOC(R_AARCH64_P32_TLSDESC_LD32_LO12, 0x07d) ELF_RELOC(R_AARCH64_P32_TLSDESC_ADD_LO12, 0x07e) ELF_RELOC(R_AARCH64_P32_TLSDESC_CALL, 0x07f) -// Dynamic relocations start +// Dynamic relocations ELF_RELOC(R_AARCH64_P32_COPY, 0x0b4) ELF_RELOC(R_AARCH64_P32_GLOB_DAT, 0x0b5) ELF_RELOC(R_AARCH64_P32_JUMP_SLOT, 0x0b6) -- cgit v1.1 From 099ecdf1ec2f87b5bae74518166daf1d2b09da45 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Thu, 4 Apr 2024 10:55:42 +0100 Subject: [mlir][OpenMP] map argument to reduction initialization region (#86979) The argument to the initialization region of reduction declarations was never mapped. This meant that if this argument was accessed inside the initialization region, that mlir operation would be translated to an llvm operation with a null argument (failing verification). Adding the mapping ensures that the right LLVM value can be found when inlining and converting the initialization region. We have to separately establish and clean up these mappings for each use of the reduction declaration because repeated usage of the same declaration will inline it using a different concrete value for the block argument. This argument was never used previously because for most cases the initialized value depends only upon the type of the reduction, not on the original variable. It is needed now so that we can read the array extents for the local copy from the mold. 
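The map / inline / forget sequence described above can be pictured with a toy value map. The types below are simplified stand-ins for the MLIR-to-LLVM translation state, an assumption made purely for illustration; the real code uses the `mapValue`, `inlineConvertOmpRegions`, and `forgetMapping` calls shown in the diff that follows.

#include <cassert>
#include <iostream>
#include <map>
#include <string>

// Toy stand-ins (assumptions): block arguments and LLVM values are just
// named handles, and "inlining" resolves arguments through a value map.
using BlockArg = std::string;
using LlvmValue = std::string;

struct Translation {
  std::map<BlockArg, LlvmValue> valueMap;

  void mapValue(const BlockArg &arg, const LlvmValue &v) { valueMap[arg] = v; }
  void forgetMapping(const BlockArg &arg) { valueMap.erase(arg); }

  // Models inlining the init region: the region reads its single block
  // argument, which must already be mapped to a concrete value.
  LlvmValue inlineInitRegion(const BlockArg &arg) {
    auto it = valueMap.find(arg);
    assert(it != valueMap.end() && "init region argument was never mapped");
    return "init(" + it->second + ")";
  }
};

int main() {
  Translation t;
  const BlockArg initArg = "%arg0"; // the init region's block argument

  // Re-using the same reduction declaration for two different variables:
  for (const LlvmValue var : {LlvmValue("%red_var_a"), LlvmValue("%red_var_b")}) {
    t.mapValue(initArg, var);                     // map before inlining
    std::cout << t.inlineInitRegion(initArg) << "\n";
    t.forgetMapping(initArg);                     // forget so the next use can remap
  }
}
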
Flang support for reductions on assumed shape arrays patch 2/3 --- .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 35 +++++++ .../Target/LLVMIR/openmp-reduction-init-arg.mlir | 111 +++++++++++++++++++++ 2 files changed, 146 insertions(+) create mode 100644 mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index cacf2c3..c4bf6a2 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -825,6 +825,25 @@ static void allocByValReductionVars( } } +/// Map input argument to all reduction initialization regions +template +static void +mapInitializationArg(T loop, LLVM::ModuleTranslation &moduleTranslation, + SmallVectorImpl &reductionDecls, + unsigned i) { + // map input argument to the initialization region + mlir::omp::DeclareReductionOp &reduction = reductionDecls[i]; + Region &initializerRegion = reduction.getInitializerRegion(); + Block &entry = initializerRegion.front(); + assert(entry.getNumArguments() == 1 && + "the initialization region has one argument"); + + mlir::Value mlirSource = loop.getReductionVars()[i]; + llvm::Value *llvmSource = moduleTranslation.lookupValue(mlirSource); + assert(llvmSource && "lookup reduction var"); + moduleTranslation.mapValue(entry.getArgument(0), llvmSource); +} + /// Collect reduction info template static void collectReductionInfo( @@ -902,6 +921,10 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, loop.getRegion().getArguments().take_back(loop.getNumReductionVars()); for (unsigned i = 0; i < loop.getNumReductionVars(); ++i) { SmallVector phis; + + // map block argument to initializer region + mapInitializationArg(loop, moduleTranslation, reductionDecls, i); + if (failed(inlineConvertOmpRegions(reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral", builder, moduleTranslation, &phis))) @@ -925,6 +948,11 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, builder.CreateStore(phis[0], privateReductionVariables[i]); // the rest was handled in allocByValReductionVars } + + // forget the mapping for the initializer region because we might need a + // different mapping if this reduction declaration is re-used for a + // different variable + moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion()); } // Store the mapping between reduction variables and their private copies on @@ -1118,6 +1146,9 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, opInst.getNumReductionVars()); for (unsigned i = 0; i < opInst.getNumReductionVars(); ++i) { SmallVector phis; + + // map the block argument + mapInitializationArg(opInst, moduleTranslation, reductionDecls, i); if (failed(inlineConvertOmpRegions( reductionDecls[i].getInitializerRegion(), "omp.reduction.neutral", builder, moduleTranslation, &phis))) @@ -1144,6 +1175,10 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, builder.CreateStore(phis[0], privateReductionVariables[i]); // the rest is done in allocByValReductionVars } + + // clear block argument mapping in case it needs to be re-created with a + // different source for another use of the same reduction decl + moduleTranslation.forgetMapping(reductionDecls[i].getInitializerRegion()); } // Store the mapping between reduction variables and their private copies on diff --git 
a/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir new file mode 100644 index 0000000..5dd31c4 --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-reduction-init-arg.mlir @@ -0,0 +1,111 @@ +// RUN: mlir-translate -mlir-to-llvmir %s | FileCheck %s + +// Test that the block argument to the initialization region of +// omp.declare_reduction gets mapped properly when translating to LLVMIR. + +module { + omp.declare_reduction @add_reduction_byref_box_Uxf64 : !llvm.ptr init { + ^bb0(%arg0: !llvm.ptr): +// test usage of %arg0: + %11 = llvm.load %arg0 : !llvm.ptr -> !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> + omp.yield(%arg0 : !llvm.ptr) + } combiner { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + omp.yield(%arg0 : !llvm.ptr) + } + + llvm.func internal @_QFPreduce(%arg0: !llvm.ptr {fir.bindc_name = "r"}, %arg1: !llvm.ptr {fir.bindc_name = "r2"}) attributes {sym_visibility = "private"} { + %8 = llvm.mlir.constant(1 : i32) : i32 + %9 = llvm.mlir.constant(10 : i32) : i32 + %10 = llvm.mlir.constant(0 : i32) : i32 + %83 = llvm.mlir.constant(1 : i64) : i64 + %84 = llvm.alloca %83 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr + %86 = llvm.mlir.constant(1 : i64) : i64 + %87 = llvm.alloca %86 x !llvm.struct<(ptr, i64, i32, i8, i8, i8, i8, array<1 x array<3 x i64>>)> : (i64) -> !llvm.ptr +// test multiple reduction variables to ensure they don't intefere with eachother +// when inlining the reduction init region multiple times + omp.parallel byref reduction(@add_reduction_byref_box_Uxf64 %84 -> %arg3 : !llvm.ptr, @add_reduction_byref_box_Uxf64 %87 -> %arg4 : !llvm.ptr) { + omp.terminator + } + llvm.return + } +} + +// CHECK-LABEL: define internal void @_QFPreduce +// CHECK: %[[VAL_0:.*]] = alloca { ptr, ptr }, align 8 +// CHECK: %[[VAL_1:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 +// CHECK: %[[VAL_2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 +// CHECK: br label %[[VAL_3:.*]] +// CHECK: entry: ; preds = %[[VAL_4:.*]] +// CHECK: %[[VAL_5:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: br label %[[VAL_6:.*]] +// CHECK: omp_parallel: ; preds = %[[VAL_3]] +// CHECK: %[[VAL_7:.*]] = getelementptr { ptr, ptr }, ptr %[[VAL_0]], i32 0, i32 0 +// CHECK: store ptr %[[VAL_1]], ptr %[[VAL_7]], align 8 +// CHECK: %[[VAL_8:.*]] = getelementptr { ptr, ptr }, ptr %[[VAL_0]], i32 0, i32 1 +// CHECK: store ptr %[[VAL_2]], ptr %[[VAL_8]], align 8 +// CHECK: call void (ptr, i32, ptr, ...) 
@__kmpc_fork_call(ptr @1, i32 1, ptr @_QFPreduce..omp_par, ptr %[[VAL_0]]) +// CHECK: br label %[[VAL_9:.*]] +// CHECK: omp.par.outlined.exit: ; preds = %[[VAL_6]] +// CHECK: br label %[[VAL_10:.*]] +// CHECK: omp.par.exit.split: ; preds = %[[VAL_9]] +// CHECK: ret void +// CHECK: omp.par.entry: +// CHECK: %[[VAL_11:.*]] = getelementptr { ptr, ptr }, ptr %[[VAL_12:.*]], i32 0, i32 0 +// CHECK: %[[VAL_13:.*]] = load ptr, ptr %[[VAL_11]], align 8 +// CHECK: %[[VAL_14:.*]] = getelementptr { ptr, ptr }, ptr %[[VAL_12]], i32 0, i32 1 +// CHECK: %[[VAL_15:.*]] = load ptr, ptr %[[VAL_14]], align 8 +// CHECK: %[[VAL_16:.*]] = alloca i32, align 4 +// CHECK: %[[VAL_17:.*]] = load i32, ptr %[[VAL_18:.*]], align 4 +// CHECK: store i32 %[[VAL_17]], ptr %[[VAL_16]], align 4 +// CHECK: %[[VAL_19:.*]] = load i32, ptr %[[VAL_16]], align 4 +// CHECK: %[[VAL_20:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_13]], align 8 +// CHECK: %[[VAL_21:.*]] = alloca ptr, align 8 +// CHECK: store ptr %[[VAL_13]], ptr %[[VAL_21]], align 8 +// CHECK: %[[VAL_22:.*]] = load { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, ptr %[[VAL_15]], align 8 +// CHECK: %[[VAL_23:.*]] = alloca ptr, align 8 +// CHECK: store ptr %[[VAL_15]], ptr %[[VAL_23]], align 8 +// CHECK: %[[VAL_24:.*]] = alloca [2 x ptr], align 8 +// CHECK: br label %[[VAL_25:.*]] +// CHECK: omp.par.region: ; preds = %[[VAL_26:.*]] +// CHECK: br label %[[VAL_27:.*]] +// CHECK: omp.par.region1: ; preds = %[[VAL_25]] +// CHECK: br label %[[VAL_28:.*]] +// CHECK: omp.region.cont: ; preds = %[[VAL_27]] +// CHECK: %[[VAL_29:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_24]], i64 0, i64 0 +// CHECK: store ptr %[[VAL_21]], ptr %[[VAL_29]], align 8 +// CHECK: %[[VAL_30:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_24]], i64 0, i64 1 +// CHECK: store ptr %[[VAL_23]], ptr %[[VAL_30]], align 8 +// CHECK: %[[VAL_31:.*]] = call i32 @__kmpc_global_thread_num(ptr @1) +// CHECK: %[[VAL_32:.*]] = call i32 @__kmpc_reduce(ptr @1, i32 %[[VAL_31]], i32 2, i64 16, ptr %[[VAL_24]], ptr @.omp.reduction.func, ptr @.gomp_critical_user_.reduction.var) +// CHECK: switch i32 %[[VAL_32]], label %[[VAL_33:.*]] [ +// CHECK: i32 1, label %[[VAL_34:.*]] +// CHECK: i32 2, label %[[VAL_35:.*]] +// CHECK: ] +// CHECK: reduce.switch.atomic: ; preds = %[[VAL_28]] +// CHECK: unreachable +// CHECK: reduce.switch.nonatomic: ; preds = %[[VAL_28]] +// CHECK: %[[VAL_36:.*]] = load ptr, ptr %[[VAL_21]], align 8 +// CHECK: %[[VAL_37:.*]] = load ptr, ptr %[[VAL_23]], align 8 +// CHECK: call void @__kmpc_end_reduce(ptr @1, i32 %[[VAL_31]], ptr @.gomp_critical_user_.reduction.var) +// CHECK: br label %[[VAL_33]] +// CHECK: reduce.finalize: ; preds = %[[VAL_34]], %[[VAL_28]] +// CHECK: br label %[[VAL_38:.*]] +// CHECK: omp.par.pre_finalize: ; preds = %[[VAL_33]] +// CHECK: br label %[[VAL_39:.*]] +// CHECK: omp.par.outlined.exit.exitStub: ; preds = %[[VAL_38]] +// CHECK: ret void +// CHECK: %[[VAL_40:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_41:.*]], i64 0, i64 0 +// CHECK: %[[VAL_42:.*]] = load ptr, ptr %[[VAL_40]], align 8 +// CHECK: %[[VAL_43:.*]] = load ptr, ptr %[[VAL_42]], align 8 +// CHECK: %[[VAL_44:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_45:.*]], i64 0, i64 0 +// CHECK: %[[VAL_46:.*]] = load ptr, ptr %[[VAL_44]], align 8 +// CHECK: %[[VAL_47:.*]] = load ptr, ptr %[[VAL_46]], align 8 +// CHECK: %[[VAL_48:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_41]], i64 0, i64 1 +// CHECK: %[[VAL_49:.*]] = load ptr, ptr %[[VAL_48]], align 8 +// 
CHECK: %[[VAL_50:.*]] = load ptr, ptr %[[VAL_49]], align 8 +// CHECK: %[[VAL_51:.*]] = getelementptr inbounds [2 x ptr], ptr %[[VAL_45]], i64 0, i64 1 +// CHECK: %[[VAL_52:.*]] = load ptr, ptr %[[VAL_51]], align 8 +// CHECK: %[[VAL_53:.*]] = load ptr, ptr %[[VAL_52]], align 8 +// CHECK: ret void + -- cgit v1.1 From a9d963fdf81dc5c6221a0e915821877c35096aff Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 11:01:52 +0100 Subject: [DAG] SoftenFloatResult - add clang-format off/on tags around switch statement. NFC. Stop clang-format from trying to put all the case on separate lines. --- llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 3332c02..a8b1f41 100644 --- a/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -53,6 +53,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { SDValue R = SDValue(); switch (N->getOpcode()) { + // clang-format off default: #ifndef NDEBUG dbgs() << "SoftenFloatResult #" << ResNo << ": "; @@ -115,9 +116,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: case ISD::FLDEXP: case ISD::STRICT_FLDEXP: R = SoftenFloatRes_ExpOp(N); break; - case ISD::FFREXP: - R = SoftenFloatRes_FFREXP(N); - break; + case ISD::FFREXP: R = SoftenFloatRes_FFREXP(N); break; case ISD::STRICT_FREM: case ISD::FREM: R = SoftenFloatRes_FREM(N); break; case ISD::STRICT_FRINT: @@ -150,14 +149,11 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::VECREDUCE_FMIN: case ISD::VECREDUCE_FMAX: case ISD::VECREDUCE_FMAXIMUM: - case ISD::VECREDUCE_FMINIMUM: - R = SoftenFloatRes_VECREDUCE(N); - break; + case ISD::VECREDUCE_FMINIMUM: R = SoftenFloatRes_VECREDUCE(N); break; case ISD::VECREDUCE_SEQ_FADD: - case ISD::VECREDUCE_SEQ_FMUL: - R = SoftenFloatRes_VECREDUCE_SEQ(N); - break; - } + case ISD::VECREDUCE_SEQ_FMUL: R = SoftenFloatRes_VECREDUCE_SEQ(N); break; + // clang-format on + } // If R is null, the sub-method took care of registering the result. if (R.getNode()) { -- cgit v1.1 From d799be8154d6cb9f5a5b0f6a5c41923313731a85 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Thu, 4 Apr 2024 12:06:28 +0200 Subject: [flang] implement passing assumed-size array to assumed-rank arguments (#87511) Remove the TODO, the patch that ensured that the descriptor upper bound is set to -1 was https://github.com/llvm/llvm-project/pull/79156. --- flang/lib/Lower/ConvertCall.cpp | 9 --------- flang/test/Lower/HLFIR/assumed-rank-iface.f90 | 23 +++++++++++++++++------ 2 files changed, 17 insertions(+), 15 deletions(-) diff --git a/flang/lib/Lower/ConvertCall.cpp b/flang/lib/Lower/ConvertCall.cpp index 6eba243..315a3f6 100644 --- a/flang/lib/Lower/ConvertCall.cpp +++ b/flang/lib/Lower/ConvertCall.cpp @@ -1340,15 +1340,6 @@ static PreparedDummyArgument preparePresentUserCallActualArgument( } else { addr = hlfir::genVariableRawAddress(loc, builder, entity); } - // The last extent created for assumed-rank descriptors must be -1 (18.5.3 - // point 5.). This should be done when creating the assumed-size shape for - // consistency. 
- if (auto baseBoxDummy = mlir::dyn_cast(dummyType)) - if (baseBoxDummy.isAssumedRank()) - if (const Fortran::semantics::Symbol *sym = - Fortran::evaluate::UnwrapWholeSymbolDataRef(*arg.entity)) - if (Fortran::semantics::IsAssumedSizeArray(sym->GetUltimate())) - TODO(loc, "passing assumed-size to assumed-rank array"); // For ranked actual passed to assumed-rank dummy, the cast to assumed-rank // box is inserted when building the fir.call op. Inserting it here would diff --git a/flang/test/Lower/HLFIR/assumed-rank-iface.f90 b/flang/test/Lower/HLFIR/assumed-rank-iface.f90 index 5df7944..155ce8f 100644 --- a/flang/test/Lower/HLFIR/assumed-rank-iface.f90 +++ b/flang/test/Lower/HLFIR/assumed-rank-iface.f90 @@ -133,9 +133,20 @@ end subroutine ! CHECK: %[[VAL_11:.*]] = fir.convert %[[VAL_7]] : (!fir.box>) -> !fir.box> ! CHECK: fir.call @_QPint_opt_assumed_rank(%[[VAL_11]]) fastmath : (!fir.box>) -> () -! TODO: set assumed size last extent to -1. -!subroutine int_r2_assumed_size_to_assumed_rank(x) -! use ifaces, only : int_assumed_rank -! integer :: x(10, *) -! call int_assumed_rank(x) -!end subroutine +subroutine int_r2_assumed_size_to_assumed_rank(x) + use ifaces, only : int_assumed_rank + integer :: x(10, *) + call int_assumed_rank(x) +end subroutine +! CHECK-LABEL: func.func @_QPint_r2_assumed_size_to_assumed_rank( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.ref> {fir.bindc_name = "x"}) { +! CHECK: %[[VAL_1:.*]] = arith.constant 10 : i64 +! CHECK: %[[VAL_2:.*]] = fir.convert %[[VAL_1]] : (i64) -> index +! CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_4:.*]] = arith.cmpi sgt, %[[VAL_2]], %[[VAL_3]] : index +! CHECK: %[[VAL_5:.*]] = arith.select %[[VAL_4]], %[[VAL_2]], %[[VAL_3]] : index +! CHECK: %[[VAL_6:.*]] = arith.constant -1 : index +! CHECK: %[[VAL_7:.*]] = fir.shape %[[VAL_5]], %[[VAL_6]] : (index, index) -> !fir.shape<2> +! CHECK: %[[VAL_8:.*]]:2 = hlfir.declare %[[VAL_0]](%[[VAL_7]]) {uniq_name = "_QFint_r2_assumed_size_to_assumed_rankEx"} : (!fir.ref>, !fir.shape<2>) -> (!fir.box>, !fir.ref>) +! CHECK: %[[VAL_9:.*]] = fir.convert %[[VAL_8]]#0 : (!fir.box>) -> !fir.box> +! CHECK: fir.call @_QPint_assumed_rank(%[[VAL_9]]) fastmath : (!fir.box>) -> () -- cgit v1.1 From cbdc86e46c3824dda152db2bd0b9fdb3872ddf87 Mon Sep 17 00:00:00 2001 From: "Andrew V. Teylu" Date: Thu, 4 Apr 2024 11:10:23 +0100 Subject: [clang-repl] Add call to 'InitializeAllAsmParsers' (#86727) This PR fixes the following issue when working with `clang-repl`: ``` fatal error: error in backend: Inline asm not supported by this streamer because we don't have an asm parser for this target ``` When working with the following input (named "unit.cpp"): ```cpp __asm(".globl _ZSt21ios_base_library_initv"); int x; ``` and then in `clang-repl`: ``` #include "unit.cpp" x = 10; ``` Signed-off-by: Andrew V. 
Teylu --- clang/test/Interpreter/inline-asm.cpp | 17 +++++++++++++++++ clang/tools/clang-repl/ClangRepl.cpp | 1 + 2 files changed, 18 insertions(+) create mode 100644 clang/test/Interpreter/inline-asm.cpp diff --git a/clang/test/Interpreter/inline-asm.cpp b/clang/test/Interpreter/inline-asm.cpp new file mode 100644 index 0000000..f94f14d --- /dev/null +++ b/clang/test/Interpreter/inline-asm.cpp @@ -0,0 +1,17 @@ +// REQUIRES: host-supports-jit, x86_64-linux +// UNSUPPORTED: system-aix +// +// RUN: rm -rf %t +// RUN: mkdir -p %t +// RUN: split-file %s %t +// +// RUN: cat %t/inline-asm.txt | clang-repl -Xcc="-I%t" + +//--- inline-asm.cpp +__asm(".globl _ZSt21ios_base_library_initv"); +int x; + +//--- inline-asm.txt +#include "inline-asm.cpp" +x = 10; +%quit diff --git a/clang/tools/clang-repl/ClangRepl.cpp b/clang/tools/clang-repl/ClangRepl.cpp index 5bad814..aecf61b 100644 --- a/clang/tools/clang-repl/ClangRepl.cpp +++ b/clang/tools/clang-repl/ClangRepl.cpp @@ -152,6 +152,7 @@ int main(int argc, const char **argv) { llvm::InitializeAllTargets(); llvm::InitializeAllTargetMCs(); llvm::InitializeAllAsmPrinters(); + llvm::InitializeAllAsmParsers(); if (OptHostSupportsJit) { auto J = llvm::orc::LLJITBuilder().create(); -- cgit v1.1 From 2d0087424f120c97c8fdecf50e6e5f6a1f3969f2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 11:10:55 +0100 Subject: [DAG] Remove extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x), idx) fold (#87480) Reverse the fold with handling inside canCreateUndefOrPoison for cases where we know that the extract index is in bounds. This exposed a number of regressions, and required some initial freeze handling of SCALAR_TO_VECTOR, which will require us to properly improve DemandedElts support to handle its undef upper elements. There is still one outstanding regression to be addressed in the future - how do we want to handle folds involving frozen loads?
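As a rough illustration only (the function below is hypothetical and not part of the patch), the affected pattern is an extract from a frozen vector at a constant index, as exercised by freeze-vector.ll: with the index known to be in bounds, the extract cannot introduce new undef or poison, so the generic freeze/poison reasoning can handle it and, per the updated checks, the extracted byte can be stored straight to memory:

define void @freeze_then_extract(<16 x i8> %v, ptr %dst) {
  ; index 6 is a constant known to be in bounds for <16 x i8>, so this
  ; extract cannot be any more poisonous than the frozen source vector
  %f = freeze <16 x i8> %v
  %e = extractelement <16 x i8> %f, i64 6
  store i8 %e, ptr %dst
  ret void
}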
Fixes #86968 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 6 ------ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 11 +++++++++++ llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 21 +++++++++++++++++++++ llvm/lib/Target/X86/X86ISelLowering.cpp | 2 ++ llvm/test/CodeGen/X86/freeze-vector.ll | 6 ++---- ...den-load-of-small-alloca-with-zero-upper-half.ll | 18 +++++++++++++----- 6 files changed, 49 insertions(+), 15 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 28fe069..0a47318 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -22265,12 +22265,6 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) { IndexC->getAPIntValue().uge(VecVT.getVectorNumElements())) return DAG.getUNDEF(ScalarVT); - // extract_vector_elt(freeze(x)), idx -> freeze(extract_vector_elt(x)), idx - if (VecOp.hasOneUse() && VecOp.getOpcode() == ISD::FREEZE) { - return DAG.getFreeze(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ScalarVT, - VecOp.getOperand(0), Index)); - } - // extract_vector_elt (build_vector x, y), 1 -> y if (((IndexC && VecOp.getOpcode() == ISD::BUILD_VECTOR) || VecOp.getOpcode() == ISD::SPLAT_VECTOR) && diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 25b51d5..1dd0fa4 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5149,6 +5149,17 @@ bool SelectionDAG::canCreateUndefOrPoison(SDValue Op, const APInt &DemandedElts, case ISD::OR: return ConsiderFlags && Op->getFlags().hasDisjoint(); + case ISD::SCALAR_TO_VECTOR: + // Check if we demand any upper (undef) elements. + return !PoisonOnly && DemandedElts.ugt(1); + + case ISD::EXTRACT_VECTOR_ELT: { + // Ensure that the element index is in bounds. + EVT VecVT = Op.getOperand(0).getValueType(); + KnownBits KnownIdx = computeKnownBits(Op.getOperand(1), Depth + 1); + return KnownIdx.getMaxValue().uge(VecVT.getVectorMinNumElements()); + } + case ISD::INSERT_VECTOR_ELT:{ // Ensure that the element index is in bounds. EVT VecVT = Op.getOperand(0).getValueType(); diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 8bb9541..5e053f9 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -742,6 +742,13 @@ SDValue TargetLowering::SimplifyMultipleUseDemandedBits( break; } + case ISD::FREEZE: { + SDValue N0 = Op.getOperand(0); + if (DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts, + /*PoisonOnly=*/false)) + return N0; + break; + } case ISD::AND: { LHSKnown = DAG.computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1); RHSKnown = DAG.computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1); @@ -3184,6 +3191,20 @@ bool TargetLowering::SimplifyDemandedVectorElts( } break; } + case ISD::FREEZE: { + SDValue N0 = Op.getOperand(0); + if (TLO.DAG.isGuaranteedNotToBeUndefOrPoison(N0, DemandedElts, + /*PoisonOnly=*/false)) + return TLO.CombineTo(Op, N0); + + // TODO: Replace this with the general fold from DAGCombiner::visitFREEZE + // freeze(op(x, ...)) -> op(freeze(x), ...). + if (N0.getOpcode() == ISD::SCALAR_TO_VECTOR && DemandedElts == 1) + return TLO.CombineTo( + Op, TLO.DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, + TLO.DAG.getFreeze(N0.getOperand(0)))); + break; + } case ISD::BUILD_VECTOR: { // Check all elements and simplify any unused elements with UNDEF. 
if (!DemandedElts.isAllOnes()) { diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index a9751e1..6f65344 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -42725,6 +42725,8 @@ bool X86TargetLowering::canCreateUndefOrPoisonForTargetNode( switch (Op.getOpcode()) { case X86ISD::PSHUFD: case X86ISD::VPERMILPI: + case X86ISD::UNPCKH: + case X86ISD::UNPCKL: return false; } return TargetLowering::canCreateUndefOrPoisonForTargetNode( diff --git a/llvm/test/CodeGen/X86/freeze-vector.ll b/llvm/test/CodeGen/X86/freeze-vector.ll index d9ee5f0..ee7f4ae 100644 --- a/llvm/test/CodeGen/X86/freeze-vector.ll +++ b/llvm/test/CodeGen/X86/freeze-vector.ll @@ -173,16 +173,14 @@ define void @freeze_extractelement(ptr %origin0, ptr %origin1, ptr %dst) nounwin ; X86-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NEXT: vmovdqa (%edx), %xmm0 ; X86-NEXT: vpand (%ecx), %xmm0, %xmm0 -; X86-NEXT: vpextrb $6, %xmm0, %ecx -; X86-NEXT: movb %cl, (%eax) +; X86-NEXT: vpextrb $6, %xmm0, (%eax) ; X86-NEXT: retl ; ; X64-LABEL: freeze_extractelement: ; X64: # %bb.0: ; X64-NEXT: vmovdqa (%rdi), %xmm0 ; X64-NEXT: vpand (%rsi), %xmm0, %xmm0 -; X64-NEXT: vpextrb $6, %xmm0, %eax -; X64-NEXT: movb %al, (%rdx) +; X64-NEXT: vpextrb $6, %xmm0, (%rdx) ; X64-NEXT: retq %i0 = load <16 x i8>, ptr %origin0 %i1 = load <16 x i8>, ptr %origin1 diff --git a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll index 691ca40..f7a27a5 100644 --- a/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll +++ b/llvm/test/CodeGen/X86/widen-load-of-small-alloca-with-zero-upper-half.ll @@ -65,6 +65,7 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X64-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movzwl (%rdi), %eax +; X64-NO-BMI2-NEXT: movzwl %ax, %eax ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NO-BMI2-NEXT: shrl %cl, %eax @@ -74,6 +75,7 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X64-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: movzwl %ax, %eax ; X64-BMI2-NEXT: shll $3, %esi ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax ; X64-BMI2-NEXT: movb %al, (%rdx) @@ -81,14 +83,15 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; ; X86-NO-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half: ; X86-NO-BMI2: # %bb.0: -; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx -; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %eax -; X86-NO-BMI2-NEXT: movzwl (%eax), %eax +; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx +; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx +; X86-NO-BMI2-NEXT: movzwl (%edx), %edx +; X86-NO-BMI2-NEXT: movzwl %dx, %edx ; X86-NO-BMI2-NEXT: shll $3, %ecx ; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx -; X86-NO-BMI2-NEXT: shrl %cl, %eax -; X86-NO-BMI2-NEXT: movb %al, (%edx) +; X86-NO-BMI2-NEXT: shrl %cl, %edx +; X86-NO-BMI2-NEXT: movb %dl, (%eax) ; X86-NO-BMI2-NEXT: retl ; ; X86-BMI2-LABEL: load_1byte_chunk_of_4byte_alloca_with_zero_upper_half: @@ -97,6 +100,7 @@ define void @load_1byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; 
X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movzwl (%edx), %edx +; X86-BMI2-NEXT: movzwl %dx, %edx ; X86-BMI2-NEXT: shll $3, %ecx ; X86-BMI2-NEXT: shrxl %ecx, %edx, %ecx ; X86-BMI2-NEXT: movb %cl, (%eax) @@ -119,6 +123,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X64-NO-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half: ; X64-NO-BMI2: # %bb.0: ; X64-NO-BMI2-NEXT: movzwl (%rdi), %eax +; X64-NO-BMI2-NEXT: movzwl %ax, %eax ; X64-NO-BMI2-NEXT: leal (,%rsi,8), %ecx ; X64-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X64-NO-BMI2-NEXT: shrl %cl, %eax @@ -128,6 +133,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X64-BMI2-LABEL: load_2byte_chunk_of_4byte_alloca_with_zero_upper_half: ; X64-BMI2: # %bb.0: ; X64-BMI2-NEXT: movzwl (%rdi), %eax +; X64-BMI2-NEXT: movzwl %ax, %eax ; X64-BMI2-NEXT: shll $3, %esi ; X64-BMI2-NEXT: shrxl %esi, %eax, %eax ; X64-BMI2-NEXT: movw %ax, (%rdx) @@ -139,6 +145,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-NO-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-NO-BMI2-NEXT: movzwl (%edx), %edx +; X86-NO-BMI2-NEXT: movzwl %dx, %edx ; X86-NO-BMI2-NEXT: shll $3, %ecx ; X86-NO-BMI2-NEXT: # kill: def $cl killed $cl killed $ecx ; X86-NO-BMI2-NEXT: shrl %cl, %edx @@ -151,6 +158,7 @@ define void @load_2byte_chunk_of_4byte_alloca_with_zero_upper_half(ptr %src, i64 ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %ecx ; X86-BMI2-NEXT: movl {{[0-9]+}}(%esp), %edx ; X86-BMI2-NEXT: movzwl (%edx), %edx +; X86-BMI2-NEXT: movzwl %dx, %edx ; X86-BMI2-NEXT: shll $3, %ecx ; X86-BMI2-NEXT: shrxl %ecx, %edx, %ecx ; X86-BMI2-NEXT: movw %cx, (%eax) -- cgit v1.1 From dbd6eb6779bc7073c5466e9689eb5a69736d1120 Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Thu, 4 Apr 2024 11:14:28 +0100 Subject: [flang][OpenMP] lower reductions of assumed shape arrays (#86982) Patch 1: https://github.com/llvm/llvm-project/pull/86978 Patch 2: https://github.com/llvm/llvm-project/pull/86979 --- flang/lib/Lower/OpenMP/ReductionProcessor.cpp | 25 +++++- .../wsloop-reduction-array-assumed-shape.f90 | 90 ++++++++++++++++++++++ 2 files changed, 112 insertions(+), 3 deletions(-) create mode 100644 flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 diff --git a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp index 6a8447a..c1c9411 100644 --- a/flang/lib/Lower/OpenMP/ReductionProcessor.cpp +++ b/flang/lib/Lower/OpenMP/ReductionProcessor.cpp @@ -523,11 +523,16 @@ void ReductionProcessor::addDeclareReduction( if (reductionSymbols) reductionSymbols->push_back(symbol); mlir::Value symVal = converter.getSymbolAddress(*symbol); - auto redType = mlir::cast(symVal.getType()); + mlir::Type eleType; + auto refType = mlir::dyn_cast_or_null(symVal.getType()); + if (refType) + eleType = refType.getEleTy(); + else + eleType = symVal.getType(); // all arrays must be boxed so that we have convenient access to all the // information needed to iterate over the array - if (mlir::isa(redType.getEleTy())) { + if (mlir::isa(eleType)) { // For Host associated symbols, use `SymbolBox` instead Fortran::lower::SymbolBox symBox = converter.lookupOneLevelUpSymbol(*symbol); @@ -542,11 +547,25 @@ void ReductionProcessor::addDeclareReduction( builder.create(currentLocation, box, alloca); symVal = alloca; - redType = mlir::cast(symVal.getType()); + 
} else if (mlir::isa(symVal.getType())) { + // boxed arrays are passed as values not by reference. Unfortunately, + // we can't pass a box by value to omp.redution_declare, so turn it + // into a reference + + auto alloca = + builder.create(currentLocation, symVal.getType()); + builder.create(currentLocation, symVal, alloca); + symVal = alloca; } else if (auto declOp = symVal.getDefiningOp()) { symVal = declOp.getBase(); } + // this isn't the same as the by-val and by-ref passing later in the + // pipeline. Both styles assume that the variable is a reference at + // this point + assert(mlir::isa(symVal.getType()) && + "reduction input var is a reference"); + reductionVars.push_back(symVal); } const bool isByRef = doReductionByRef(reductionVars); diff --git a/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 new file mode 100644 index 0000000..a1f339f --- /dev/null +++ b/flang/test/Lower/OpenMP/wsloop-reduction-array-assumed-shape.f90 @@ -0,0 +1,90 @@ +! RUN: bbc -emit-hlfir -fopenmp -o - %s | FileCheck %s +! RUN: %flang_fc1 -emit-hlfir -fopenmp -o - %s | FileCheck %s + +program reduce_assumed_shape +real(8), dimension(2) :: r +r = 0 +call reduce(r) +print *, r + +contains +subroutine reduce(r) + implicit none + real(8),intent(inout) :: r(:) + integer :: i = 0 + + !$omp parallel do reduction(+:r) + do i=0,10 + r(1) = i + r(2) = 1 + enddo + !$omp end parallel do +end subroutine +end program + +! CHECK-LABEL: omp.declare_reduction @add_reduction_byref_box_Uxf64 : !fir.ref>> init { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>): +! CHECK: %[[VAL_1:.*]] = arith.constant 0.000000e+00 : f64 +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_3:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_4:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_3]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_5:.*]] = fir.shape %[[VAL_4]]#1 : (index) -> !fir.shape<1> +! CHECK: %[[VAL_6:.*]] = fir.alloca !fir.array, %[[VAL_4]]#1 {bindc_name = ".tmp"} +! CHECK: %[[VAL_7:.*]]:2 = hlfir.declare %[[VAL_6]](%[[VAL_5]]) {uniq_name = ".tmp"} : (!fir.ref>, !fir.shape<1>) -> (!fir.box>, !fir.ref>) +! CHECK: hlfir.assign %[[VAL_1]] to %[[VAL_7]]#0 : f64, !fir.box> +! CHECK: %[[VAL_8:.*]] = fir.alloca !fir.box> +! CHECK: fir.store %[[VAL_7]]#0 to %[[VAL_8]] : !fir.ref>> +! CHECK: omp.yield(%[[VAL_8]] : !fir.ref>>) + +! CHECK-LABEL: } combiner { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref>>, %[[VAL_1:.*]]: !fir.ref>>): +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref>> +! CHECK: %[[VAL_3:.*]] = fir.load %[[VAL_1]] : !fir.ref>> +! CHECK: %[[VAL_4:.*]] = arith.constant 0 : index +! CHECK: %[[VAL_5:.*]]:3 = fir.box_dims %[[VAL_2]], %[[VAL_4]] : (!fir.box>, index) -> (index, index, index) +! CHECK: %[[VAL_6:.*]] = fir.shape_shift %[[VAL_5]]#0, %[[VAL_5]]#1 : (index, index) -> !fir.shapeshift<1> +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : index +! CHECK: fir.do_loop %[[VAL_8:.*]] = %[[VAL_7]] to %[[VAL_5]]#1 step %[[VAL_7]] unordered { +! CHECK: %[[VAL_9:.*]] = fir.array_coor %[[VAL_2]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box>, !fir.shapeshift<1>, index) -> !fir.ref +! CHECK: %[[VAL_10:.*]] = fir.array_coor %[[VAL_3]](%[[VAL_6]]) %[[VAL_8]] : (!fir.box>, !fir.shapeshift<1>, index) -> !fir.ref +! CHECK: %[[VAL_11:.*]] = fir.load %[[VAL_9]] : !fir.ref +! CHECK: %[[VAL_12:.*]] = fir.load %[[VAL_10]] : !fir.ref +! CHECK: %[[VAL_13:.*]] = arith.addf %[[VAL_11]], %[[VAL_12]] fastmath : f64 +! 
CHECK: fir.store %[[VAL_13]] to %[[VAL_9]] : !fir.ref +! CHECK: } +! CHECK: omp.yield(%[[VAL_0]] : !fir.ref>>) +! CHECK: } + +! CHECK-LABEL: func.func private @_QFPreduce( +! CHECK-SAME: %[[VAL_0:.*]]: !fir.box> {fir.bindc_name = "r"}) attributes {{.*}} { +! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFFreduceEi) : !fir.ref +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFFreduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]]:2 = hlfir.declare %[[VAL_0]] {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.box>) -> (!fir.box>, !fir.box>) +! CHECK: omp.parallel { +! CHECK: %[[VAL_4:.*]] = fir.alloca i32 {adapt.valuebyref, pinned} +! CHECK: %[[VAL_5:.*]]:2 = hlfir.declare %[[VAL_4]] {uniq_name = "_QFFreduceEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_6:.*]] = arith.constant 0 : i32 +! CHECK: %[[VAL_7:.*]] = arith.constant 10 : i32 +! CHECK: %[[VAL_8:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_9:.*]] = fir.alloca !fir.box> +! CHECK: fir.store %[[VAL_3]]#1 to %[[VAL_9]] : !fir.ref>> +! CHECK: omp.wsloop byref reduction(@add_reduction_byref_box_Uxf64 %[[VAL_9]] -> %[[VAL_10:.*]] : !fir.ref>>) for (%[[VAL_11:.*]]) : i32 = (%[[VAL_6]]) to (%[[VAL_7]]) inclusive step (%[[VAL_8]]) { +! CHECK: fir.store %[[VAL_11]] to %[[VAL_5]]#1 : !fir.ref +! CHECK: %[[VAL_12:.*]]:2 = hlfir.declare %[[VAL_10]] {fortran_attrs = {{.*}}, uniq_name = "_QFFreduceEr"} : (!fir.ref>>) -> (!fir.ref>>, !fir.ref>>) +! CHECK: %[[VAL_13:.*]] = fir.load %[[VAL_5]]#0 : !fir.ref +! CHECK: %[[VAL_14:.*]] = fir.convert %[[VAL_13]] : (i32) -> f64 +! CHECK: %[[VAL_15:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref>> +! CHECK: %[[VAL_16:.*]] = arith.constant 1 : index +! CHECK: %[[VAL_17:.*]] = hlfir.designate %[[VAL_15]] (%[[VAL_16]]) : (!fir.box>, index) -> !fir.ref +! CHECK: hlfir.assign %[[VAL_14]] to %[[VAL_17]] : f64, !fir.ref +! CHECK: %[[VAL_18:.*]] = arith.constant 1.000000e+00 : f64 +! CHECK: %[[VAL_19:.*]] = fir.load %[[VAL_12]]#0 : !fir.ref>> +! CHECK: %[[VAL_20:.*]] = arith.constant 2 : index +! CHECK: %[[VAL_21:.*]] = hlfir.designate %[[VAL_19]] (%[[VAL_20]]) : (!fir.box>, index) -> !fir.ref +! CHECK: hlfir.assign %[[VAL_18]] to %[[VAL_21]] : f64, !fir.ref +! CHECK: omp.yield +! CHECK: } +! CHECK: omp.terminator +! CHECK: } +! CHECK: return +! CHECK: } -- cgit v1.1 From cc34ad91f0d0cfb4e568e67eedc8a6a3684b89ea Mon Sep 17 00:00:00 2001 From: Tom Eccles Date: Thu, 4 Apr 2024 11:19:42 +0100 Subject: [MLIR][OpenMP] Add cleanup region to omp.declare_reduction (#87377) Currently, by-ref reductions will allocate the per-thread reduction variable in the initialization region. Adding a cleanup region allows that allocation to be undone. This will allow flang to support reduction of arrays stored on the heap. This conflation of allocation and initialization in the initialization should be fixed in the future to better match the OpenMP standard, but that is beyond the scope of this patch. 
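To make the new region concrete, here is a minimal sketch of the intended shape (modelled directly on the tests added below; the reduction name and element type are illustrative only): the init region heap-allocates and zero-initializes the per-thread accumulator, the combiner adds through the pointers, and the new cleanup region frees the allocation once the reduction has completed.

omp.declare_reduction @add_reduction_i_32 : !llvm.ptr init {
^bb0(%arg0: !llvm.ptr):
  // allocate and zero-initialize the private accumulator
  %0 = llvm.mlir.constant(0 : i32) : i32
  %c4 = llvm.mlir.constant(4 : i64) : i64
  %2 = llvm.call @malloc(%c4) : (i64) -> !llvm.ptr
  llvm.store %0, %2 : i32, !llvm.ptr
  omp.yield(%2 : !llvm.ptr)
} combiner {
^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr):
  // combine two accumulators through their pointers
  %0 = llvm.load %arg0 : !llvm.ptr -> i32
  %1 = llvm.load %arg1 : !llvm.ptr -> i32
  %2 = llvm.add %0, %1 : i32
  llvm.store %2, %arg0 : i32, !llvm.ptr
  omp.yield(%arg0 : !llvm.ptr)
} cleanup {
^bb0(%arg0: !llvm.ptr):
  // undo the allocation made by the init region
  llvm.call @free(%arg0) : (!llvm.ptr) -> ()
  omp.yield
}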
--- mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td | 14 +++- mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp | 49 ++++++++--- .../Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp | 53 ++++++++++-- mlir/test/Dialect/OpenMP/invalid.mlir | 19 +++++ mlir/test/Dialect/OpenMP/ops.mlir | 8 ++ .../LLVMIR/openmp-parallel-reduction-cleanup.mlir | 94 ++++++++++++++++++++++ .../LLVMIR/openmp-wsloop-reduction-cleanup.mlir | 86 ++++++++++++++++++++ 7 files changed, 299 insertions(+), 24 deletions(-) create mode 100644 mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir create mode 100644 mlir/test/Target/LLVMIR/openmp-wsloop-reduction-cleanup.mlir diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index f33942b..4574518 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -2135,8 +2135,8 @@ def DeclareReductionOp : OpenMP_Op<"declare_reduction", [Symbol, let summary = "declares a reduction kind"; let description = [{ - Declares an OpenMP reduction kind. This requires two mandatory and one - optional region. + Declares an OpenMP reduction kind. This requires two mandatory and two + optional regions. 1. The initializer region specifies how to initialize the thread-local reduction value. This is usually the neutral element of the reduction. @@ -2149,6 +2149,10 @@ def DeclareReductionOp : OpenMP_Op<"declare_reduction", [Symbol, 3. The atomic reduction region is optional and specifies how two values can be combined atomically given local accumulator variables. It is expected to store the combined value in the first accumulator variable. + 4. The cleanup region is optional and specifies how to clean up any memory + allocated by the initializer region. The region has an argument that + contains the value of the thread-local reduction accumulator. This will + be executed after the reduction has completed. Note that the MLIR type system does not allow for type-polymorphic reductions. 
Separate reduction declarations should be created for different @@ -2163,12 +2167,14 @@ def DeclareReductionOp : OpenMP_Op<"declare_reduction", [Symbol, let regions = (region AnyRegion:$initializerRegion, AnyRegion:$reductionRegion, - AnyRegion:$atomicReductionRegion); + AnyRegion:$atomicReductionRegion, + AnyRegion:$cleanupRegion); let assemblyFormat = "$sym_name `:` $type attr-dict-with-keyword " "`init` $initializerRegion " "`combiner` $reductionRegion " - "custom($atomicReductionRegion)"; + "custom($atomicReductionRegion) " + "custom($cleanupRegion)"; let extraClassDeclaration = [{ PointerLikeType getAccumulatorType() { diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index bf58750..a043431 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -1538,6 +1538,21 @@ static void printAtomicReductionRegion(OpAsmPrinter &printer, printer.printRegion(region); } +static ParseResult parseCleanupReductionRegion(OpAsmParser &parser, + Region ®ion) { + if (parser.parseOptionalKeyword("cleanup")) + return success(); + return parser.parseRegion(region); +} + +static void printCleanupReductionRegion(OpAsmPrinter &printer, + DeclareReductionOp op, Region ®ion) { + if (region.empty()) + return; + printer << "cleanup "; + printer.printRegion(region); +} + LogicalResult DeclareReductionOp::verifyRegions() { if (getInitializerRegion().empty()) return emitOpError() << "expects non-empty initializer region"; @@ -1571,21 +1586,29 @@ LogicalResult DeclareReductionOp::verifyRegions() { "of the reduction type"; } - if (getAtomicReductionRegion().empty()) + if (!getAtomicReductionRegion().empty()) { + Block &atomicReductionEntryBlock = getAtomicReductionRegion().front(); + if (atomicReductionEntryBlock.getNumArguments() != 2 || + atomicReductionEntryBlock.getArgumentTypes()[0] != + atomicReductionEntryBlock.getArgumentTypes()[1]) + return emitOpError() << "expects atomic reduction region with two " + "arguments of the same type"; + auto ptrType = llvm::dyn_cast( + atomicReductionEntryBlock.getArgumentTypes()[0]); + if (!ptrType || + (ptrType.getElementType() && ptrType.getElementType() != getType())) + return emitOpError() << "expects atomic reduction region arguments to " + "be accumulators containing the reduction type"; + } + + if (getCleanupRegion().empty()) return success(); + Block &cleanupEntryBlock = getCleanupRegion().front(); + if (cleanupEntryBlock.getNumArguments() != 1 || + cleanupEntryBlock.getArgument(0).getType() != getType()) + return emitOpError() << "expects cleanup region with one argument " + "of the reduction type"; - Block &atomicReductionEntryBlock = getAtomicReductionRegion().front(); - if (atomicReductionEntryBlock.getNumArguments() != 2 || - atomicReductionEntryBlock.getArgumentTypes()[0] != - atomicReductionEntryBlock.getArgumentTypes()[1]) - return emitOpError() << "expects atomic reduction region with two " - "arguments of the same type"; - auto ptrType = llvm::dyn_cast( - atomicReductionEntryBlock.getArgumentTypes()[0]); - if (!ptrType || - (ptrType.getElementType() && ptrType.getElementType() != getType())) - return emitOpError() << "expects atomic reduction region arguments to " - "be accumulators containing the reduction type"; return success(); } diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index c4bf6a2..08ec578 100644 --- 
a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -877,6 +877,32 @@ static void collectReductionInfo( } } +/// handling of DeclareReductionOp's cleanup region +static LogicalResult inlineReductionCleanup( + llvm::SmallVectorImpl &reductionDecls, + llvm::ArrayRef privateReductionVariables, + LLVM::ModuleTranslation &moduleTranslation, llvm::IRBuilderBase &builder) { + for (auto [i, reductionDecl] : llvm::enumerate(reductionDecls)) { + Region &cleanupRegion = reductionDecl.getCleanupRegion(); + if (cleanupRegion.empty()) + continue; + + // map the argument to the cleanup region + Block &entry = cleanupRegion.front(); + moduleTranslation.mapValue(entry.getArgument(0), + privateReductionVariables[i]); + + if (failed(inlineConvertOmpRegions(cleanupRegion, "omp.reduction.cleanup", + builder, moduleTranslation))) + return failure(); + + // clear block argument mapping in case it needs to be re-created with a + // different source for another use of the same reduction decl + moduleTranslation.forgetMapping(cleanupRegion); + } + return success(); +} + /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, @@ -1072,7 +1098,9 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder, tempTerminator->eraseFromParent(); builder.restoreIP(nextInsertionPoint); - return success(); + // after the workshare loop, deallocate private reduction variables + return inlineReductionCleanup(reductionDecls, privateReductionVariables, + moduleTranslation, builder); } /// A RAII class that on construction replaces the region arguments of the @@ -1125,13 +1153,13 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, LogicalResult bodyGenStatus = success(); llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder(); - auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { - // Collect reduction declarations - SmallVector reductionDecls; - collectReductionDecls(opInst, reductionDecls); + // Collect reduction declarations + SmallVector reductionDecls; + collectReductionDecls(opInst, reductionDecls); + SmallVector privateReductionVariables; + auto bodyGenCB = [&](InsertPointTy allocaIP, InsertPointTy codeGenIP) { // Allocate reduction vars - SmallVector privateReductionVariables; DenseMap reductionVariableMap; if (!isByRef) { allocByValReductionVars(opInst, builder, moduleTranslation, allocaIP, @@ -1331,7 +1359,18 @@ convertOmpParallel(omp::ParallelOp opInst, llvm::IRBuilderBase &builder, // TODO: Perform finalization actions for variables. This has to be // called for variables which have destructors/finalizers. 
- auto finiCB = [&](InsertPointTy codeGenIP) {}; + auto finiCB = [&](InsertPointTy codeGenIP) { + InsertPointTy oldIP = builder.saveIP(); + builder.restoreIP(codeGenIP); + + // if the reduction has a cleanup region, inline it here to finalize the + // reduction variables + if (failed(inlineReductionCleanup(reductionDecls, privateReductionVariables, + moduleTranslation, builder))) + bodyGenStatus = failure(); + + builder.restoreIP(oldIP); + }; llvm::Value *ifCond = nullptr; if (auto ifExprVar = opInst.getIfExprVar()) diff --git a/mlir/test/Dialect/OpenMP/invalid.mlir b/mlir/test/Dialect/OpenMP/invalid.mlir index a00383c..1134db7 100644 --- a/mlir/test/Dialect/OpenMP/invalid.mlir +++ b/mlir/test/Dialect/OpenMP/invalid.mlir @@ -436,6 +436,25 @@ atomic { // ----- +// expected-error @below {{op expects cleanup region with one argument of the reduction type}} +omp.declare_reduction @add_f32 : f32 +init { +^bb0(%arg: f32): + %0 = arith.constant 0.0 : f32 + omp.yield (%0 : f32) +} +combiner { +^bb1(%arg0: f32, %arg1: f32): + %1 = arith.addf %arg0, %arg1 : f32 + omp.yield (%1 : f32) +} +cleanup { +^bb0(%arg: f64): + omp.yield +} + +// ----- + func.func @foo(%lb : index, %ub : index, %step : index) { %c1 = arith.constant 1 : i32 %0 = llvm.alloca %c1 x i32 : (i32) -> !llvm.ptr diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 30ce774..e2c255c 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -603,6 +603,8 @@ func.func @omp_target_pretty(%if_cond : i1, %device : si32, %num_threads : i32) // CHECK: atomic // CHECK: ^{{.+}}(%{{.+}}: !llvm.ptr, %{{.+}}: !llvm.ptr): // CHECK: omp.yield +// CHECK: cleanup +// CHECK: omp.yield omp.declare_reduction @add_f32 : f32 init { ^bb0(%arg: f32): @@ -620,6 +622,10 @@ atomic { llvm.atomicrmw fadd %arg2, %2 monotonic : !llvm.ptr, f32 omp.yield } +cleanup { +^bb0(%arg: f32): + omp.yield +} // CHECK-LABEL: func @wsloop_reduction func.func @wsloop_reduction(%lb : index, %ub : index, %step : index) { @@ -789,6 +795,7 @@ combiner { omp.yield (%1 : f32) } // CHECK-NOT: atomic +// CHECK-NOT: cleanup // CHECK-LABEL: func @wsloop_reduction2 func.func @wsloop_reduction2(%lb : index, %ub : index, %step : index) { @@ -2088,6 +2095,7 @@ func.func @opaque_pointers_atomic_rwu(%v: !llvm.ptr, %x: !llvm.ptr) { // CHECK-LABEL: @opaque_pointers_reduction // CHECK: atomic { // CHECK-NEXT: ^{{[[:alnum:]]+}}(%{{.*}}: !llvm.ptr, %{{.*}}: !llvm.ptr): +// CHECK-NOT: cleanup omp.declare_reduction @opaque_pointers_reduction : f32 init { ^bb0(%arg: f32): diff --git a/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir new file mode 100644 index 0000000..9ae4c4a --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-parallel-reduction-cleanup.mlir @@ -0,0 +1,94 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +// test a parallel reduction with a cleanup region + + omp.declare_reduction @add_reduction_i_32 : !llvm.ptr init { + ^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(0 : i32) : i32 + %c4 = llvm.mlir.constant(4 : i64) : i64 + %2 = llvm.call @malloc(%c4) : (i64) -> !llvm.ptr + llvm.store %0, %2 : i32, !llvm.ptr + omp.yield(%2 : !llvm.ptr) + } combiner { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %0 = llvm.load %arg0 : !llvm.ptr -> i32 + %1 = llvm.load %arg1 : !llvm.ptr -> i32 + %2 = llvm.add %0, %1 : i32 + llvm.store %2, %arg0 : i32, !llvm.ptr + omp.yield(%arg0 : !llvm.ptr) + } cleanup { + ^bb0(%arg0: 
!llvm.ptr): + llvm.call @free(%arg0) : (!llvm.ptr) -> () + omp.yield + } + + // CHECK-LABEL: @main + llvm.func @main() { + %0 = llvm.mlir.constant(-1 : i32) : i32 + %1 = llvm.mlir.addressof @i : !llvm.ptr + %2 = llvm.mlir.addressof @j : !llvm.ptr + omp.parallel byref reduction(@add_reduction_i_32 %1 -> %arg0 : !llvm.ptr, @add_reduction_i_32 %2 -> %arg1 : !llvm.ptr) { + llvm.store %0, %arg0 : i32, !llvm.ptr + llvm.store %0, %arg1 : i32, !llvm.ptr + omp.terminator + } + llvm.return + } + llvm.mlir.global internal @i() {addr_space = 0 : i32} : i32 { + %0 = llvm.mlir.constant(0 : i32) : i32 + llvm.return %0 : i32 + } + llvm.mlir.global internal @j() {addr_space = 0 : i32} : i32 { + %0 = llvm.mlir.constant(0 : i32) : i32 + llvm.return %0 : i32 + } + llvm.func @malloc(%arg0 : i64) -> !llvm.ptr + llvm.func @free(%arg0 : !llvm.ptr) -> () + +// CHECK: %{{.+}} = +// Call to the outlined function. +// CHECK: call void {{.*}} @__kmpc_fork_call +// CHECK-SAME: @[[OUTLINED:[A-Za-z_.][A-Za-z0-9_.]*]] + +// Outlined function. +// CHECK: define internal void @[[OUTLINED]] + +// Private reduction variable and its initialization. +// CHECK: %tid.addr.local = alloca i32 +// CHECK: %[[MALLOC_I:.+]] = call ptr @malloc(i64 4) +// CHECK: %[[PRIV_PTR_I:.+]] = alloca ptr +// CHECK: store ptr %[[MALLOC_I]], ptr %[[PRIV_PTR_I]] +// CHECK: %[[MALLOC_J:.+]] = call ptr @malloc(i64 4) +// CHECK: %[[PRIV_PTR_J:.+]] = alloca ptr +// CHECK: store ptr %[[MALLOC_J]], ptr %[[PRIV_PTR_J]] + +// Call to the reduction function. +// CHECK: call i32 @__kmpc_reduce +// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]] + + +// Non-atomic reduction: +// CHECK: %[[PRIV_VAL_PTR_I:.+]] = load ptr, ptr %[[PRIV_PTR_I]] +// CHECK: %[[LOAD_I:.+]] = load i32, ptr @i +// CHECK: %[[PRIV_VAL_I:.+]] = load i32, ptr %[[PRIV_VAL_PTR_I]] +// CHECK: %[[SUM_I:.+]] = add i32 %[[LOAD_I]], %[[PRIV_VAL_I]] +// CHECK: store i32 %[[SUM_I]], ptr @i +// CHECK: %[[PRIV_VAL_PTR_J:.+]] = load ptr, ptr %[[PRIV_PTR_J]] +// CHECK: %[[LOAD_J:.+]] = load i32, ptr @j +// CHECK: %[[PRIV_VAL_J:.+]] = load i32, ptr %[[PRIV_VAL_PTR_J]] +// CHECK: %[[SUM_J:.+]] = add i32 %[[LOAD_J]], %[[PRIV_VAL_J]] +// CHECK: store i32 %[[SUM_J]], ptr @j +// CHECK: call void @__kmpc_end_reduce +// CHECK: br label %[[FINALIZE:.+]] + +// CHECK: [[FINALIZE]]: +// CHECK: br label %[[OMP_FINALIZE:.+]] + +// Cleanup region: +// CHECK: [[OMP_FINALIZE]]: +// CHECK: call void @free(ptr %[[PRIV_PTR_I]]) +// CHECK: call void @free(ptr %[[PRIV_PTR_J]]) + +// Reduction function. 
+// CHECK: define internal void @[[REDFUNC]] +// CHECK: add i32 diff --git a/mlir/test/Target/LLVMIR/openmp-wsloop-reduction-cleanup.mlir b/mlir/test/Target/LLVMIR/openmp-wsloop-reduction-cleanup.mlir new file mode 100644 index 0000000..a1e17af --- /dev/null +++ b/mlir/test/Target/LLVMIR/openmp-wsloop-reduction-cleanup.mlir @@ -0,0 +1,86 @@ +// RUN: mlir-translate -mlir-to-llvmir -split-input-file %s | FileCheck %s + +// test a wsloop reduction with a cleanup region + + omp.declare_reduction @add_reduction_i_32 : !llvm.ptr init { + ^bb0(%arg0: !llvm.ptr): + %0 = llvm.mlir.constant(0 : i32) : i32 + %c4 = llvm.mlir.constant(4 : i64) : i64 + %2 = llvm.call @malloc(%c4) : (i64) -> !llvm.ptr + llvm.store %0, %2 : i32, !llvm.ptr + omp.yield(%2 : !llvm.ptr) + } combiner { + ^bb0(%arg0: !llvm.ptr, %arg1: !llvm.ptr): + %0 = llvm.load %arg0 : !llvm.ptr -> i32 + %1 = llvm.load %arg1 : !llvm.ptr -> i32 + %2 = llvm.add %0, %1 : i32 + llvm.store %2, %arg0 : i32, !llvm.ptr + omp.yield(%arg0 : !llvm.ptr) + } cleanup { + ^bb0(%arg0: !llvm.ptr): + llvm.call @free(%arg0) : (!llvm.ptr) -> () + omp.yield + } + + // CHECK-LABEL: @main + llvm.func @main() { + %0 = llvm.mlir.constant(-1 : i32) : i32 + %1 = llvm.mlir.addressof @i : !llvm.ptr + %2 = llvm.mlir.addressof @j : !llvm.ptr + %loop_ub = llvm.mlir.constant(9 : i32) : i32 + %loop_lb = llvm.mlir.constant(0 : i32) : i32 + %loop_step = llvm.mlir.constant(1 : i32) : i32 + omp.wsloop byref reduction(@add_reduction_i_32 %1 -> %arg0 : !llvm.ptr, @add_reduction_i_32 %2 -> %arg1 : !llvm.ptr) for (%loop_cnt) : i32 = (%loop_lb) to (%loop_ub) inclusive step (%loop_step) { + llvm.store %0, %arg0 : i32, !llvm.ptr + llvm.store %0, %arg1 : i32, !llvm.ptr + omp.terminator + } + llvm.return + } + llvm.mlir.global internal @i() {addr_space = 0 : i32} : i32 { + %0 = llvm.mlir.constant(0 : i32) : i32 + llvm.return %0 : i32 + } + llvm.mlir.global internal @j() {addr_space = 0 : i32} : i32 { + %0 = llvm.mlir.constant(0 : i32) : i32 + llvm.return %0 : i32 + } + llvm.func @malloc(%arg0 : i64) -> !llvm.ptr + llvm.func @free(%arg0 : !llvm.ptr) -> () + +// Private reduction variable and its initialization. +// CHECK: %[[MALLOC_I:.+]] = call ptr @malloc(i64 4) +// CHECK: %[[PRIV_PTR_I:.+]] = alloca ptr +// CHECK: store ptr %[[MALLOC_I]], ptr %[[PRIV_PTR_I]] +// CHECK: %[[MALLOC_J:.+]] = call ptr @malloc(i64 4) +// CHECK: %[[PRIV_PTR_J:.+]] = alloca ptr +// CHECK: store ptr %[[MALLOC_J]], ptr %[[PRIV_PTR_J]] + +// Call to the reduction function. +// CHECK: call i32 @__kmpc_reduce +// CHECK-SAME: @[[REDFUNC:[A-Za-z_.][A-Za-z0-9_.]*]] + +// Weirdly the finalization block is generated before the reduction blocks: +// CHECK: [[FINALIZE:.+]]: +// CHECK: call void @__kmpc_barrier +// CHECK: call void @free(ptr %[[PRIV_PTR_I]]) +// CHECK: call void @free(ptr %[[PRIV_PTR_J]]) +// CHECK: ret void + +// Non-atomic reduction: +// CHECK: %[[PRIV_VAL_PTR_I:.+]] = load ptr, ptr %[[PRIV_PTR_I]] +// CHECK: %[[LOAD_I:.+]] = load i32, ptr @i +// CHECK: %[[PRIV_VAL_I:.+]] = load i32, ptr %[[PRIV_VAL_PTR_I]] +// CHECK: %[[SUM_I:.+]] = add i32 %[[LOAD_I]], %[[PRIV_VAL_I]] +// CHECK: store i32 %[[SUM_I]], ptr @i +// CHECK: %[[PRIV_VAL_PTR_J:.+]] = load ptr, ptr %[[PRIV_PTR_J]] +// CHECK: %[[LOAD_J:.+]] = load i32, ptr @j +// CHECK: %[[PRIV_VAL_J:.+]] = load i32, ptr %[[PRIV_VAL_PTR_J]] +// CHECK: %[[SUM_J:.+]] = add i32 %[[LOAD_J]], %[[PRIV_VAL_J]] +// CHECK: store i32 %[[SUM_J]], ptr @j +// CHECK: call void @__kmpc_end_reduce +// CHECK: br label %[[FINALIZE]] + +// Reduction function. 
+// CHECK: define internal void @[[REDFUNC]] +// CHECK: add i32 -- cgit v1.1 From 212b2bbcd1583353bccef3418e94912a30775715 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 11:22:37 +0100 Subject: [VectorCombine][X86] foldShuffleOfCastops - fold shuffle(cast(x),cast(y)) -> cast(shuffle(x,y)) iff cost efficient (#87510) Based off the existing foldShuffleOfBinops fold Fixes #67803 --- llvm/lib/Transforms/Vectorize/VectorCombine.cpp | 71 +++++++++++++++++++ llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll | 6 +- .../VectorCombine/X86/shuffle-of-casts.ll | 79 +++++++++++----------- 3 files changed, 110 insertions(+), 46 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index af5e7c9..3738220 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -112,6 +112,7 @@ private: bool foldSingleElementStore(Instruction &I); bool scalarizeLoadExtract(Instruction &I); bool foldShuffleOfBinops(Instruction &I); + bool foldShuffleOfCastops(Instruction &I); bool foldShuffleFromReductions(Instruction &I); bool foldTruncFromReductions(Instruction &I); bool foldSelectShuffle(Instruction &I, bool FromReduction = false); @@ -1432,6 +1433,75 @@ bool VectorCombine::foldShuffleOfBinops(Instruction &I) { return true; } +/// Try to convert "shuffle (castop), (castop)" with a shared castop operand +/// into "castop (shuffle)". +bool VectorCombine::foldShuffleOfCastops(Instruction &I) { + Value *V0, *V1; + ArrayRef Mask; + if (!match(&I, m_Shuffle(m_OneUse(m_Value(V0)), m_OneUse(m_Value(V1)), + m_Mask(Mask)))) + return false; + + auto *C0 = dyn_cast(V0); + auto *C1 = dyn_cast(V1); + if (!C0 || !C1) + return false; + + Instruction::CastOps Opcode = C0->getOpcode(); + if (Opcode == Instruction::BitCast || C0->getSrcTy() != C1->getSrcTy()) + return false; + + // Handle shuffle(zext_nneg(x), sext(y)) -> sext(shuffle(x,y)) folds. + if (Opcode != C1->getOpcode()) { + if (match(C0, m_SExtLike(m_Value())) && match(C1, m_SExtLike(m_Value()))) + Opcode = Instruction::SExt; + else + return false; + } + + auto *ShuffleDstTy = dyn_cast(I.getType()); + auto *CastDstTy = dyn_cast(C0->getDestTy()); + auto *CastSrcTy = dyn_cast(C0->getSrcTy()); + if (!ShuffleDstTy || !CastDstTy || !CastSrcTy) + return false; + assert(CastDstTy->getElementCount() == CastSrcTy->getElementCount() && + "Unexpected src/dst element counts"); + + auto *NewShuffleDstTy = + FixedVectorType::get(CastSrcTy->getScalarType(), Mask.size()); + + // Try to replace a castop with a shuffle if the shuffle is not costly. + TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput; + + InstructionCost OldCost = + TTI.getCastInstrCost(C0->getOpcode(), CastDstTy, CastSrcTy, + TTI::CastContextHint::None, CostKind) + + TTI.getCastInstrCost(C1->getOpcode(), CastDstTy, CastSrcTy, + TTI::CastContextHint::None, CostKind); + OldCost += TTI.getShuffleCost(TargetTransformInfo::SK_PermuteTwoSrc, + CastDstTy, Mask, CostKind); + + InstructionCost NewCost = TTI.getShuffleCost( + TargetTransformInfo::SK_PermuteTwoSrc, CastSrcTy, Mask, CostKind); + NewCost += TTI.getCastInstrCost(Opcode, ShuffleDstTy, NewShuffleDstTy, + TTI::CastContextHint::None, CostKind); + if (NewCost > OldCost) + return false; + + Value *Shuf = + Builder.CreateShuffleVector(C0->getOperand(0), C1->getOperand(0), Mask); + Value *Cast = Builder.CreateCast(Opcode, Shuf, ShuffleDstTy); + + // Intersect flags from the old casts. 
+ if (auto *NewInst = dyn_cast(Cast)) { + NewInst->copyIRFlags(C0); + NewInst->andIRFlags(C1); + } + + replaceValue(I, *Cast); + return true; +} + /// Given a commutative reduction, the order of the input lanes does not alter /// the results. We can use this to remove certain shuffles feeding the /// reduction, removing the need to shuffle at all. @@ -1986,6 +2056,7 @@ bool VectorCombine::run() { break; case Instruction::ShuffleVector: MadeChange |= foldShuffleOfBinops(I); + MadeChange |= foldShuffleOfCastops(I); MadeChange |= foldSelectShuffle(I); break; case Instruction::BitCast: diff --git a/llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll b/llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll index 495ec0a..45e411d 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/pr67803.ll @@ -9,11 +9,7 @@ define <4 x i64> @PR67803(<4 x i64> %x, <4 x i64> %y, <4 x i64> %a, <4 x i64> %b ; CHECK-NEXT: [[TMP0:%.*]] = bitcast <4 x i64> [[X:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i64> [[Y:%.*]] to <8 x i32> ; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i32> [[TMP0]], [[TMP1]] -; CHECK-NEXT: [[CMP_I21:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[SEXT_I22:%.*]] = sext <4 x i1> [[CMP_I21]] to <4 x i32> -; CHECK-NEXT: [[CMP_I:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> -; CHECK-NEXT: [[SEXT_I:%.*]] = sext <4 x i1> [[CMP_I]] to <4 x i32> -; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <4 x i32> [[SEXT_I22]], <4 x i32> [[SEXT_I]], <8 x i32> +; CHECK-NEXT: [[TMP3:%.*]] = sext <8 x i1> [[TMP2]] to <8 x i32> ; CHECK-NEXT: [[TMP5:%.*]] = bitcast <4 x i64> [[A:%.*]] to <32 x i8> ; CHECK-NEXT: [[TMP6:%.*]] = shufflevector <32 x i8> [[TMP5]], <32 x i8> poison, <16 x i32> ; CHECK-NEXT: [[TMP7:%.*]] = bitcast <4 x i64> [[B:%.*]] to <32 x i8> diff --git a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll index f804300..2031c2d 100644 --- a/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll +++ b/llvm/test/Transforms/VectorCombine/X86/shuffle-of-casts.ll @@ -6,9 +6,8 @@ define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @concat_zext_v8i16_v16i32( -; CHECK-NEXT: [[X0:%.*]] = zext <8 x i16> [[A0:%.*]] to <8 x i32> -; CHECK-NEXT: [[X1:%.*]] = zext <8 x i16> [[A1:%.*]] to <8 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> +; CHECK-NEXT: [[R:%.*]] = zext <16 x i16> [[TMP1]] to <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; %x0 = zext <8 x i16> %a0 to <8 x i32> @@ -19,9 +18,8 @@ define <16 x i32> @concat_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { define <16 x i32> @concat_zext_nneg_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @concat_zext_nneg_v8i16_v16i32( -; CHECK-NEXT: [[X0:%.*]] = zext nneg <8 x i16> [[A0:%.*]] to <8 x i32> -; CHECK-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> +; CHECK-NEXT: [[R:%.*]] = zext nneg <16 x i16> [[TMP1]] to <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; %x0 = zext nneg <8 x i16> %a0 to <8 x i32> @@ -30,13 +28,17 @@ define <16 x i32> @concat_zext_nneg_v8i16_v16i32(<8 x i16> %a0, <8 
x i16> %a1) { ret <16 x i32> %r } -; TODO - sext + zext nneg -> sext define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(<8 x i16> %a0, <8 x i16> %a1) { -; CHECK-LABEL: @concat_sext_zext_nneg_v8i16_v8i32( -; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> -; CHECK-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> -; CHECK-NEXT: ret <16 x i32> [[R]] +; SSE-LABEL: @concat_sext_zext_nneg_v8i16_v8i32( +; SSE-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> +; SSE-NEXT: [[X1:%.*]] = zext nneg <8 x i16> [[A1:%.*]] to <8 x i32> +; SSE-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> +; SSE-NEXT: ret <16 x i32> [[R]] +; +; AVX-LABEL: @concat_sext_zext_nneg_v8i16_v8i32( +; AVX-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> +; AVX-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32> +; AVX-NEXT: ret <16 x i32> [[R]] ; %x0 = sext <8 x i16> %a0 to <8 x i32> %x1 = zext nneg <8 x i16> %a1 to <8 x i32> @@ -46,9 +48,8 @@ define <16 x i32> @concat_sext_zext_nneg_v8i16_v8i32(<8 x i16> %a0, <8 x i16> %a define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @concat_sext_v8i16_v16i32( -; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> -; CHECK-NEXT: [[X1:%.*]] = sext <8 x i16> [[A1:%.*]] to <8 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> +; CHECK-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; %x0 = sext <8 x i16> %a0 to <8 x i32> @@ -59,9 +60,8 @@ define <16 x i32> @concat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { define <8 x i32> @concat_sext_v4i1_v8i32(<4 x i1> %a0, <4 x i1> %a1) { ; CHECK-LABEL: @concat_sext_v4i1_v8i32( -; CHECK-NEXT: [[X0:%.*]] = sext <4 x i1> [[A0:%.*]] to <4 x i32> -; CHECK-NEXT: [[X1:%.*]] = sext <4 x i1> [[A1:%.*]] to <4 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i32> [[X0]], <4 x i32> [[X1]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i1> [[A0:%.*]], <4 x i1> [[A1:%.*]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i32> ; CHECK-NEXT: ret <8 x i32> [[R]] ; %x0 = sext <4 x i1> %a0 to <4 x i32> @@ -72,9 +72,8 @@ define <8 x i32> @concat_sext_v4i1_v8i32(<4 x i1> %a0, <4 x i1> %a1) { define <8 x i16> @concat_trunc_v4i32_v8i16(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @concat_trunc_v4i32_v8i16( -; CHECK-NEXT: [[X0:%.*]] = trunc <4 x i32> [[A0:%.*]] to <4 x i16> -; CHECK-NEXT: [[X1:%.*]] = trunc <4 x i32> [[A1:%.*]] to <4 x i16> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x i16> [[X0]], <4 x i16> [[X1]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = trunc <8 x i32> [[TMP1]] to <8 x i16> ; CHECK-NEXT: ret <8 x i16> [[R]] ; %x0 = trunc <4 x i32> %a0 to <4 x i16> @@ -85,9 +84,8 @@ define <8 x i16> @concat_trunc_v4i32_v8i16(<4 x i32> %a0, <4 x i32> %a1) { define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(<4 x i32> %a0, <4 x i32> %a1) { ; CHECK-LABEL: @concat_inttoptr_v4i32_v8iptr( -; CHECK-NEXT: [[X0:%.*]] = inttoptr <4 x i32> [[A0:%.*]] to <4 x ptr> -; CHECK-NEXT: [[X1:%.*]] = inttoptr <4 x i32> [[A1:%.*]] to <4 x ptr> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x ptr> [[X0]], <4 x ptr> [[X1]], <8 x i32> 
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = inttoptr <8 x i32> [[TMP1]] to <8 x ptr> ; CHECK-NEXT: ret <8 x ptr> [[R]] ; %x0 = inttoptr <4 x i32> %a0 to <4 x ptr> @@ -98,9 +96,8 @@ define <8 x ptr> @concat_inttoptr_v4i32_v8iptr(<4 x i32> %a0, <4 x i32> %a1) { define <16 x i64> @concat_ptrtoint_v8i16_v16i32(<8 x ptr> %a0, <8 x ptr> %a1) { ; CHECK-LABEL: @concat_ptrtoint_v8i16_v16i32( -; CHECK-NEXT: [[X0:%.*]] = ptrtoint <8 x ptr> [[A0:%.*]] to <8 x i64> -; CHECK-NEXT: [[X1:%.*]] = ptrtoint <8 x ptr> [[A1:%.*]] to <8 x i64> -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i64> [[X0]], <8 x i64> [[X1]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x ptr> [[A0:%.*]], <8 x ptr> [[A1:%.*]], <16 x i32> +; CHECK-NEXT: [[R:%.*]] = ptrtoint <16 x ptr> [[TMP1]] to <16 x i64> ; CHECK-NEXT: ret <16 x i64> [[R]] ; %x0 = ptrtoint <8 x ptr> %a0 to <8 x i64> @@ -110,11 +107,16 @@ define <16 x i64> @concat_ptrtoint_v8i16_v16i32(<8 x ptr> %a0, <8 x ptr> %a1) { } define <8 x double> @concat_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) { -; CHECK-LABEL: @concat_fpext_v4f32_v8f64( -; CHECK-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double> -; CHECK-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> -; CHECK-NEXT: ret <8 x double> [[R]] +; SSE-LABEL: @concat_fpext_v4f32_v8f64( +; SSE-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <8 x i32> +; SSE-NEXT: [[R:%.*]] = fpext <8 x float> [[TMP1]] to <8 x double> +; SSE-NEXT: ret <8 x double> [[R]] +; +; AVX-LABEL: @concat_fpext_v4f32_v8f64( +; AVX-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double> +; AVX-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double> +; AVX-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> +; AVX-NEXT: ret <8 x double> [[R]] ; %x0 = fpext <4 x float> %a0 to <4 x double> %x1 = fpext <4 x float> %a1 to <4 x double> @@ -139,9 +141,8 @@ define <16 x float> @concat_fptrunc_v8f64_v16f32(<8 x double> %a0, <8 x double> define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { ; CHECK-LABEL: @rconcat_sext_v8i16_v16i32( -; CHECK-NEXT: [[X0:%.*]] = sext <8 x i16> [[A0:%.*]] to <8 x i32> -; CHECK-NEXT: [[X1:%.*]] = sext <8 x i16> [[A1:%.*]] to <8 x i32> -; CHECK-NEXT: [[R:%.*]] = shufflevector <8 x i32> [[X0]], <8 x i32> [[X1]], <16 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> [[A1:%.*]], <16 x i32> +; CHECK-NEXT: [[R:%.*]] = sext <16 x i16> [[TMP1]] to <16 x i32> ; CHECK-NEXT: ret <16 x i32> [[R]] ; %x0 = sext <8 x i16> %a0 to <8 x i32> @@ -154,9 +155,8 @@ define <16 x i32> @rconcat_sext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { define <8 x double> @interleave_fpext_v4f32_v8f64(<4 x float> %a0, <4 x float> %a1) { ; CHECK-LABEL: @interleave_fpext_v4f32_v8f64( -; CHECK-NEXT: [[X0:%.*]] = fpext <4 x float> [[A0:%.*]] to <4 x double> -; CHECK-NEXT: [[X1:%.*]] = fpext <4 x float> [[A1:%.*]] to <4 x double> -; CHECK-NEXT: [[R:%.*]] = shufflevector <4 x double> [[X0]], <4 x double> [[X1]], <8 x i32> +; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> [[A1:%.*]], <8 x i32> +; CHECK-NEXT: [[R:%.*]] = fpext <8 x float> [[TMP1]] to <8 x double> ; CHECK-NEXT: ret <8 x double> [[R]] ; %x0 = fpext <4 x float> %a0 to <4 x double> @@ -226,6 +226,3 @@ define <16 x i32> 
@concat_sext_zext_v8i16_v16i32(<8 x i16> %a0, <8 x i16> %a1) { %r = shufflevector <8 x i32> %x0, <8 x i32> %x1, <16 x i32> ret <16 x i32> %r } -;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: -; AVX: {{.*}} -; SSE: {{.*}} -- cgit v1.1 From bf2d7858e5998bed28d437973588e0294bd54bcf Mon Sep 17 00:00:00 2001 From: Zahira Ammarguellat Date: Thu, 4 Apr 2024 03:25:41 -0700 Subject: Fix bug in the type promotion for complex division in strict FP mode. (#87500) Complex division on Windows with `-fcomplex-arithmetic=promoted` and `-ffp-model=strict` is crashing. This patch fixes the issue. See https://godbolt.org/z/15Gh7nvdM --- clang/lib/CodeGen/CGExprComplex.cpp | 6 +- clang/test/CodeGen/cx-complex-range.c | 656 ++++++++++++++++++++++++++++++++++ 2 files changed, 659 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGExprComplex.cpp b/clang/lib/CodeGen/CGExprComplex.cpp index a793b21..1facadd 100644 --- a/clang/lib/CodeGen/CGExprComplex.cpp +++ b/clang/lib/CodeGen/CGExprComplex.cpp @@ -319,12 +319,12 @@ public: // doubles the exponent of SmallerType.LargestFiniteVal) if (llvm::APFloat::semanticsMaxExponent(ElementTypeSemantics) * 2 + 1 <= llvm::APFloat::semanticsMaxExponent(HigherElementTypeSemantics)) { + FPHasBeenPromoted = true; return CGF.getContext().getComplexType(HigherElementType); } else { - FPHasBeenPromoted = true; DiagnosticsEngine &Diags = CGF.CGM.getDiags(); Diags.Report(diag::warn_next_larger_fp_type_same_size_than_fp); - return CGF.getContext().getComplexType(ElementType); + return QualType(); } } @@ -1037,7 +1037,7 @@ ComplexPairTy ComplexExprEmitter::EmitBinDiv(const BinOpInfo &Op) { LHSi = llvm::Constant::getNullValue(RHSi->getType()); if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Improved || (Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted && - FPHasBeenPromoted)) + !FPHasBeenPromoted)) return EmitRangeReductionDiv(LHSr, LHSi, RHSr, RHSi); else if (Op.FPFeatures.getComplexRange() == LangOptions::CX_Basic || Op.FPFeatures.getComplexRange() == LangOptions::CX_Promoted) diff --git a/clang/test/CodeGen/cx-complex-range.c b/clang/test/CodeGen/cx-complex-range.c index 9ec8025..38f9923 100644 --- a/clang/test/CodeGen/cx-complex-range.c +++ b/clang/test/CodeGen/cx-complex-range.c @@ -48,6 +48,15 @@ // RUN: -ffast-math -complex-range=promoted -emit-llvm -o - %s \ // RUN: | FileCheck %s --check-prefix=PRMTD_FAST +// strict math mode +// RUN: %clang_cc1 -triple x86_64-windows-pc -complex-range=promoted \ +// RUN: -ffp-contract=off -frounding-math -ffp-exception-behavior=strict \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=X86WINPRMTD_STRICT + +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -complex-range=promoted \ +// RUN: -ffp-contract=off -frounding-math -ffp-exception-behavior=strict \ +// RUN: -emit-llvm -o - %s | FileCheck %s --check-prefix=PRMTD_STRICT + // FULL-LABEL: define dso_local <2 x float> @divf( // FULL-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { // FULL-NEXT: entry: @@ -504,6 +513,86 @@ // PRMTD_FAST-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 // PRMTD_FAST-NEXT: ret <2 x float> [[TMP11]] // +// X86WINPRMTD_STRICT-LABEL: define dso_local i64 @divf( +// X86WINPRMTD_STRICT-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// X86WINPRMTD_STRICT-NEXT: entry: +// X86WINPRMTD_STRICT-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// 
X86WINPRMTD_STRICT-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD_STRICT-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD_STRICT-NEXT: store i64 [[A_COERCE]], ptr [[A]], align 4 +// X86WINPRMTD_STRICT-NEXT: store i64 [[B_COERCE]], ptr [[B]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[EXT:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[A_REAL]], metadata !"fpexcept.strict") #[[ATTR3:[0-9]+]] +// X86WINPRMTD_STRICT-NEXT: [[EXT1:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[A_IMAG]], metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[EXT2:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[B_REAL]], metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[EXT3:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[B_IMAG]], metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT]], double [[EXT2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT1]], double [[EXT3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP0]], double [[TMP1]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP3:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT2]], double [[EXT2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP4:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT3]], double [[EXT3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP5:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP3]], double [[TMP4]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT1]], double [[EXT2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP7:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT]], double [[EXT3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP8:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP6]], double [[TMP7]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP9:%.*]] = call double 
@llvm.experimental.constrained.fdiv.f64(double [[TMP2]], double [[TMP5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP10:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP8]], double [[TMP5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[UNPROMOTION:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[TMP9]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[UNPROMOTION4:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: store float [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 4 +// X86WINPRMTD_STRICT-NEXT: store float [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[TMP11:%.*]] = load i64, ptr [[RETVAL]], align 4 +// X86WINPRMTD_STRICT-NEXT: ret i64 [[TMP11]] +// +// PRMTD_STRICT-LABEL: define dso_local <2 x float> @divf( +// PRMTD_STRICT-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] { +// PRMTD_STRICT-NEXT: entry: +// PRMTD_STRICT-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// PRMTD_STRICT-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// PRMTD_STRICT-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// PRMTD_STRICT-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// PRMTD_STRICT-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// PRMTD_STRICT-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// PRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// PRMTD_STRICT-NEXT: [[EXT:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[A_REAL]], metadata !"fpexcept.strict") #[[ATTR4:[0-9]+]] +// PRMTD_STRICT-NEXT: [[EXT1:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[A_IMAG]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// PRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// PRMTD_STRICT-NEXT: [[EXT2:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[B_REAL]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[EXT3:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[B_IMAG]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT]], double [[EXT2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT1]], double [[EXT3]], metadata !"round.dynamic", metadata 
!"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP0]], double [[TMP1]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP3:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT2]], double [[EXT2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP4:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT3]], double [[EXT3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP5:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP3]], double [[TMP4]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT1]], double [[EXT2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP7:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT]], double [[EXT3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP8:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP6]], double [[TMP7]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP9:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP2]], double [[TMP5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP10:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP8]], double [[TMP5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[UNPROMOTION:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[TMP9]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[UNPROMOTION4:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[TMP10]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: store float [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 4 +// PRMTD_STRICT-NEXT: store float [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 4 +// PRMTD_STRICT-NEXT: [[TMP11:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// PRMTD_STRICT-NEXT: ret <2 x float> [[TMP11]] +// _Complex float divf(_Complex float a, _Complex float b) { return a / b; } @@ -873,6 +962,64 @@ _Complex float divf(_Complex float a, _Complex float b) { // PRMTD_FAST-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 // PRMTD_FAST-NEXT: ret <2 x float> [[TMP0]] // +// X86WINPRMTD_STRICT-LABEL: define dso_local i64 @mulf( +// X86WINPRMTD_STRICT-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// X86WINPRMTD_STRICT-NEXT: entry: +// X86WINPRMTD_STRICT-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD_STRICT-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD_STRICT-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD_STRICT-NEXT: store i64 [[A_COERCE]], ptr [[A]], align 4 +// X86WINPRMTD_STRICT-NEXT: store i64 [[B_COERCE]], ptr [[B]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[A_REALP:%.*]] = 
getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load float, ptr [[B_REALP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[MUL_AC:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[A_REAL]], float [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_BD:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[A_IMAG]], float [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_AD:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[A_REAL]], float [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_BC:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[A_IMAG]], float [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_R:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[MUL_AC]], float [[MUL_BD]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_I:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[MUL_AD]], float [[MUL_BC]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 4 +// X86WINPRMTD_STRICT-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = load i64, ptr [[RETVAL]], align 4 +// X86WINPRMTD_STRICT-NEXT: ret i64 [[TMP0]] +// +// PRMTD_STRICT-LABEL: define dso_local <2 x float> @mulf( +// PRMTD_STRICT-SAME: <2 x float> noundef [[A_COERCE:%.*]], <2 x float> noundef [[B_COERCE:%.*]]) #[[ATTR0]] { +// PRMTD_STRICT-NEXT: entry: +// PRMTD_STRICT-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// PRMTD_STRICT-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// PRMTD_STRICT-NEXT: [[B:%.*]] = alloca { float, float }, align 4 +// PRMTD_STRICT-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// PRMTD_STRICT-NEXT: store <2 x float> [[B_COERCE]], ptr [[B]], align 4 +// PRMTD_STRICT-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// PRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// PRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load float, ptr 
[[B_REALP]], align 4 +// PRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[B]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load float, ptr [[B_IMAGP]], align 4 +// PRMTD_STRICT-NEXT: [[MUL_AC:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[A_REAL]], float [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_BD:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[A_IMAG]], float [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_AD:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[A_REAL]], float [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_BC:%.*]] = call float @llvm.experimental.constrained.fmul.f32(float [[A_IMAG]], float [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_R:%.*]] = call float @llvm.experimental.constrained.fsub.f32(float [[MUL_AC]], float [[MUL_BD]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_I:%.*]] = call float @llvm.experimental.constrained.fadd.f32(float [[MUL_AD]], float [[MUL_BC]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: store float [[MUL_R]], ptr [[RETVAL_REALP]], align 4 +// PRMTD_STRICT-NEXT: store float [[MUL_I]], ptr [[RETVAL_IMAGP]], align 4 +// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// PRMTD_STRICT-NEXT: ret <2 x float> [[TMP0]] +// _Complex float mulf(_Complex float a, _Complex float b) { return a * b; } @@ -1411,6 +1558,112 @@ _Complex float mulf(_Complex float a, _Complex float b) { // PRMTD_FAST-NEXT: [[TMP15:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 // PRMTD_FAST-NEXT: ret { double, double } [[TMP15]] // +// X86WINPRMTD_STRICT-LABEL: define dso_local void @divd( +// X86WINPRMTD_STRICT-SAME: ptr dead_on_unwind noalias writable sret({ double, double }) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] { +// X86WINPRMTD_STRICT-NEXT: entry: +// X86WINPRMTD_STRICT-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: [[A_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 8 +// X86WINPRMTD_STRICT-NEXT: store ptr [[B]], ptr [[B_INDIRECT_ADDR]], align 8 +// X86WINPRMTD_STRICT-NEXT: store ptr [[A]], ptr [[A_INDIRECT_ADDR]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = 
getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// X86WINPRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi: +// X86WINPRMTD_STRICT-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[B_IMAG]], double [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP3:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[TMP2]], double [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP4:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[B_REAL]], double [[TMP3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP5:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_IMAG]], double [[TMP2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[A_REAL]], double [[TMP5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP7:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP6]], double [[TMP4]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP8:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_REAL]], double [[TMP2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP9:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A_IMAG]], double [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP10:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP9]], double [[TMP4]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: br label [[COMPLEX_DIV:%.*]] +// X86WINPRMTD_STRICT: abs_rhsr_less_than_abs_rhsi: +// X86WINPRMTD_STRICT-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[B_REAL]], double [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[TMP11]], double [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[B_IMAG]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP14:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_REAL]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double 
[[TMP14]], double [[A_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP16:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP15]], double [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_IMAG]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP18:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP17]], double [[A_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP18]], double [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: br label [[COMPLEX_DIV]] +// X86WINPRMTD_STRICT: complex_div: +// X86WINPRMTD_STRICT-NEXT: [[TMP20:%.*]] = phi double [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD_STRICT-NEXT: [[TMP21:%.*]] = phi double [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: store double [[TMP20]], ptr [[AGG_RESULT_REALP]], align 8 +// X86WINPRMTD_STRICT-NEXT: store double [[TMP21]], ptr [[AGG_RESULT_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REALP1:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REAL:%.*]] = load double, ptr [[AGG_RESULT_REALP1]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAGP2:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAG:%.*]] = load double, ptr [[AGG_RESULT_IMAGP2]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REALP3:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAGP4:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: store double [[AGG_RESULT_REAL]], ptr [[AGG_RESULT_REALP3]], align 8 +// X86WINPRMTD_STRICT-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 8 +// X86WINPRMTD_STRICT-NEXT: ret void +// +// PRMTD_STRICT-LABEL: define dso_local { double, double } @divd( +// PRMTD_STRICT-SAME: double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) #[[ATTR2:[0-9]+]] { +// PRMTD_STRICT-NEXT: entry: +// PRMTD_STRICT-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// PRMTD_STRICT-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// PRMTD_STRICT-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: store double [[A_COERCE1]], ptr 
[[TMP1]], align 8 +// PRMTD_STRICT-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// PRMTD_STRICT-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// PRMTD_STRICT-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// PRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// PRMTD_STRICT-NEXT: [[EXT:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(double [[A_REAL]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[EXT1:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(double [[A_IMAG]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// PRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// PRMTD_STRICT-NEXT: [[EXT2:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(double [[B_REAL]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[EXT3:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f64(double [[B_IMAG]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP4:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[EXT]], x86_fp80 [[EXT2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP5:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[EXT1]], x86_fp80 [[EXT3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP6:%.*]] = call x86_fp80 @llvm.experimental.constrained.fadd.f80(x86_fp80 [[TMP4]], x86_fp80 [[TMP5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP7:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[EXT2]], x86_fp80 [[EXT2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP8:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[EXT3]], x86_fp80 [[EXT3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP9:%.*]] = call x86_fp80 @llvm.experimental.constrained.fadd.f80(x86_fp80 [[TMP7]], x86_fp80 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP10:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[EXT1]], x86_fp80 [[EXT2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP11:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[EXT]], x86_fp80 [[EXT3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP12:%.*]] = call x86_fp80 @llvm.experimental.constrained.fsub.f80(x86_fp80 [[TMP10]], x86_fp80 [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP13:%.*]] 
= call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[TMP6]], x86_fp80 [[TMP9]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP14:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[TMP12]], x86_fp80 [[TMP9]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[UNPROMOTION:%.*]] = call double @llvm.experimental.constrained.fptrunc.f64.f80(x86_fp80 [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[UNPROMOTION4:%.*]] = call double @llvm.experimental.constrained.fptrunc.f64.f80(x86_fp80 [[TMP14]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: store double [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 8 +// PRMTD_STRICT-NEXT: store double [[UNPROMOTION4]], ptr [[RETVAL_IMAGP]], align 8 +// PRMTD_STRICT-NEXT: [[TMP15:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// PRMTD_STRICT-NEXT: ret { double, double } [[TMP15]] +// _Complex double divd(_Complex double a, _Complex double b) { return a / b; } @@ -1834,6 +2087,78 @@ _Complex double divd(_Complex double a, _Complex double b) { // PRMTD_FAST-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 // PRMTD_FAST-NEXT: ret { double, double } [[TMP4]] // +// X86WINPRMTD_STRICT-LABEL: define dso_local void @muld( +// X86WINPRMTD_STRICT-SAME: ptr dead_on_unwind noalias writable sret({ double, double }) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] { +// X86WINPRMTD_STRICT-NEXT: entry: +// X86WINPRMTD_STRICT-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: [[A_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 8 +// X86WINPRMTD_STRICT-NEXT: store ptr [[B]], ptr [[B_INDIRECT_ADDR]], align 8 +// X86WINPRMTD_STRICT-NEXT: store ptr [[A]], ptr [[A_INDIRECT_ADDR]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[MUL_AC:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_REAL]], double [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_BD:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_IMAG]], double [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: 
[[MUL_AD:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_REAL]], double [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_BC:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_IMAG]], double [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_R:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[MUL_AC]], double [[MUL_BD]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_I:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[MUL_AD]], double [[MUL_BC]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: store double [[MUL_R]], ptr [[AGG_RESULT_REALP]], align 8 +// X86WINPRMTD_STRICT-NEXT: store double [[MUL_I]], ptr [[AGG_RESULT_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REALP1:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REAL:%.*]] = load double, ptr [[AGG_RESULT_REALP1]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAGP2:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAG:%.*]] = load double, ptr [[AGG_RESULT_IMAGP2]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REALP3:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAGP4:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: store double [[AGG_RESULT_REAL]], ptr [[AGG_RESULT_REALP3]], align 8 +// X86WINPRMTD_STRICT-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 8 +// X86WINPRMTD_STRICT-NEXT: ret void +// +// PRMTD_STRICT-LABEL: define dso_local { double, double } @muld( +// PRMTD_STRICT-SAME: double noundef [[A_COERCE0:%.*]], double noundef [[A_COERCE1:%.*]], double noundef [[B_COERCE0:%.*]], double noundef [[B_COERCE1:%.*]]) #[[ATTR2]] { +// PRMTD_STRICT-NEXT: entry: +// PRMTD_STRICT-NEXT: [[RETVAL:%.*]] = alloca { double, double }, align 8 +// PRMTD_STRICT-NEXT: [[A:%.*]] = alloca { double, double }, align 8 +// PRMTD_STRICT-NEXT: [[B:%.*]] = alloca { double, double }, align 8 +// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: store double [[A_COERCE0]], ptr [[TMP0]], align 8 +// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: store double [[A_COERCE1]], ptr [[TMP1]], align 8 +// PRMTD_STRICT-NEXT: [[TMP2:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: store double [[B_COERCE0]], ptr [[TMP2]], align 8 +// PRMTD_STRICT-NEXT: [[TMP3:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: store double [[B_COERCE1]], ptr [[TMP3]], align 8 +// PRMTD_STRICT-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load double, 
ptr [[A_REALP]], align 8 +// PRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// PRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// PRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// PRMTD_STRICT-NEXT: [[MUL_AC:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_REAL]], double [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_BD:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_IMAG]], double [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_AD:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_REAL]], double [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_BC:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_IMAG]], double [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_R:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[MUL_AC]], double [[MUL_BD]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_I:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[MUL_AD]], double [[MUL_BC]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: store double [[MUL_R]], ptr [[RETVAL_REALP]], align 8 +// PRMTD_STRICT-NEXT: store double [[MUL_I]], ptr [[RETVAL_IMAGP]], align 8 +// PRMTD_STRICT-NEXT: [[TMP4:%.*]] = load { double, double }, ptr [[RETVAL]], align 8 +// PRMTD_STRICT-NEXT: ret { double, double } [[TMP4]] +// _Complex double muld(_Complex double a, _Complex double b) { return a * b; } @@ -2316,6 +2641,114 @@ _Complex double muld(_Complex double a, _Complex double b) { // PRMTD_FAST-NEXT: [[TMP22:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 // PRMTD_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP22]] // +// X86WINPRMTD_STRICT-LABEL: define dso_local void @divld( +// X86WINPRMTD_STRICT-SAME: ptr dead_on_unwind noalias writable sret({ double, double }) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] { +// X86WINPRMTD_STRICT-NEXT: entry: +// X86WINPRMTD_STRICT-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: [[A_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 8 +// X86WINPRMTD_STRICT-NEXT: store ptr [[B]], ptr [[B_INDIRECT_ADDR]], align 8 +// X86WINPRMTD_STRICT-NEXT: store ptr [[A]], ptr [[A_INDIRECT_ADDR]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// 
X86WINPRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[B_REAL]]) #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[B_IMAG]]) #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// X86WINPRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi: +// X86WINPRMTD_STRICT-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[B_IMAG]], double [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP3:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[TMP2]], double [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP4:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[B_REAL]], double [[TMP3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP5:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_IMAG]], double [[TMP2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[A_REAL]], double [[TMP5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP7:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP6]], double [[TMP4]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP8:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_REAL]], double [[TMP2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP9:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[A_IMAG]], double [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP10:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP9]], double [[TMP4]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: br label [[COMPLEX_DIV:%.*]] +// X86WINPRMTD_STRICT: abs_rhsr_less_than_abs_rhsi: +// X86WINPRMTD_STRICT-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[B_REAL]], double [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[TMP11]], double [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP13:%.*]] = call double 
@llvm.experimental.constrained.fadd.f64(double [[B_IMAG]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP14:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_REAL]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP14]], double [[A_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP16:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP15]], double [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_IMAG]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP18:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP17]], double [[A_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP18]], double [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: br label [[COMPLEX_DIV]] +// X86WINPRMTD_STRICT: complex_div: +// X86WINPRMTD_STRICT-NEXT: [[TMP20:%.*]] = phi double [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD_STRICT-NEXT: [[TMP21:%.*]] = phi double [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: store double [[TMP20]], ptr [[AGG_RESULT_REALP]], align 8 +// X86WINPRMTD_STRICT-NEXT: store double [[TMP21]], ptr [[AGG_RESULT_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REALP1:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REAL:%.*]] = load double, ptr [[AGG_RESULT_REALP1]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAGP2:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAG:%.*]] = load double, ptr [[AGG_RESULT_IMAGP2]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REALP3:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAGP4:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: store double [[AGG_RESULT_REAL]], ptr [[AGG_RESULT_REALP3]], align 8 +// X86WINPRMTD_STRICT-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 8 +// X86WINPRMTD_STRICT-NEXT: ret void +// +// PRMTD_STRICT-LABEL: define dso_local { x86_fp80, x86_fp80 } @divld( +// PRMTD_STRICT-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) #[[ATTR2]] { +// PRMTD_STRICT-NEXT: entry: +// PRMTD_STRICT-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// PRMTD_STRICT-NEXT: [[A_REALP:%.*]] = 
getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// PRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// PRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// PRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_REAL]]) #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[B_IMAG]]) #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// PRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi: +// PRMTD_STRICT-NEXT: [[TMP2:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[B_IMAG]], x86_fp80 [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP3:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[TMP2]], x86_fp80 [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP4:%.*]] = call x86_fp80 @llvm.experimental.constrained.fadd.f80(x86_fp80 [[B_REAL]], x86_fp80 [[TMP3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP5:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[A_IMAG]], x86_fp80 [[TMP2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP6:%.*]] = call x86_fp80 @llvm.experimental.constrained.fadd.f80(x86_fp80 [[A_REAL]], x86_fp80 [[TMP5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP7:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[TMP6]], x86_fp80 [[TMP4]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP8:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[A_REAL]], x86_fp80 [[TMP2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP9:%.*]] = call x86_fp80 @llvm.experimental.constrained.fsub.f80(x86_fp80 [[A_IMAG]], x86_fp80 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP10:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[TMP9]], x86_fp80 [[TMP4]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: br label [[COMPLEX_DIV:%.*]] +// PRMTD_STRICT: abs_rhsr_less_than_abs_rhsi: +// PRMTD_STRICT-NEXT: [[TMP11:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[B_REAL]], x86_fp80 [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP12:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[TMP11]], x86_fp80 [[B_REAL]], metadata !"round.dynamic", 
metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP13:%.*]] = call x86_fp80 @llvm.experimental.constrained.fadd.f80(x86_fp80 [[B_IMAG]], x86_fp80 [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP14:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[A_REAL]], x86_fp80 [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP15:%.*]] = call x86_fp80 @llvm.experimental.constrained.fadd.f80(x86_fp80 [[TMP14]], x86_fp80 [[A_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP16:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[TMP15]], x86_fp80 [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP17:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[A_IMAG]], x86_fp80 [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP18:%.*]] = call x86_fp80 @llvm.experimental.constrained.fsub.f80(x86_fp80 [[TMP17]], x86_fp80 [[A_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP19:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[TMP18]], x86_fp80 [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: br label [[COMPLEX_DIV]] +// PRMTD_STRICT: complex_div: +// PRMTD_STRICT-NEXT: [[TMP20:%.*]] = phi x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD_STRICT-NEXT: [[TMP21:%.*]] = phi x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD_STRICT-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: store x86_fp80 [[TMP20]], ptr [[RETVAL_REALP]], align 16 +// PRMTD_STRICT-NEXT: store x86_fp80 [[TMP21]], ptr [[RETVAL_IMAGP]], align 16 +// PRMTD_STRICT-NEXT: [[TMP22:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// PRMTD_STRICT-NEXT: ret { x86_fp80, x86_fp80 } [[TMP22]] +// _Complex long double divld(_Complex long double a, _Complex long double b) { return a / b; } @@ -2659,6 +3092,68 @@ _Complex long double divld(_Complex long double a, _Complex long double b) { // PRMTD_FAST-NEXT: [[TMP0:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 // PRMTD_FAST-NEXT: ret { x86_fp80, x86_fp80 } [[TMP0]] // +// X86WINPRMTD_STRICT-LABEL: define dso_local void @mulld( +// X86WINPRMTD_STRICT-SAME: ptr dead_on_unwind noalias writable sret({ double, double }) align 8 [[AGG_RESULT:%.*]], ptr noundef [[A:%.*]], ptr noundef [[B:%.*]]) #[[ATTR0]] { +// X86WINPRMTD_STRICT-NEXT: entry: +// X86WINPRMTD_STRICT-NEXT: [[RESULT_PTR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: [[A_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: store ptr [[AGG_RESULT]], ptr [[RESULT_PTR]], align 8 +// X86WINPRMTD_STRICT-NEXT: store ptr [[B]], ptr [[B_INDIRECT_ADDR]], align 8 +// X86WINPRMTD_STRICT-NEXT: store ptr [[A]], ptr [[A_INDIRECT_ADDR]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { double, double 
}, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load double, ptr [[A_REALP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load double, ptr [[A_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[MUL_AC:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_REAL]], double [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_BD:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_IMAG]], double [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_AD:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_REAL]], double [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_BC:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[A_IMAG]], double [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_R:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[MUL_AC]], double [[MUL_BD]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[MUL_I:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[MUL_AD]], double [[MUL_BC]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: store double [[MUL_R]], ptr [[AGG_RESULT_REALP]], align 8 +// X86WINPRMTD_STRICT-NEXT: store double [[MUL_I]], ptr [[AGG_RESULT_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REALP1:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REAL:%.*]] = load double, ptr [[AGG_RESULT_REALP1]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAGP2:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAG:%.*]] = load double, ptr [[AGG_RESULT_IMAGP2]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_REALP3:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[AGG_RESULT_IMAGP4:%.*]] = getelementptr inbounds { double, double }, ptr [[AGG_RESULT]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: store double [[AGG_RESULT_REAL]], ptr [[AGG_RESULT_REALP3]], align 8 +// X86WINPRMTD_STRICT-NEXT: store double [[AGG_RESULT_IMAG]], ptr [[AGG_RESULT_IMAGP4]], align 8 +// X86WINPRMTD_STRICT-NEXT: ret void +// +// PRMTD_STRICT-LABEL: define dso_local { x86_fp80, x86_fp80 } @mulld( +// PRMTD_STRICT-SAME: ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[A:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]]) 
#[[ATTR2]] { +// PRMTD_STRICT-NEXT: entry: +// PRMTD_STRICT-NEXT: [[RETVAL:%.*]] = alloca { x86_fp80, x86_fp80 }, align 16 +// PRMTD_STRICT-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load x86_fp80, ptr [[A_REALP]], align 16 +// PRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[A]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load x86_fp80, ptr [[A_IMAGP]], align 16 +// PRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// PRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// PRMTD_STRICT-NEXT: [[MUL_AC:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[A_REAL]], x86_fp80 [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_BD:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[A_IMAG]], x86_fp80 [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_AD:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[A_REAL]], x86_fp80 [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_BC:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[A_IMAG]], x86_fp80 [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_R:%.*]] = call x86_fp80 @llvm.experimental.constrained.fsub.f80(x86_fp80 [[MUL_AC]], x86_fp80 [[MUL_BD]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[MUL_I:%.*]] = call x86_fp80 @llvm.experimental.constrained.fadd.f80(x86_fp80 [[MUL_AD]], x86_fp80 [[MUL_BC]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: store x86_fp80 [[MUL_R]], ptr [[RETVAL_REALP]], align 16 +// PRMTD_STRICT-NEXT: store x86_fp80 [[MUL_I]], ptr [[RETVAL_IMAGP]], align 16 +// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = load { x86_fp80, x86_fp80 }, ptr [[RETVAL]], align 16 +// PRMTD_STRICT-NEXT: ret { x86_fp80, x86_fp80 } [[TMP0]] +// _Complex long double mulld(_Complex long double a, _Complex long double b) { return a * b; } @@ -3446,6 +3941,167 @@ _Complex long double mulld(_Complex long double a, _Complex long double b) { // PRMTD_FAST-NEXT: [[TMP33:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 // PRMTD_FAST-NEXT: ret <2 x float> [[TMP33]] // +// X86WINPRMTD_STRICT-LABEL: define dso_local i64 @f1( +// X86WINPRMTD_STRICT-SAME: i64 noundef [[A_COERCE:%.*]], ptr noundef [[B:%.*]], i64 noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// X86WINPRMTD_STRICT-NEXT: entry: +// X86WINPRMTD_STRICT-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD_STRICT-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD_STRICT-NEXT: [[C:%.*]] = alloca { float, float }, align 4 +// X86WINPRMTD_STRICT-NEXT: [[B_INDIRECT_ADDR:%.*]] = alloca ptr, align 8 +// X86WINPRMTD_STRICT-NEXT: store i64 
[[A_COERCE]], ptr [[A]], align 4 +// X86WINPRMTD_STRICT-NEXT: store i64 [[C_COERCE]], ptr [[C]], align 4 +// X86WINPRMTD_STRICT-NEXT: store ptr [[B]], ptr [[B_INDIRECT_ADDR]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load double, ptr [[B_REALP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { double, double }, ptr [[B]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load double, ptr [[B_IMAGP]], align 8 +// X86WINPRMTD_STRICT-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[CONV:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[C_REAL]], metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[CONV1:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[C_IMAG]], metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP0:%.*]] = call double @llvm.fabs.f64(double [[CONV]]) #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP1:%.*]] = call double @llvm.fabs.f64(double [[CONV1]]) #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f64(double [[TMP0]], double [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// X86WINPRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi: +// X86WINPRMTD_STRICT-NEXT: [[TMP2:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[CONV1]], double [[CONV]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP3:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[TMP2]], double [[CONV1]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP4:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[CONV]], double [[TMP3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP5:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[B_IMAG]], double [[TMP2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP6:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[B_REAL]], double [[TMP5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP7:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP6]], double [[TMP4]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP8:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[B_REAL]], double [[TMP2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP9:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[B_IMAG]], double [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP10:%.*]] = call double 
@llvm.experimental.constrained.fdiv.f64(double [[TMP9]], double [[TMP4]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: br label [[COMPLEX_DIV:%.*]] +// X86WINPRMTD_STRICT: abs_rhsr_less_than_abs_rhsi: +// X86WINPRMTD_STRICT-NEXT: [[TMP11:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[CONV]], double [[CONV1]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP12:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[TMP11]], double [[CONV]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP13:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[CONV1]], double [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP14:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[B_REAL]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP15:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP14]], double [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP16:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP15]], double [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP17:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[B_IMAG]], double [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP18:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP17]], double [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP19:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP18]], double [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: br label [[COMPLEX_DIV]] +// X86WINPRMTD_STRICT: complex_div: +// X86WINPRMTD_STRICT-NEXT: [[TMP20:%.*]] = phi double [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD_STRICT-NEXT: [[TMP21:%.*]] = phi double [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// X86WINPRMTD_STRICT-NEXT: [[CONV2:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[CONV3:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[EXT:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[CONV2]], metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[EXT4:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[CONV3]], metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// 
X86WINPRMTD_STRICT-NEXT: [[EXT5:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[A_REAL]], metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[EXT6:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[A_IMAG]], metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP22:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT]], double [[EXT5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP23:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT4]], double [[EXT6]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP24:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP22]], double [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP25:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT5]], double [[EXT5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP26:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT6]], double [[EXT6]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP27:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP25]], double [[TMP26]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP28:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT4]], double [[EXT5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP29:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT]], double [[EXT6]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP30:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP28]], double [[TMP29]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP31:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP24]], double [[TMP27]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP30]], double [[TMP27]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[UNPROMOTION:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[UNPROMOTION7:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[TMP32]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR3]] +// X86WINPRMTD_STRICT-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// X86WINPRMTD_STRICT-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// X86WINPRMTD_STRICT-NEXT: store float [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 4 +// X86WINPRMTD_STRICT-NEXT: store float [[UNPROMOTION7]], ptr [[RETVAL_IMAGP]], align 4 +// X86WINPRMTD_STRICT-NEXT: [[TMP33:%.*]] = load i64, ptr [[RETVAL]], align 4 +// X86WINPRMTD_STRICT-NEXT: ret i64 [[TMP33]] +// +// PRMTD_STRICT-LABEL: define dso_local <2 x float> @f1( +// PRMTD_STRICT-SAME: <2 x float> noundef 
[[A_COERCE:%.*]], ptr noundef byval({ x86_fp80, x86_fp80 }) align 16 [[B:%.*]], <2 x float> noundef [[C_COERCE:%.*]]) #[[ATTR0]] { +// PRMTD_STRICT-NEXT: entry: +// PRMTD_STRICT-NEXT: [[RETVAL:%.*]] = alloca { float, float }, align 4 +// PRMTD_STRICT-NEXT: [[A:%.*]] = alloca { float, float }, align 4 +// PRMTD_STRICT-NEXT: [[C:%.*]] = alloca { float, float }, align 4 +// PRMTD_STRICT-NEXT: store <2 x float> [[A_COERCE]], ptr [[A]], align 4 +// PRMTD_STRICT-NEXT: store <2 x float> [[C_COERCE]], ptr [[C]], align 4 +// PRMTD_STRICT-NEXT: [[B_REALP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[B_REAL:%.*]] = load x86_fp80, ptr [[B_REALP]], align 16 +// PRMTD_STRICT-NEXT: [[B_IMAGP:%.*]] = getelementptr inbounds { x86_fp80, x86_fp80 }, ptr [[B]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[B_IMAG:%.*]] = load x86_fp80, ptr [[B_IMAGP]], align 16 +// PRMTD_STRICT-NEXT: [[C_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[C_REAL:%.*]] = load float, ptr [[C_REALP]], align 4 +// PRMTD_STRICT-NEXT: [[C_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[C]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[C_IMAG:%.*]] = load float, ptr [[C_IMAGP]], align 4 +// PRMTD_STRICT-NEXT: [[CONV:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float [[C_REAL]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[CONV1:%.*]] = call x86_fp80 @llvm.experimental.constrained.fpext.f80.f32(float [[C_IMAG]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP0:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV]]) #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP1:%.*]] = call x86_fp80 @llvm.fabs.f80(x86_fp80 [[CONV1]]) #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[ABS_CMP:%.*]] = call i1 @llvm.experimental.constrained.fcmp.f80(x86_fp80 [[TMP0]], x86_fp80 [[TMP1]], metadata !"ugt", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: br i1 [[ABS_CMP]], label [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI:%.*]], label [[ABS_RHSR_LESS_THAN_ABS_RHSI:%.*]] +// PRMTD_STRICT: abs_rhsr_greater_or_equal_abs_rhsi: +// PRMTD_STRICT-NEXT: [[TMP2:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[CONV1]], x86_fp80 [[CONV]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP3:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[TMP2]], x86_fp80 [[CONV1]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP4:%.*]] = call x86_fp80 @llvm.experimental.constrained.fadd.f80(x86_fp80 [[CONV]], x86_fp80 [[TMP3]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP5:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[B_IMAG]], x86_fp80 [[TMP2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP6:%.*]] = call x86_fp80 @llvm.experimental.constrained.fadd.f80(x86_fp80 [[B_REAL]], x86_fp80 [[TMP5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP7:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[TMP6]], x86_fp80 [[TMP4]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP8:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[B_REAL]], x86_fp80 [[TMP2]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// 
PRMTD_STRICT-NEXT: [[TMP9:%.*]] = call x86_fp80 @llvm.experimental.constrained.fsub.f80(x86_fp80 [[B_IMAG]], x86_fp80 [[TMP8]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP10:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[TMP9]], x86_fp80 [[TMP4]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: br label [[COMPLEX_DIV:%.*]] +// PRMTD_STRICT: abs_rhsr_less_than_abs_rhsi: +// PRMTD_STRICT-NEXT: [[TMP11:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[CONV]], x86_fp80 [[CONV1]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP12:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[TMP11]], x86_fp80 [[CONV]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP13:%.*]] = call x86_fp80 @llvm.experimental.constrained.fadd.f80(x86_fp80 [[CONV1]], x86_fp80 [[TMP12]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP14:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[B_REAL]], x86_fp80 [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP15:%.*]] = call x86_fp80 @llvm.experimental.constrained.fadd.f80(x86_fp80 [[TMP14]], x86_fp80 [[B_IMAG]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP16:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[TMP15]], x86_fp80 [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP17:%.*]] = call x86_fp80 @llvm.experimental.constrained.fmul.f80(x86_fp80 [[B_IMAG]], x86_fp80 [[TMP11]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP18:%.*]] = call x86_fp80 @llvm.experimental.constrained.fsub.f80(x86_fp80 [[TMP17]], x86_fp80 [[B_REAL]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP19:%.*]] = call x86_fp80 @llvm.experimental.constrained.fdiv.f80(x86_fp80 [[TMP18]], x86_fp80 [[TMP13]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: br label [[COMPLEX_DIV]] +// PRMTD_STRICT: complex_div: +// PRMTD_STRICT-NEXT: [[TMP20:%.*]] = phi x86_fp80 [ [[TMP7]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP16]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD_STRICT-NEXT: [[TMP21:%.*]] = phi x86_fp80 [ [[TMP10]], [[ABS_RHSR_GREATER_OR_EQUAL_ABS_RHSI]] ], [ [[TMP19]], [[ABS_RHSR_LESS_THAN_ABS_RHSI]] ] +// PRMTD_STRICT-NEXT: [[CONV2:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f80(x86_fp80 [[TMP20]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[CONV3:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f80(x86_fp80 [[TMP21]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[EXT:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[CONV2]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[EXT4:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[CONV3]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[A_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[A_REAL:%.*]] = load float, ptr [[A_REALP]], align 4 +// 
PRMTD_STRICT-NEXT: [[A_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[A]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: [[A_IMAG:%.*]] = load float, ptr [[A_IMAGP]], align 4 +// PRMTD_STRICT-NEXT: [[EXT5:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[A_REAL]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[EXT6:%.*]] = call double @llvm.experimental.constrained.fpext.f64.f32(float [[A_IMAG]], metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP22:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT]], double [[EXT5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP23:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT4]], double [[EXT6]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP24:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP22]], double [[TMP23]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP25:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT5]], double [[EXT5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP26:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT6]], double [[EXT6]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP27:%.*]] = call double @llvm.experimental.constrained.fadd.f64(double [[TMP25]], double [[TMP26]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP28:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT4]], double [[EXT5]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP29:%.*]] = call double @llvm.experimental.constrained.fmul.f64(double [[EXT]], double [[EXT6]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP30:%.*]] = call double @llvm.experimental.constrained.fsub.f64(double [[TMP28]], double [[TMP29]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP31:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP24]], double [[TMP27]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[TMP32:%.*]] = call double @llvm.experimental.constrained.fdiv.f64(double [[TMP30]], double [[TMP27]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[UNPROMOTION:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[TMP31]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[UNPROMOTION7:%.*]] = call float @llvm.experimental.constrained.fptrunc.f32.f64(double [[TMP32]], metadata !"round.dynamic", metadata !"fpexcept.strict") #[[ATTR4]] +// PRMTD_STRICT-NEXT: [[RETVAL_REALP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 0 +// PRMTD_STRICT-NEXT: [[RETVAL_IMAGP:%.*]] = getelementptr inbounds { float, float }, ptr [[RETVAL]], i32 0, i32 1 +// PRMTD_STRICT-NEXT: store float [[UNPROMOTION]], ptr [[RETVAL_REALP]], align 4 +// PRMTD_STRICT-NEXT: store float [[UNPROMOTION7]], ptr [[RETVAL_IMAGP]], align 4 +// PRMTD_STRICT-NEXT: [[TMP33:%.*]] = load <2 x float>, ptr [[RETVAL]], align 4 +// PRMTD_STRICT-NEXT: ret <2 x float> [[TMP33]] +// _Complex float f1(_Complex float a, 
_Complex long double b, _Complex float c) { return (_Complex float)(b / c) / a; } +//. +// FULL: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +//. +// FULL_FAST: [[PROF2]] = !{!"branch_weights", i32 1, i32 1048575} +//. -- cgit v1.1 From 608a663c8ee485c42637d021d554c8d264d556b1 Mon Sep 17 00:00:00 2001 From: Philip Lassen Date: Thu, 4 Apr 2024 03:26:33 -0700 Subject: [MLIR] Clean up pass options for test-loop-fusion and affine-super-vectorizer-test (#87606) Before the change `test-loop-fusion` and `affine-super-vectorizer-test` options were in their own category. This was because they used the standard llvm command line parsing with `llvm::cl::opt`. This PR moves them over to the mlir `Pass::Option` class. Before the change ``` $ mlir-opt --help ... General options: ... Compiler passes to run Passes: ... Pass Pipelines: ... Generic Options: .... affine-super-vectorizer-test options: --backward-slicing ... --vectorize-affine-loop-nest test-loop-fusion options: --test-loop-fusion-dependence-check ... --test-loop-fusion-transformation ``` After the change ``` $ mlir-opt --help ... General options: ... Compiler passes to run Passes: ... --affine-super-vectorizer-test --backward-slicing ... --vectorize-affine-loop-nest ... --test-loop-fusion options: --test-loop-fusion-dependence-check ... --test-loop-fusion-transformation ... Pass Pipelines: ... Generic Options: ... ``` --------- Signed-off-by: philass --- .../Affine/SuperVectorize/compose_maps.mlir | 2 +- .../Affine/SuperVectorize/vector_utils.mlir | 6 +- .../SuperVectorize/vectorize_unsupported.mlir | 2 +- .../Affine/loop-fusion-dependence-check.mlir | 2 +- .../Affine/loop-fusion-slice-computation.mlir | 2 +- .../Dialect/Affine/loop-fusion-transformation.mlir | 2 +- mlir/test/Dialect/Affine/slicing-utils.mlir | 6 +- mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp | 35 ++++++------ .../lib/Dialect/Affine/TestVectorizationUtils.cpp | 64 +++++++++++----------- 9 files changed, 60 insertions(+), 61 deletions(-) diff --git a/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir b/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir index b53fc55..d998ed8 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/compose_maps.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -affine-super-vectorizer-test -compose-maps -split-input-file 2>&1 | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -affine-super-vectorizer-test=compose-maps -split-input-file 2>&1 | FileCheck %s // For all these cases, the test traverses the `test_affine_map` ops and // composes them in order one-by-one. 
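Editorial aside on the option migration described in the commit message above and implemented in the TestLoopFusion and VectorizerTestPass diffs below: the sketch that follows is not part of the patch. With made-up pass and option names, it shows the member-based `Option`/`ListOption` pattern the two test passes move to, including the copy constructor the pattern requires. Treat it as a minimal illustration of the API rather than code from this change.

```
// Illustrative sketch only; pass and option names are made up and this is
// not code from the patch. It shows the member-based Option/ListOption
// pattern that TestLoopFusion and VectorizerTestPass are migrated to below.
#include "mlir/Dialect/Func/IR/FuncOps.h"
#include "mlir/Pass/Pass.h"
#include "mlir/Pass/PassRegistry.h"

namespace {
struct TestExampleOptionsPass
    : public mlir::PassWrapper<TestExampleOptionsPass,
                               mlir::OperationPass<mlir::func::FuncOp>> {
  MLIR_DEFINE_EXPLICIT_INTERNAL_INLINE_TYPE_ID(TestExampleOptionsPass)

  TestExampleOptionsPass() = default;
  // Options are non-copyable members, so the pass needs a copy constructor
  // that re-registers them against the copied instance.
  TestExampleOptionsPass(const TestExampleOptionsPass &pass)
      : PassWrapper(pass) {}

  llvm::StringRef getArgument() const final { return "test-example-options"; }
  llvm::StringRef getDescription() const final {
    return "Illustrates member-based pass options.";
  }

  // Parsed from `-test-example-options=enable-check=true`.
  Option<bool> enableCheck{*this, "enable-check",
                           llvm::cl::desc("Enable the extra check"),
                           llvm::cl::init(false)};
  // Parsed from `-test-example-options="shape-ratio=4,8"`.
  ListOption<int64_t> shapeRatio{
      *this, "shape-ratio",
      llvm::cl::desc("Comma-separated shape ratio to test")};

  void runOnOperation() override {
    if (enableCheck)
      getOperation()->emitRemark()
          << "running with " << shapeRatio.size() << " shape-ratio entries";
  }
};
} // namespace

// Hooked up to mlir-opt the same way as the in-tree test passes.
void registerTestExampleOptionsPass() {
  mlir::PassRegistration<TestExampleOptionsPass>();
}
```

Declared this way, the options are registered against the pass itself, so on the command line they nest under the pass flag, matching the updated RUN lines in this patch, for example `mlir-opt foo.mlir -test-example-options="enable-check=true shape-ratio=4,8"`.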
diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vector_utils.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vector_utils.mlir index e58de9f..bd71164 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vector_utils.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vector_utils.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt %s -affine-super-vectorizer-test -vector-shape-ratio 4 -vector-shape-ratio 8 2>&1 | FileCheck %s -// RUN: mlir-opt %s -affine-super-vectorizer-test -vector-shape-ratio 2 -vector-shape-ratio 5 -vector-shape-ratio 2 2>&1 | FileCheck %s -check-prefix=TEST-3x4x5x8 -// RUN: mlir-opt %s -affine-super-vectorizer-test -vectorize-affine-loop-nest 2>&1 | FileCheck %s -check-prefix=VECNEST +// RUN: mlir-opt %s -affine-super-vectorizer-test="vector-shape-ratio=4,8" 2>&1 | FileCheck %s +// RUN: mlir-opt %s -affine-super-vectorizer-test="vector-shape-ratio=2,5,2" 2>&1 | FileCheck %s -check-prefix=TEST-3x4x5x8 +// RUN: mlir-opt %s -affine-super-vectorizer-test=vectorize-affine-loop-nest 2>&1 | FileCheck %s -check-prefix=VECNEST func.func @vector_add_2d(%arg0: index, %arg1: index) -> f32 { // Nothing should be matched in this first block. diff --git a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir index c117bfc..6c1a7c4 100644 --- a/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir +++ b/mlir/test/Dialect/Affine/SuperVectorize/vectorize_unsupported.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -affine-super-vectorizer-test -vectorize-affine-loop-nest -split-input-file 2>&1 | FileCheck %s +// RUN: mlir-opt %s -affine-super-vectorizer-test=vectorize-affine-loop-nest -split-input-file 2>&1 | FileCheck %s func.func @unparallel_loop_reduction_unsupported(%in: memref<256x512xf32>, %out: memref<256xf32>) { // CHECK: Outermost loop cannot be parallel diff --git a/mlir/test/Dialect/Affine/loop-fusion-dependence-check.mlir b/mlir/test/Dialect/Affine/loop-fusion-dependence-check.mlir index aa872b0..2c53852 100644 --- a/mlir/test/Dialect/Affine/loop-fusion-dependence-check.mlir +++ b/mlir/test/Dialect/Affine/loop-fusion-dependence-check.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -test-loop-fusion -test-loop-fusion-dependence-check -split-input-file -verify-diagnostics | FileCheck %s +// RUN: mlir-opt -allow-unregistered-dialect %s -test-loop-fusion=test-loop-fusion-dependence-check -split-input-file -verify-diagnostics | FileCheck %s // ----- diff --git a/mlir/test/Dialect/Affine/loop-fusion-slice-computation.mlir b/mlir/test/Dialect/Affine/loop-fusion-slice-computation.mlir index c303dd0..aa79ee2 100644 --- a/mlir/test/Dialect/Affine/loop-fusion-slice-computation.mlir +++ b/mlir/test/Dialect/Affine/loop-fusion-slice-computation.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -test-loop-fusion -test-loop-fusion-slice-computation -split-input-file -verify-diagnostics | FileCheck %s +// RUN: mlir-opt %s -test-loop-fusion=test-loop-fusion-slice-computation -split-input-file -verify-diagnostics | FileCheck %s // ----- diff --git a/mlir/test/Dialect/Affine/loop-fusion-transformation.mlir b/mlir/test/Dialect/Affine/loop-fusion-transformation.mlir index c8e0918..4f4163a 100644 --- a/mlir/test/Dialect/Affine/loop-fusion-transformation.mlir +++ b/mlir/test/Dialect/Affine/loop-fusion-transformation.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -allow-unregistered-dialect -test-loop-fusion -test-loop-fusion-transformation -split-input-file -canonicalize | FileCheck %s +// RUN: mlir-opt %s 
-allow-unregistered-dialect -test-loop-fusion=test-loop-fusion-transformation -split-input-file -canonicalize | FileCheck %s // CHECK-LABEL: func @slice_depth1_loop_nest() { func.func @slice_depth1_loop_nest() { diff --git a/mlir/test/Dialect/Affine/slicing-utils.mlir b/mlir/test/Dialect/Affine/slicing-utils.mlir index 71bd8ad..7437997 100644 --- a/mlir/test/Dialect/Affine/slicing-utils.mlir +++ b/mlir/test/Dialect/Affine/slicing-utils.mlir @@ -1,6 +1,6 @@ -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-super-vectorizer-test -forward-slicing=true 2>&1 | FileCheck %s --check-prefix=FWD -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-super-vectorizer-test -backward-slicing=true 2>&1 | FileCheck %s --check-prefix=BWD -// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-super-vectorizer-test -slicing=true 2>&1 | FileCheck %s --check-prefix=FWDBWD +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-super-vectorizer-test="forward-slicing=true" 2>&1 | FileCheck %s --check-prefix=FWD +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-super-vectorizer-test="backward-slicing=true" 2>&1 | FileCheck %s --check-prefix=BWD +// RUN: mlir-opt -allow-unregistered-dialect %s -split-input-file -affine-super-vectorizer-test="slicing=true" 2>&1 | FileCheck %s --check-prefix=FWDBWD /// 1 2 3 4 /// |_______| |______| diff --git a/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp b/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp index f4f1593..1901180 100644 --- a/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp +++ b/mlir/test/lib/Dialect/Affine/TestLoopFusion.cpp @@ -22,23 +22,6 @@ using namespace mlir; using namespace mlir::affine; -static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options"); - -static llvm::cl::opt clTestDependenceCheck( - "test-loop-fusion-dependence-check", - llvm::cl::desc("Enable testing of loop fusion dependence check"), - llvm::cl::cat(clOptionsCategory)); - -static llvm::cl::opt clTestSliceComputation( - "test-loop-fusion-slice-computation", - llvm::cl::desc("Enable testing of loop fusion slice computation"), - llvm::cl::cat(clOptionsCategory)); - -static llvm::cl::opt clTestLoopFusionTransformation( - "test-loop-fusion-transformation", - llvm::cl::desc("Enable testing of loop fusion transformation"), - llvm::cl::cat(clOptionsCategory)); - namespace { struct TestLoopFusion @@ -50,6 +33,24 @@ struct TestLoopFusion return "Tests loop fusion utility functions."; } void runOnOperation() override; + + TestLoopFusion() = default; + TestLoopFusion(const TestLoopFusion &pass) : PassWrapper(pass){}; + + Option clTestDependenceCheck{ + *this, "test-loop-fusion-dependence-check", + llvm::cl::desc("Enable testing of loop fusion dependence check"), + llvm::cl::init(false)}; + + Option clTestSliceComputation{ + *this, "test-loop-fusion-slice-computation", + llvm::cl::desc("Enable testing of loop fusion slice computation"), + llvm::cl::init(false)}; + + Option clTestLoopFusionTransformation{ + *this, "test-loop-fusion-transformation", + llvm::cl::desc("Enable testing of loop fusion transformation"), + llvm::cl::init(false)}; }; } // namespace diff --git a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp index b497f8d..598678f 100644 --- a/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp +++ b/mlir/test/lib/Dialect/Affine/TestVectorizationUtils.cpp @@ -37,39 +37,6 @@ using namespace mlir::affine; 
static llvm::cl::OptionCategory clOptionsCategory(DEBUG_TYPE " options"); -static llvm::cl::list clTestVectorShapeRatio( - "vector-shape-ratio", - llvm::cl::desc("Specify the HW vector size for vectorization"), - llvm::cl::cat(clOptionsCategory)); -static llvm::cl::opt clTestForwardSlicingAnalysis( - "forward-slicing", - llvm::cl::desc("Enable testing forward static slicing and topological sort " - "functionalities"), - llvm::cl::cat(clOptionsCategory)); -static llvm::cl::opt clTestBackwardSlicingAnalysis( - "backward-slicing", - llvm::cl::desc("Enable testing backward static slicing and " - "topological sort functionalities"), - llvm::cl::cat(clOptionsCategory)); -static llvm::cl::opt clTestSlicingAnalysis( - "slicing", - llvm::cl::desc("Enable testing static slicing and topological sort " - "functionalities"), - llvm::cl::cat(clOptionsCategory)); -static llvm::cl::opt clTestComposeMaps( - "compose-maps", - llvm::cl::desc( - "Enable testing the composition of AffineMap where each " - "AffineMap in the composition is specified as the affine_map attribute " - "in a constant op."), - llvm::cl::cat(clOptionsCategory)); -static llvm::cl::opt clTestVecAffineLoopNest( - "vectorize-affine-loop-nest", - llvm::cl::desc( - "Enable testing for the 'vectorizeAffineLoopNest' utility by " - "vectorizing the outermost loops found"), - llvm::cl::cat(clOptionsCategory)); - namespace { struct VectorizerTestPass : public PassWrapper> { @@ -85,6 +52,37 @@ struct VectorizerTestPass return "Tests vectorizer standalone functionality."; } + VectorizerTestPass() = default; + VectorizerTestPass(const VectorizerTestPass &pass) : PassWrapper(pass){}; + + ListOption clTestVectorShapeRatio{ + *this, "vector-shape-ratio", + llvm::cl::desc("Specify the HW vector size for vectorization")}; + Option clTestForwardSlicingAnalysis{ + *this, "forward-slicing", + llvm::cl::desc( + "Enable testing forward static slicing and topological sort " + "functionalities")}; + Option clTestBackwardSlicingAnalysis{ + *this, "backward-slicing", + llvm::cl::desc("Enable testing backward static slicing and " + "topological sort functionalities")}; + Option clTestSlicingAnalysis{ + *this, "slicing", + llvm::cl::desc("Enable testing static slicing and topological sort " + "functionalities")}; + Option clTestComposeMaps{ + *this, "compose-maps", + llvm::cl::desc("Enable testing the composition of AffineMap where each " + "AffineMap in the composition is specified as the " + "affine_map attribute " + "in a constant op.")}; + Option clTestVecAffineLoopNest{ + *this, "vectorize-affine-loop-nest", + llvm::cl::desc( + "Enable testing for the 'vectorizeAffineLoopNest' utility by " + "vectorizing the outermost loops found")}; + void runOnOperation() override; void testVectorShapeRatio(llvm::raw_ostream &outs); void testForwardSlicing(llvm::raw_ostream &outs); -- cgit v1.1 From 4e0b8eae4cb4328f98e6b748c31050a704d378f6 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Fri, 29 Mar 2024 20:48:03 +0800 Subject: [RISCV] Add tests for vwsll for extends > .vf2. 
NFC These cannot be picked up by TableGen patterns alone and need to be handled by combineBinOp_VLToVWBinOp_VL --- llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll | 256 ++++++++++++++++++++++++++++ 1 file changed, 256 insertions(+) diff --git a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll index 770bb56..082de2e 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll +++ b/llvm/test/CodeGen/RISCV/rvv/vwsll-sdnode.ll @@ -627,3 +627,259 @@ define @vwsll_vi_nxv8i16( %a) { %z = shl %x, splat (i16 2) ret %z } + +; ============================================================================== +; i8 -> i64 +; ============================================================================== + +define @vwsll_vv_nxv2i64_nxv2i8_sext( %a, %b) { +; CHECK-LABEL: vwsll_vv_nxv2i64_nxv2i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_nxv2i8_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %x = zext %a to + %y = sext %b to + %z = shl %x, %y + ret %z +} + +define @vwsll_vv_nxv2i64_nxv2i8_zext( %a, %b) { +; CHECK-LABEL: vwsll_vv_nxv2i64_nxv2i8_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vzext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vv_nxv2i64_nxv2i8_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %x = zext %a to + %y = zext %b to + %z = shl %x, %y + ret %z +} + +define @vwsll_vx_i64_nxv2i64_nxv2i8( %a, i64 %b) { +; CHECK-LABEL: vwsll_vx_i64_nxv2i64_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsll.vx v8, v10, a0 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i64_nxv2i64_nxv2i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vx v8, v10, a0 +; CHECK-ZVBB-NEXT: ret + %head = insertelement poison, i64 %b, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + %x = zext %a to + %z = shl %x, %splat + ret %z +} + +define @vwsll_vx_i32_nxv2i64_nxv2i8_sext( %a, i32 %b) { +; CHECK-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement poison, i32 %b, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + %x = zext %a to + %y = sext %splat to + %z = shl %x, %y + ret %z +} + +define 
@vwsll_vx_i32_nxv2i64_nxv2i8_zext( %a, i32 %b) { +; CHECK-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vzext.vf2 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i32_nxv2i64_nxv2i8_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e32, m1, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf2 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement poison, i32 %b, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + %x = zext %a to + %y = zext %splat to + %z = shl %x, %y + ret %z +} + +define @vwsll_vx_i16_nxv2i64_nxv2i8_sext( %a, i16 %b) { +; CHECK-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsext.vf4 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf4 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement poison, i16 %b, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + %x = zext %a to + %y = sext %splat to + %z = shl %x, %y + ret %z +} + +define @vwsll_vx_i16_nxv2i64_nxv2i8_zext( %a, i16 %b) { +; CHECK-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vzext.vf4 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i16_nxv2i64_nxv2i8_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e16, mf2, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf4 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement poison, i16 %b, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + %x = zext %a to + %y = zext %splat to + %z = shl %x, %y + ret %z +} + +define @vwsll_vx_i8_nxv2i64_nxv2i8_sext( %a, i8 %b) { +; CHECK-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_sext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_sext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement poison, i8 %b, i32 0 + %splat = 
shufflevector %head, poison, zeroinitializer + %x = zext %a to + %y = sext %splat to + %z = shl %x, %y + ret %z +} + +define @vwsll_vx_i8_nxv2i64_nxv2i8_zext( %a, i8 %b) { +; CHECK-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_zext: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-NEXT: vmv.v.x v9, a0 +; CHECK-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vzext.vf8 v12, v9 +; CHECK-NEXT: vsll.vv v8, v10, v12 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vx_i8_nxv2i64_nxv2i8_zext: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a1, zero, e8, mf4, ta, ma +; CHECK-ZVBB-NEXT: vmv.v.x v9, a0 +; CHECK-ZVBB-NEXT: vsetvli zero, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vzext.vf8 v12, v9 +; CHECK-ZVBB-NEXT: vsll.vv v8, v10, v12 +; CHECK-ZVBB-NEXT: ret + %head = insertelement poison, i8 %b, i32 0 + %splat = shufflevector %head, poison, zeroinitializer + %x = zext %a to + %y = zext %splat to + %z = shl %x, %y + ret %z +} + +define @vwsll_vi_nxv2i64_nxv2i8( %a) { +; CHECK-LABEL: vwsll_vi_nxv2i64_nxv2i8: +; CHECK: # %bb.0: +; CHECK-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-NEXT: vzext.vf8 v10, v8 +; CHECK-NEXT: vsll.vi v8, v10, 2 +; CHECK-NEXT: ret +; +; CHECK-ZVBB-LABEL: vwsll_vi_nxv2i64_nxv2i8: +; CHECK-ZVBB: # %bb.0: +; CHECK-ZVBB-NEXT: vsetvli a0, zero, e64, m2, ta, ma +; CHECK-ZVBB-NEXT: vzext.vf8 v10, v8 +; CHECK-ZVBB-NEXT: vsll.vi v8, v10, 2 +; CHECK-ZVBB-NEXT: ret + %x = zext %a to + %z = shl %x, splat (i64 2) + ret %z +} -- cgit v1.1 From 7bd163d0a4b3c9c9375dc32c9c10162433c42180 Mon Sep 17 00:00:00 2001 From: Florian Hahn Date: Thu, 4 Apr 2024 12:05:08 +0100 Subject: [VPlan] Clean up dead recipes after UF & VF specific simplification. Recursively remove dead recipes after simplifying vector loop exit branch. 
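Editorial aside, not part of the patch: the snippet below restates the cleanup added here as a toy worklist algorithm over a made-up `Recipe` type rather than the real VPlan classes. The point it illustrates is the ordering used in the change below: the terminator's operands are captured before the terminator is erased, and anything that became transitively dead (no remaining users, no side effects) is then deleted with a worklist, assuming an acyclic use-def graph.

```
// Toy restatement (illustrative types, not the real VPlan classes) of the
// cleanup added below. Assumes an acyclic use-def graph.
#include <unordered_set>
#include <vector>

struct Recipe {
  std::vector<Recipe *> operands; // definitions this recipe reads
  int numUsers = 0;               // how many recipes read this definition
  bool hasSideEffects = false;    // e.g. stores; never treated as dead
  bool erased = false;

  void eraseFromParent() { // detach from the use-def graph
    erased = true;
    for (Recipe *op : operands)
      --op->numUsers;
  }
};

// Capture the terminator's operands *before* erasing it, then walk them with
// a worklist and delete everything that became transitively dead. The `seen`
// set guards against visiting an operand twice when it feeds several erased
// recipes.
void eraseTerminatorAndCleanUp(Recipe *term) {
  std::vector<Recipe *> worklist(term->operands);
  term->eraseFromParent();

  std::unordered_set<Recipe *> seen;
  while (!worklist.empty()) {
    Recipe *cur = worklist.back();
    worklist.pop_back();
    if (!seen.insert(cur).second)
      continue;
    if (cur->numUsers != 0 || cur->hasSideEffects)
      continue; // still live, stop the walk here
    worklist.insert(worklist.end(), cur->operands.begin(),
                    cur->operands.end());
    cur->eraseFromParent();
  }
}
```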
--- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 23 ++++++++++++++++++++++ .../LoopVectorize/AArch64/clamped-trip-count.ll | 6 ++---- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 3753060..7d4e54d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -641,6 +641,25 @@ static void removeRedundantExpandSCEVRecipes(VPlan &Plan) { } } +static void recursivelyDeleteDeadRecipes(VPValue *V) { + SmallVector WorkList; + SmallPtrSet Seen; + WorkList.push_back(V); + + while (!WorkList.empty()) { + VPValue *Cur = WorkList.pop_back_val(); + if (!Seen.insert(Cur).second) + continue; + VPRecipeBase *R = Cur->getDefiningRecipe(); + if (!R) + continue; + if (!isDeadRecipe(*R)) + continue; + WorkList.append(R->op_begin(), R->op_end()); + R->eraseFromParent(); + } +} + void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, unsigned BestUF, PredicatedScalarEvolution &PSE) { @@ -674,7 +693,11 @@ void VPlanTransforms::optimizeForVFAndUF(VPlan &Plan, ElementCount BestVF, auto *BOC = new VPInstruction(VPInstruction::BranchOnCond, {Plan.getOrAddLiveIn(ConstantInt::getTrue(Ctx))}); + + SmallVector PossiblyDead(Term->operands()); Term->eraseFromParent(); + for (VPValue *Op : PossiblyDead) + recursivelyDeleteDeadRecipes(Op); ExitingVPBB->appendRecipe(BOC); Plan.setVF(BestVF); Plan.setUF(BestUF); diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll index 3e895edc..afd49aa 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/clamped-trip-count.ll @@ -43,9 +43,8 @@ define void @clamped_tc_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range(1,1 ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP16]], ptr [[TMP17]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 8) ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] @@ -135,9 +134,8 @@ define void @clamped_tc_max_8(ptr nocapture %dst, i32 %n, i64 %val) vscale_range ; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[NEXT_GEP]], i32 0 ; CHECK-NEXT: call void @llvm.masked.store.nxv8i8.p0( [[TMP16]], ptr [[TMP17]], i32 1, [[ACTIVE_LANE_MASK]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] -; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call @llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] -; CHECK-NEXT: [[TMP18:%.*]] = xor [[ACTIVE_LANE_MASK_NEXT]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer) +; CHECK-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call 
@llvm.get.active.lane.mask.nxv8i1.i64(i64 [[INDEX_NEXT]], i64 [[WIDE_TRIP_COUNT]]) ; CHECK-NEXT: br i1 true, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] ; CHECK: middle.block: ; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] -- cgit v1.1 From 8ae9c6259f833dce87f8d29402b1ddced90887b9 Mon Sep 17 00:00:00 2001 From: Guillaume Chatelet Date: Thu, 4 Apr 2024 13:19:43 +0200 Subject: [libc] Fix forward `constexpr` `add_with_carry` / `sub_with_borrow` Introduced in #87613. --- libc/src/__support/math_extras.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libc/src/__support/math_extras.h b/libc/src/__support/math_extras.h index bb6424b..4bd8719 100644 --- a/libc/src/__support/math_extras.h +++ b/libc/src/__support/math_extras.h @@ -86,7 +86,7 @@ add_with_carry(T a, T b, T carry_in, T &carry_out) { RETURN_IF(unsigned long long, __builtin_addcll) #endif } - T sum; + T sum = {}; T carry1 = add_overflow(a, b, sum); T carry2 = add_overflow(sum, carry_in, sum); carry_out = carry1 | carry2; @@ -112,7 +112,7 @@ sub_with_borrow(T a, T b, T carry_in, T &carry_out) { RETURN_IF(unsigned long long, __builtin_subcll) #endif } - T sub; + T sub = {}; T carry1 = sub_overflow(a, b, sub); T carry2 = sub_overflow(sub, carry_in, sub); carry_out = carry1 | carry2; -- cgit v1.1 From 1c7fda9f4cde336c4ac30c7478b223536c6eb6d6 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 11:49:20 +0100 Subject: Fix MSVC "result of 32-bit shift implicitly converted to 64 bits" warning. NFC. --- llvm/tools/llvm-readobj/ELFDumper.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/llvm-readobj/ELFDumper.cpp b/llvm/tools/llvm-readobj/ELFDumper.cpp index d6dda61..d353482 100644 --- a/llvm/tools/llvm-readobj/ELFDumper.cpp +++ b/llvm/tools/llvm-readobj/ELFDumper.cpp @@ -5157,7 +5157,7 @@ static bool printAArch64PAuthABICoreInfo(raw_ostream &OS, uint32_t DataSize, std::string Desc; for (uint32_t I = 0, End = Flags.size(); I < End; ++I) { - if (!(Version & (1 << I))) + if (!(Version & (1ULL << I))) Desc += '!'; Desc += Twine("PointerAuth" + Flags[I] + (I == End - 1 ? "" : ", ")).str(); -- cgit v1.1 From d54d476300d7aaeb6616a11b43e1b8006cad17c9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 12:24:24 +0100 Subject: [SLP] Fix Wunused-variable warning. NFC. 
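Editorial aside on the ELFDumper change above (replacing `1 << I` with `1ULL << I`), not part of any patch in this series: the self-contained example below, with made-up names, shows the hazard behind the MSVC warning. A shift written with a 32-bit `1` is evaluated in 32-bit arithmetic and only then widened, so it can never test bits 32..63 of a 64-bit value, and a shift count of 32 or more on a 32-bit operand is undefined behaviour. The commit itself is an NFC warning fix; the example only illustrates the general hazard the warning points at.

```
// Illustrative only; not LLVM code. Shows why `value & (1 << i)` is
// suspicious when `value` is 64 bits wide, and why `1ULL << i` is the
// safe spelling.
#include <cstdint>
#include <cstdio>

// The shift happens in 32-bit unsigned arithmetic and the result is only
// widened to 64 bits afterwards, so bits 32..63 can never be tested, and a
// shift count of 32 or more is undefined behaviour.
static bool testBit32(uint64_t v, unsigned i) { return (v & (1u << i)) != 0; }

// The shift happens in 64-bit arithmetic, so any bit 0..63 can be tested.
static bool testBit64(uint64_t v, unsigned i) { return (v & (1ULL << i)) != 0; }

int main() {
  uint64_t v = (1ULL << 3) | (1ULL << 35);
  std::printf("%d %d\n", testBit32(v, 3), testBit64(v, 3)); // 1 1
  // testBit32(v, 35) would shift a 32-bit value by 35, which is undefined;
  // the 64-bit variant handles the high bit as intended.
  std::printf("%d\n", testBit64(v, 35)); // 1
  return 0;
}
```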
--- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 9976954..79d146a 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -1973,7 +1973,7 @@ public: assert(isa(VL[0]) && "Expected instruction"); unsigned NumOperands = cast(VL[0])->getNumOperands(); constexpr unsigned IntrinsicNumOperands = 2; - if (auto *CI = dyn_cast(VL[0])) + if (isa(VL[0])) NumOperands = IntrinsicNumOperands; OpsVec.resize(NumOperands); unsigned NumLanes = VL.size(); -- cgit v1.1 From 3871eaba6bd016b229f2d0e4b75e2be3b65e39a7 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 12:26:25 +0100 Subject: [CostModel][X86] Update AVX1 sext v8i1 -> v8i32 cost based off worst case llvm-mca numbers We were using raw instruction count which overestimated the costs for #67803 --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 2 +- llvm/test/Analysis/CostModel/X86/cast.ll | 6 +++--- llvm/test/Analysis/CostModel/X86/extend.ll | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 2ec2946..2092675 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2666,7 +2666,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, static const TypeConversionCostTblEntry AVXConversionTbl[] = { { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 }, { ISD::ZERO_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, - { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 7 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i1, 4 }, { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i1, 4 }, diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll index 64ed9bed..d80cb09 100644 --- a/llvm/test/Analysis/CostModel/X86/cast.ll +++ b/llvm/test/Analysis/CostModel/X86/cast.ll @@ -35,7 +35,7 @@ define i32 @add(i32 %arg) { ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %B = sext <4 x i1> undef to <4 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %C = trunc <4 x i32> undef to <4 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %D = zext <8 x i1> undef to <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %E = sext <8 x i1> undef to <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %E = sext <8 x i1> undef to <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %F = trunc <8 x i32> undef to <8 x i1> ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %G = zext i1 undef to i32 ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %H = trunc i32 undef to i1 @@ -143,7 +143,7 @@ define i32 @zext_sext(<8 x i1> %in) { ; ; AVX1-LABEL: 'zext_sext' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %Z = zext <8 x i1> %in to <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %S = sext <8 x i1> %in to <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: 
%A1 = zext <16 x i8> undef to <16 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A2 = sext <16 x i8> undef to <16 x i16> ; AVX1-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %A = sext <8 x i16> undef to <8 x i32> @@ -343,7 +343,7 @@ define i32 @masks8(<8 x i1> %in) { ; ; AVX1-LABEL: 'masks8' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %Z = zext <8 x i1> %in to <8 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %S = sext <8 x i1> %in to <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %S = sext <8 x i1> %in to <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'masks8' diff --git a/llvm/test/Analysis/CostModel/X86/extend.ll b/llvm/test/Analysis/CostModel/X86/extend.ll index 01efced..34fa3c4 100644 --- a/llvm/test/Analysis/CostModel/X86/extend.ll +++ b/llvm/test/Analysis/CostModel/X86/extend.ll @@ -1971,7 +1971,7 @@ define i32 @sext_vXi1() "min-legal-vector-width"="256" { ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32> -; AVX1-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = sext <32 x i1> undef to <32 x i32> ; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i32 = sext <64 x i1> undef to <64 x i32> @@ -2251,7 +2251,7 @@ define i32 @sext_vXi1() "min-legal-vector-width"="256" { ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I32 = sext i1 undef to i32 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i32 = sext <2 x i1> undef to <2 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4i32 = sext <4 x i1> undef to <4 x i32> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8i32 = sext <8 x i1> undef to <8 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V16i32 = sext <16 x i1> undef to <16 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V32i32 = sext <32 x i1> undef to <32 x i32> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V64i32 = sext <64 x i1> undef to <64 x i32> -- cgit v1.1 From 5ad320abe36357e3290007d3ab353e8637f33720 Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Thu, 4 Apr 2024 12:44:32 +0100 Subject: [ARM][Thumb2] Mark BTI-clearing instructions as scheduling region boundaries (#79173) Following https://github.com/llvm/llvm-project/pull/68313 this patch extends the idea to M-profile PACBTI. The Machine Scheduler can reorder instructions within a scheduling region depending on the scheduling policy set. 
If a BTI-clearing instruction happens to partake in one such region, it might be moved around, therefore ending up where it shouldn't. The solution is to mark all BTI-clearing instructions as scheduling region boundaries. This essentially means that they must not be part of any scheduling region, and as consequence never get moved: - PAC - PACBTI - BTI - SG Note that PAC isn't BTI-clearing, but it's replaced by PACBTI late in the compilation pipeline. As far as I know, currently it isn't possible to organically obtain code that's susceptible to the bug: - Instructions that write to SP are region boundaries. PAC seems to always be followed by the pushing of r12 to the stack, so essentially PAC is always by itself in a scheduling region. - CALL_BTI is expanded into a machine instruction bundle. Bundles are unpacked only after the last machine scheduler run. Thus setjmp and BTI can be separated only if someone deliberately run the scheduler once more. - The BTI insertion pass is run late in the pipeline, only after the last machine scheduling has run. So once again it can be reordered only if someone deliberately runs the scheduler again. Nevertheless, one can reasonably argue that we should prevent the bug in spite of the compiler not being able to produce the required conditions for it. If things change, the compiler will be robust against this issue. The tests written for this are contrived: bogus MIR instructions have been added adjacent to the BTI-clearing instructions in order to have them inside non-trivial scheduling regions. --- llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 19 +++ llvm/lib/Target/ARM/Thumb2InstrInfo.h | 4 + llvm/test/CodeGen/ARM/misched-branch-targets.mir | 166 +++++++++++++++++++++++ 3 files changed, 189 insertions(+) create mode 100644 llvm/test/CodeGen/ARM/misched-branch-targets.mir diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index 083f25f..fc2834c 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -286,6 +286,25 @@ MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI, return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } +bool Thumb2InstrInfo::isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + // BTI clearing instructions shall not take part in scheduling regions as + // they must stay in their intended place. Although PAC isn't BTI clearing, + // it can be transformed into PACBTI after the pre-RA Machine Scheduling + // has taken place, so its movement must also be restricted. 
+ switch (MI.getOpcode()) { + case ARM::t2BTI: + case ARM::t2PAC: + case ARM::t2PACBTI: + case ARM::t2SG: + return true; + default: + break; + } + return ARMBaseInstrInfo::isSchedulingBoundary(MI, MBB, MF); +} + void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h index 4bb412f..8915da8 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -68,6 +68,10 @@ public: unsigned OpIdx1, unsigned OpIdx2) const override; + bool isSchedulingBoundary(const MachineInstr &MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const override; + private: void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override; }; diff --git a/llvm/test/CodeGen/ARM/misched-branch-targets.mir b/llvm/test/CodeGen/ARM/misched-branch-targets.mir new file mode 100644 index 0000000..b071fbd --- /dev/null +++ b/llvm/test/CodeGen/ARM/misched-branch-targets.mir @@ -0,0 +1,166 @@ +# RUN: llc -o - -run-pass=machine-scheduler -misched=shuffle %s | FileCheck %s +# RUN: llc -o - -run-pass=postmisched %s | FileCheck %s + +--- | + target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" + target triple = "thumbv8.1m.main-arm-none-eabi" + + define i32 @foo_bti() #0 { + entry: + ret i32 0 + } + + define i32 @foo_pac() #0 { + entry: + ret i32 0 + } + + define i32 @foo_pacbti() #0 { + entry: + ret i32 0 + } + + define i32 @foo_setjmp() #0 { + entry: + ret i32 0 + if.then: + ret i32 0 + } + + define i32 @foo_sg() #0 { + entry: + ret i32 0 + } + + declare i32 @setjmp(ptr noundef) #1 + declare void @longjmp(ptr noundef, i32 noundef) #2 + + attributes #0 = { "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" } + attributes #1 = { nounwind returns_twice "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" } + attributes #2 = { noreturn nounwind "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" } + +... +--- +name: foo_bti +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r0 + + t2BTI + renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 + +... + +# CHECK-LABEL: name: foo_bti +# CHECK: body: +# CHECK-NEXT: bb.0.entry: +# CHECK-NEXT: liveins: $r0 +# CHECK-NEXT: {{^ +$}} +# CHECK-NEXT: t2BTI + +--- +name: foo_pac +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r0, $lr, $r12 + + frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp + renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg + $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr + $r7 = frame-setup tMOVr killed $sp, 14 /* CC::al */, $noreg + early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg + $r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg + $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr + t2AUT implicit $r12, implicit $lr, implicit $sp + tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + +... 
+ +# CHECK-LABEL: name: foo_pac +# CHECK: body: +# CHECK-NEXT: bb.0.entry: +# CHECK-NEXT: liveins: $r0, $lr, $r12 +# CHECK-NEXT: {{^ +$}} +# CHECK-NEXT: frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp + +--- +name: foo_pacbti +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r0, $lr, $r12 + + frame-setup t2PACBTI implicit-def $r12, implicit $lr, implicit $sp + renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg + $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr + $r7 = frame-setup tMOVr killed $sp, 14 /* CC::al */, $noreg + early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg + $r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg + $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr + t2AUT implicit $r12, implicit $lr, implicit $sp + tBX_RET 14 /* CC::al */, $noreg, implicit $r0 + +... + +# CHECK-LABEL: name: foo_pacbti +# CHECK: body: +# CHECK-NEXT: bb.0.entry: +# CHECK-NEXT: liveins: $r0, $lr, $r12 +# CHECK-NEXT: {{^ +$}} +# CHECK-NEXT: frame-setup t2PACBTI implicit-def $r12, implicit $lr, implicit $sp + +--- +name: foo_setjmp +tracksRegLiveness: true +body: | + bb.0.entry: + successors: %bb.1 + liveins: $lr + + frame-setup tPUSH 14 /* CC::al */, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp + $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg + $sp = frame-setup tSUBspi $sp, 40, 14 /* CC::al */, $noreg + renamable $r0 = tMOVr $sp, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @setjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0 + t2BTI + renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg + tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr + t2IT 0, 2, implicit-def $itstate + renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit $itstate + $sp = frame-destroy tADDspi $sp, 40, 0 /* CC::eq */, $cpsr, implicit $itstate + frame-destroy tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $r0, implicit $sp, implicit killed $itstate + + bb.1.if.then: + renamable $r0 = tMOVr $sp, 14 /* CC::al */, $noreg + renamable $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg + tBL 14 /* CC::al */, $noreg, @longjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit-def $sp + +... + +# CHECK-LABEL: name: foo_setjmp +# CHECK: body: +# CHECK: tBL 14 /* CC::al */, $noreg, @setjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0 +# CHECK-NEXT: t2BTI + +--- +name: foo_sg +tracksRegLiveness: true +body: | + bb.0.entry: + liveins: $r0 + + t2SG 14 /* CC::al */, $noreg + renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg + tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 + +... + +# CHECK-LABEL: name: foo_sg +# CHECK: body: +# CHECK-NEXT: bb.0.entry: +# CHECK-NEXT: liveins: $r0 +# CHECK-NEXT: {{^ +$}} +# CHECK-NEXT: t2SG -- cgit v1.1 From a8c59750d911eb30d5664696db19af445dd770f8 Mon Sep 17 00:00:00 2001 From: OverMighty Date: Thu, 4 Apr 2024 13:22:45 +0100 Subject: [libc][math][c23] Add exp2m1f C23 math function (#86996) Fixes #86502. 
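For tiny |x|, evaluating 2^x - 1 naively as exp2f(x) - 1.0f loses essentially all
significant bits: 2^x rounds to 1.0f before the subtraction, so the result collapses
to zero. Avoiding that cancellation is the main reason C23 gives the operation its
own entry point. A rough standalone illustration, not part of this patch (it uses
the identity 2^x - 1 = e^(x*ln2) - 1 only as a reference value):

  #include <cmath>
  #include <cstdio>

  int main() {
    float x = 0x1.0p-30f; // tiny positive argument
    // Naive: exp2(x) rounds to 1.0f, so the subtraction returns 0.
    float naive = std::exp2(x) - 1.0f;
    // Reference without cancellation: 2^x - 1 == expm1(x * ln 2).
    double ref = std::expm1(static_cast<double>(x) * std::log(2.0));
    std::printf("naive = %a, reference = %a\n", naive, ref);
    return 0;
  }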
cc @lntue --- libc/config/linux/x86_64/entrypoints.txt | 1 + libc/docs/math/index.rst | 2 +- libc/spec/stdc.td | 2 + libc/src/math/CMakeLists.txt | 2 + libc/src/math/exp2m1f.h | 18 +++ libc/src/math/generic/CMakeLists.txt | 21 +++ libc/src/math/generic/exp2m1f.cpp | 183 +++++++++++++++++++++++++ libc/test/src/math/CMakeLists.txt | 15 ++ libc/test/src/math/exhaustive/CMakeLists.txt | 15 ++ libc/test/src/math/exhaustive/exp2m1f_test.cpp | 33 +++++ libc/test/src/math/exp2m1f_test.cpp | 66 +++++++++ libc/test/src/math/smoke/CMakeLists.txt | 11 ++ libc/test/src/math/smoke/exp2m1f_test.cpp | 63 +++++++++ libc/utils/MPFRWrapper/MPFRUtils.cpp | 50 ++++++- libc/utils/MPFRWrapper/MPFRUtils.h | 1 + 15 files changed, 478 insertions(+), 5 deletions(-) create mode 100644 libc/src/math/exp2m1f.h create mode 100644 libc/src/math/generic/exp2m1f.cpp create mode 100644 libc/test/src/math/exhaustive/exp2m1f_test.cpp create mode 100644 libc/test/src/math/exp2m1f_test.cpp create mode 100644 libc/test/src/math/smoke/exp2m1f_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index cc7671c..2742c33 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -370,6 +370,7 @@ set(TARGET_LIBM_ENTRYPOINTS libc.src.math.exp10f libc.src.math.exp2 libc.src.math.exp2f + libc.src.math.exp2m1f libc.src.math.expm1 libc.src.math.expm1f libc.src.math.fabs diff --git a/libc/docs/math/index.rst b/libc/docs/math/index.rst index 265261b..970a43c 100644 --- a/libc/docs/math/index.rst +++ b/libc/docs/math/index.rst @@ -270,7 +270,7 @@ Higher Math Functions +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | exp2 | |check| | |check| | | | | 7.12.6.4 | F.10.3.4 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ -| exp2m1 | | | | | | 7.12.6.5 | F.10.3.5 | +| exp2m1 | |check| | | | | | 7.12.6.5 | F.10.3.5 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ | expm1 | |check| | |check| | | | | 7.12.6.6 | F.10.3.6 | +-----------+------------------+-----------------+------------------------+----------------------+------------------------+------------------------+----------------------------+ diff --git a/libc/spec/stdc.td b/libc/spec/stdc.td index 719bb9a..bd62870 100644 --- a/libc/spec/stdc.td +++ b/libc/spec/stdc.td @@ -535,6 +535,8 @@ def StdC : StandardSpec<"stdc"> { FunctionSpec<"exp2", RetValSpec, [ArgSpec]>, FunctionSpec<"exp2f", RetValSpec, [ArgSpec]>, + FunctionSpec<"exp2m1f", RetValSpec, [ArgSpec]>, + FunctionSpec<"expm1", RetValSpec, [ArgSpec]>, FunctionSpec<"expm1f", RetValSpec, [ArgSpec]>, diff --git a/libc/src/math/CMakeLists.txt b/libc/src/math/CMakeLists.txt index b67ee3a..c89792b 100644 --- a/libc/src/math/CMakeLists.txt +++ b/libc/src/math/CMakeLists.txt @@ -88,6 +88,8 @@ add_math_entrypoint_object(expf) add_math_entrypoint_object(exp2) add_math_entrypoint_object(exp2f) +add_math_entrypoint_object(exp2m1f) + add_math_entrypoint_object(exp10) add_math_entrypoint_object(exp10f) diff --git a/libc/src/math/exp2m1f.h b/libc/src/math/exp2m1f.h new file mode 100644 index 0000000..0eaf6b0 --- /dev/null +++ b/libc/src/math/exp2m1f.h @@ -0,0 +1,18 @@ 
+//===-- Implementation header for exp2m1f -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_MATH_EXP2M1F_H +#define LLVM_LIBC_SRC_MATH_EXP2M1F_H + +namespace LIBC_NAMESPACE { + +float exp2m1f(float x); + +} // namespace LIBC_NAMESPACE + +#endif // LLVM_LIBC_SRC_MATH_EXP2M1F_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index b164d33..dc77f8b 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -838,6 +838,27 @@ add_entrypoint_object( ) add_entrypoint_object( + exp2m1f + SRCS + exp2m1f.cpp + HDRS + ../exp2m1f.h + DEPENDS + .explogxf + libc.src.errno.errno + libc.src.__support.common + libc.src.__support.FPUtil.fenv_impl + libc.src.__support.FPUtil.fp_bits + libc.src.__support.FPUtil.multiply_add + libc.src.__support.FPUtil.polyeval + libc.src.__support.FPUtil.rounding_mode + libc.src.__support.macros.optimization + libc.src.__support.macros.properties.cpu_features + COMPILE_OPTIONS + -O3 +) + +add_entrypoint_object( exp10 SRCS exp10.cpp diff --git a/libc/src/math/generic/exp2m1f.cpp b/libc/src/math/generic/exp2m1f.cpp new file mode 100644 index 0000000..c60930d --- /dev/null +++ b/libc/src/math/generic/exp2m1f.cpp @@ -0,0 +1,183 @@ +//===-- Implementation of exp2m1f function --------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/math/exp2m1f.h" +#include "src/__support/FPUtil/FEnvImpl.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/__support/FPUtil/PolyEval.h" +#include "src/__support/FPUtil/except_value_utils.h" +#include "src/__support/FPUtil/multiply_add.h" +#include "src/__support/FPUtil/rounding_mode.h" +#include "src/__support/common.h" +#include "src/__support/macros/optimization.h" +#include "src/__support/macros/properties/cpu_features.h" +#include "src/errno/libc_errno.h" + +#include "explogxf.h" + +namespace LIBC_NAMESPACE { + +static constexpr size_t N_EXCEPTS_LO = 8; + +static constexpr fputil::ExceptValues EXP2M1F_EXCEPTS_LO = + {{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.36dc8ep-36, exp2m1f(x) = 0x1.aef212p-37 (RZ) + {0x2d9b'6e47U, 0x2d57'7909U, 1U, 0U, 0U}, + // x = 0x1.224936p-19, exp2m1f(x) = 0x1.926c0ep-20 (RZ) + {0x3611'249bU, 0x35c9'3607U, 1U, 0U, 1U}, + // x = 0x1.d16d2p-20, exp2m1f(x) = 0x1.429becp-20 (RZ) + {0x35e8'b690U, 0x35a1'4df6U, 1U, 0U, 1U}, + // x = 0x1.17949ep-14, exp2m1f(x) = 0x1.8397p-15 (RZ) + {0x388b'ca4fU, 0x3841'cb80U, 1U, 0U, 1U}, + // x = -0x1.9c3e1ep-38, exp2m1f(x) = -0x1.1dbeacp-38 (RZ) + {0xacce'1f0fU, 0xac8e'df56U, 0U, 1U, 0U}, + // x = -0x1.4d89b4p-32, exp2m1f(x) = -0x1.ce61b6p-33 (RZ) + {0xafa6'c4daU, 0xaf67'30dbU, 0U, 1U, 1U}, + // x = -0x1.a6eac4p-10, exp2m1f(x) = -0x1.24fadap-10 (RZ) + {0xbad3'7562U, 0xba92'7d6dU, 0U, 1U, 1U}, + // x = -0x1.e7526ep-6, exp2m1f(x) = -0x1.4e53dep-6 (RZ) + {0xbcf3'a937U, 0xbca7'29efU, 0U, 1U, 1U}, + }}; + +static constexpr size_t N_EXCEPTS_HI = 3; + +static constexpr fputil::ExceptValues 
EXP2M1F_EXCEPTS_HI = + {{ + // (input, RZ output, RU offset, RD offset, RN offset) + // x = 0x1.16a972p-1, exp2m1f(x) = 0x1.d545b2p-2 (RZ) + {0x3f0b'54b9U, 0x3eea'a2d9U, 1U, 0U, 0U}, + // x = -0x1.9f12acp-5, exp2m1f(x) = -0x1.1ab68cp-5 (RZ) + {0xbd4f'8956U, 0xbd0d'5b46U, 0U, 1U, 0U}, + // x = -0x1.de7b9cp-5, exp2m1f(x) = -0x1.4508f4p-5 (RZ) + {0xbd6f'3dceU, 0xbd22'847aU, 0U, 1U, 1U}, + }}; + +LLVM_LIBC_FUNCTION(float, exp2m1f, (float x)) { + using FPBits = fputil::FPBits; + FPBits xbits(x); + + uint32_t x_u = xbits.uintval(); + uint32_t x_abs = x_u & 0x7fff'ffffU; + + // When |x| >= 128, or x is nan, or |x| <= 2^-5 + if (LIBC_UNLIKELY(x_abs >= 0x4300'0000U || x_abs <= 0x3d00'0000U)) { + // |x| <= 2^-5 + if (x_abs <= 0x3d00'0000U) { + if (auto r = EXP2M1F_EXCEPTS_LO.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); + + // Minimax polynomial generated by Sollya with: + // > display = hexadecimal; + // > fpminimax((2^x - 1)/x, 5, [|D...|], [-2^-5, 2^-5]); + constexpr double COEFFS[] = { + 0x1.62e42fefa39f3p-1, 0x1.ebfbdff82c57bp-3, 0x1.c6b08d6f2d7aap-5, + 0x1.3b2ab6fc92f5dp-7, 0x1.5d897cfe27125p-10, 0x1.43090e61e6af1p-13}; + double xd = x; + double xsq = xd * xd; + double c0 = fputil::multiply_add(xd, COEFFS[1], COEFFS[0]); + double c1 = fputil::multiply_add(xd, COEFFS[3], COEFFS[2]); + double c2 = fputil::multiply_add(xd, COEFFS[5], COEFFS[4]); + double p = fputil::polyeval(xsq, c0, c1, c2); + return static_cast(p * xd); + } + + // x >= 128, or x is nan + if (xbits.is_pos()) { + if (xbits.is_finite()) { + int rounding = fputil::quick_get_round(); + if (rounding == FE_DOWNWARD || rounding == FE_TOWARDZERO) + return FPBits::max_normal().get_val(); + + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_OVERFLOW); + } + + // x >= 128 and 2^x - 1 rounds to +inf, or x is +inf or nan + return x + FPBits::inf().get_val(); + } + } + + if (LIBC_UNLIKELY(x <= -25.0f)) { + // 2^(-inf) - 1 = -1 + if (xbits.is_inf()) + return -1.0f; + // 2^nan - 1 = nan + if (xbits.is_nan()) + return x; + + int rounding = fputil::quick_get_round(); + if (rounding == FE_UPWARD || rounding == FE_TOWARDZERO) + return -0x1.ffff'fep-1f; // -1.0f + 0x1.0p-24f + + fputil::set_errno_if_required(ERANGE); + fputil::raise_except_if_required(FE_UNDERFLOW); + return -1.0f; + } + + if (auto r = EXP2M1F_EXCEPTS_HI.lookup(x_u); LIBC_UNLIKELY(r.has_value())) + return r.value(); + + // For -25 < x < 128, to compute 2^x, we perform the following range + // reduction: find hi, mid, lo such that: + // x = hi + mid + lo, in which: + // hi is an integer, + // 0 <= mid * 2^5 < 32 is an integer, + // -2^(-6) <= lo <= 2^(-6). + // In particular, + // hi + mid = round(x * 2^5) * 2^(-5). + // Then, + // 2^x = 2^(hi + mid + lo) = 2^hi * 2^mid * 2^lo. + // 2^mid is stored in the lookup table of 32 elements. + // 2^lo is computed using a degree-4 minimax polynomial generated by Sollya. + // We perform 2^hi * 2^mid by simply add hi to the exponent field of 2^mid. + + // kf = (hi + mid) * 2^5 = round(x * 2^5) + float kf; + int k; +#ifdef LIBC_TARGET_CPU_HAS_NEAREST_INT + kf = fputil::nearest_integer(x * 32.0f); + k = static_cast(kf); +#else + constexpr float HALF[2] = {0.5f, -0.5f}; + k = static_cast(fputil::multiply_add(x, 32.0f, HALF[x < 0.0f])); + kf = static_cast(k); +#endif // LIBC_TARGET_CPU_HAS_NEAREST_INT + + // lo = x - (hi + mid) = x - kf * 2^(-5) + double lo = fputil::multiply_add(-0x1.0p-5f, kf, x); + + // hi = floor(kf * 2^(-4)) + // exp2_hi = shift hi to the exponent field of double precision. 
+ int64_t exp2_hi = + static_cast(static_cast(k >> ExpBase::MID_BITS) + << fputil::FPBits::FRACTION_LEN); + // mh = 2^hi * 2^mid + // mh_bits = bit field of mh + int64_t mh_bits = ExpBase::EXP_2_MID[k & ExpBase::MID_MASK] + exp2_hi; + double mh = fputil::FPBits(static_cast(mh_bits)).get_val(); + + // Degree-4 polynomial approximating (2^x - 1)/x generated by Sollya with: + // > display = hexadecimal; + // > fpminimax((2^x - 1)/x, 4, [|D...|], [-2^-6, 2^-6]); + constexpr double COEFFS[5] = {0x1.62e42fefa39efp-1, 0x1.ebfbdff8131c4p-3, + 0x1.c6b08d7061695p-5, 0x1.3b2b1bee74b2ap-7, + 0x1.5d88091198529p-10}; + double lo_sq = lo * lo; + double c1 = fputil::multiply_add(lo, COEFFS[0], 1.0); + double c2 = fputil::multiply_add(lo, COEFFS[2], COEFFS[1]); + double c3 = fputil::multiply_add(lo, COEFFS[4], COEFFS[3]); + double exp2_lo = fputil::polyeval(lo_sq, c1, c2, c3); + // 2^x - 1 = 2^(hi + mid + lo) - 1 + // = 2^(hi + mid) * 2^lo - 1 + // ~ mh * (1 + lo * P(lo)) - 1 + // = mh * exp2_lo - 1 + return static_cast(fputil::multiply_add(exp2_lo, mh, -1.0)); +} + +} // namespace LIBC_NAMESPACE diff --git a/libc/test/src/math/CMakeLists.txt b/libc/test/src/math/CMakeLists.txt index f8f0f8b..bbf8f07 100644 --- a/libc/test/src/math/CMakeLists.txt +++ b/libc/test/src/math/CMakeLists.txt @@ -638,6 +638,21 @@ add_fp_unittest( ) add_fp_unittest( + exp2m1f_test + NEED_MPFR + SUITE + libc-math-unittests + SRCS + exp2m1f_test.cpp + DEPENDS + libc.include.llvm-libc-macros.math_macros + libc.src.errno.errno + libc.src.math.exp2m1f + libc.src.__support.CPP.array + libc.src.__support.FPUtil.fp_bits +) + +add_fp_unittest( exp10f_test NEED_MPFR SUITE diff --git a/libc/test/src/math/exhaustive/CMakeLists.txt b/libc/test/src/math/exhaustive/CMakeLists.txt index df32dd4..6b2f3dd 100644 --- a/libc/test/src/math/exhaustive/CMakeLists.txt +++ b/libc/test/src/math/exhaustive/CMakeLists.txt @@ -143,6 +143,21 @@ add_fp_unittest( ) add_fp_unittest( + exp2m1f_test + NO_RUN_POSTBUILD + NEED_MPFR + SUITE + libc_math_exhaustive_tests + SRCS + exp2m1f_test.cpp + DEPENDS + .exhaustive_test + libc.src.math.exp2m1f + LINK_LIBRARIES + -lpthread +) + +add_fp_unittest( exp10f_test NO_RUN_POSTBUILD NEED_MPFR diff --git a/libc/test/src/math/exhaustive/exp2m1f_test.cpp b/libc/test/src/math/exhaustive/exp2m1f_test.cpp new file mode 100644 index 0000000..2111024 --- /dev/null +++ b/libc/test/src/math/exhaustive/exp2m1f_test.cpp @@ -0,0 +1,33 @@ +//===-- Exhaustive test for exp2m1f ---------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "exhaustive_test.h" +#include "src/math/exp2m1f.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +using LlvmLibcExp2m1fExhaustiveTest = + LlvmLibcUnaryOpExhaustiveMathTest; + +// Range: [0, Inf]; +static constexpr uint32_t POS_START = 0x0000'0000U; +static constexpr uint32_t POS_STOP = 0x7f80'0000U; + +TEST_F(LlvmLibcExp2m1fExhaustiveTest, PostiveRange) { + test_full_range_all_roundings(POS_START, POS_STOP); +} + +// Range: [-Inf, 0]; +static constexpr uint32_t NEG_START = 0x8000'0000U; +static constexpr uint32_t NEG_STOP = 0xff80'0000U; + +TEST_F(LlvmLibcExp2m1fExhaustiveTest, NegativeRange) { + test_full_range_all_roundings(NEG_START, NEG_STOP); +} diff --git a/libc/test/src/math/exp2m1f_test.cpp b/libc/test/src/math/exp2m1f_test.cpp new file mode 100644 index 0000000..a0f0da8 --- /dev/null +++ b/libc/test/src/math/exp2m1f_test.cpp @@ -0,0 +1,66 @@ +//===-- Unittests for exp2m1f ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "include/llvm-libc-macros/math-macros.h" +#include "src/__support/CPP/array.h" +#include "src/__support/FPUtil/FPBits.h" +#include "src/errno/libc_errno.h" +#include "src/math/exp2m1f.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" +#include "utils/MPFRWrapper/MPFRUtils.h" + +#include + +using LlvmLibcExp2m1fTest = LIBC_NAMESPACE::testing::FPTest; + +namespace mpfr = LIBC_NAMESPACE::testing::mpfr; + +TEST_F(LlvmLibcExp2m1fTest, TrickyInputs) { + constexpr LIBC_NAMESPACE::cpp::array INPUTS = { + // EXP2M1F_EXCEPTS_LO + 0x1.36dc8ep-36, + 0x1.224936p-19, + 0x1.d16d2p-20, + 0x1.17949ep-14, + -0x1.9c3e1ep-38, + -0x1.4d89b4p-32, + -0x1.a6eac4p-10, + -0x1.e7526ep-6, + // EXP2M1F_EXCEPTS_HI + 0x1.16a972p-1, + -0x1.9f12acp-5, + }; + + for (float x : INPUTS) { + LIBC_NAMESPACE::libc_errno = 0; + EXPECT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp2m1, x, + LIBC_NAMESPACE::exp2m1f(x), 0.5); + } +} + +TEST_F(LlvmLibcExp2m1fTest, InFloatRange) { + constexpr uint32_t COUNT = 100'000; + constexpr uint32_t STEP = UINT32_MAX / COUNT; + for (uint32_t i = 0, v = 0; i <= COUNT; ++i, v += STEP) { + float x = FPBits(v).get_val(); + if (isnan(x) || isinf(x)) + continue; + LIBC_NAMESPACE::libc_errno = 0; + float result = LIBC_NAMESPACE::exp2m1f(x); + + // If the computation resulted in an error or did not produce valid result + // in the single-precision floating point range, then ignore comparing with + // MPFR result as MPFR can still produce valid results because of its + // wider precision. 
+ if (isnan(result) || isinf(result) || LIBC_NAMESPACE::libc_errno != 0) + continue; + ASSERT_MPFR_MATCH_ALL_ROUNDING(mpfr::Operation::Exp2m1, x, + LIBC_NAMESPACE::exp2m1f(x), 0.5); + } +} diff --git a/libc/test/src/math/smoke/CMakeLists.txt b/libc/test/src/math/smoke/CMakeLists.txt index 5d269dd..4ac1842 100644 --- a/libc/test/src/math/smoke/CMakeLists.txt +++ b/libc/test/src/math/smoke/CMakeLists.txt @@ -774,6 +774,17 @@ add_fp_unittest( ) add_fp_unittest( + exp2m1f_test + SUITE + libc-math-smoke-tests + SRCS + exp2m1f_test.cpp + DEPENDS + libc.src.errno.errno + libc.src.math.exp2m1f +) + +add_fp_unittest( exp10f_test SUITE libc-math-smoke-tests diff --git a/libc/test/src/math/smoke/exp2m1f_test.cpp b/libc/test/src/math/smoke/exp2m1f_test.cpp new file mode 100644 index 0000000..2df4353 --- /dev/null +++ b/libc/test/src/math/smoke/exp2m1f_test.cpp @@ -0,0 +1,63 @@ +//===-- Unittests for exp2m1f ---------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/errno/libc_errno.h" +#include "src/math/exp2m1f.h" +#include "test/UnitTest/FPMatcher.h" +#include "test/UnitTest/Test.h" + +using LlvmLibcExp2m1fTest = LIBC_NAMESPACE::testing::FPTest; +using LIBC_NAMESPACE::fputil::testing::ForceRoundingMode; +using LIBC_NAMESPACE::fputil::testing::RoundingMode; + +TEST_F(LlvmLibcExp2m1fTest, SpecialNumbers) { + LIBC_NAMESPACE::libc_errno = 0; + + EXPECT_FP_EQ_ALL_ROUNDING(aNaN, LIBC_NAMESPACE::exp2m1f(aNaN)); + EXPECT_FP_EQ_ALL_ROUNDING(inf, LIBC_NAMESPACE::exp2m1f(inf)); + EXPECT_FP_EQ_ALL_ROUNDING(-1.0f, LIBC_NAMESPACE::exp2m1f(neg_inf)); + EXPECT_FP_EQ_ALL_ROUNDING(0.0f, LIBC_NAMESPACE::exp2m1f(0.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(-0.0f, LIBC_NAMESPACE::exp2m1f(-0.0f)); + + EXPECT_FP_EQ_ALL_ROUNDING(1.0f, LIBC_NAMESPACE::exp2m1f(1.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(-0.5f, LIBC_NAMESPACE::exp2m1f(-1.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(3.0f, LIBC_NAMESPACE::exp2m1f(2.0f)); + EXPECT_FP_EQ_ALL_ROUNDING(-0.75f, LIBC_NAMESPACE::exp2m1f(-2.0f)); +} + +TEST_F(LlvmLibcExp2m1fTest, Overflow) { + LIBC_NAMESPACE::libc_errno = 0; + + EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::exp2m1f(0x1.fffffep+127), + FE_OVERFLOW); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::exp2m1f(128.0f), + FE_OVERFLOW); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION(inf, LIBC_NAMESPACE::exp2m1f(0x1.000002p+7), + FE_OVERFLOW); + EXPECT_MATH_ERRNO(ERANGE); +} + +TEST_F(LlvmLibcExp2m1fTest, Underflow) { + LIBC_NAMESPACE::libc_errno = 0; + + EXPECT_FP_EQ_WITH_EXCEPTION(-1.0f, LIBC_NAMESPACE::exp2m1f(-0x1.fffffep+127), + FE_UNDERFLOW); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION(-1.0f, LIBC_NAMESPACE::exp2m1f(-25.0f), + FE_UNDERFLOW); + EXPECT_MATH_ERRNO(ERANGE); + + EXPECT_FP_EQ_WITH_EXCEPTION(-1.0f, LIBC_NAMESPACE::exp2m1f(-0x1.900002p4), + FE_UNDERFLOW); + EXPECT_MATH_ERRNO(ERANGE); +} diff --git a/libc/utils/MPFRWrapper/MPFRUtils.cpp b/libc/utils/MPFRWrapper/MPFRUtils.cpp index a83f7a7..eaa47da 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.cpp +++ b/libc/utils/MPFRWrapper/MPFRUtils.cpp @@ -89,7 +89,8 @@ public: // precision. 
template , int> = 0> - explicit MPFRNumber(XType x, int precision = ExtraPrecision::VALUE, + explicit MPFRNumber(XType x, + unsigned int precision = ExtraPrecision::VALUE, RoundingMode rounding = RoundingMode::Nearest) : mpfr_precision(precision), mpfr_rounding(get_mpfr_rounding_mode(rounding)) { @@ -99,7 +100,8 @@ public: template , int> = 0> - explicit MPFRNumber(XType x, int precision = ExtraPrecision::VALUE, + explicit MPFRNumber(XType x, + unsigned int precision = ExtraPrecision::VALUE, RoundingMode rounding = RoundingMode::Nearest) : mpfr_precision(precision), mpfr_rounding(get_mpfr_rounding_mode(rounding)) { @@ -109,7 +111,8 @@ public: template , int> = 0> - explicit MPFRNumber(XType x, int precision = ExtraPrecision::VALUE, + explicit MPFRNumber(XType x, + unsigned int precision = ExtraPrecision::VALUE, RoundingMode rounding = RoundingMode::Nearest) : mpfr_precision(precision), mpfr_rounding(get_mpfr_rounding_mode(rounding)) { @@ -119,7 +122,8 @@ public: template , int> = 0> - explicit MPFRNumber(XType x, int precision = ExtraPrecision::VALUE, + explicit MPFRNumber(XType x, + unsigned int precision = ExtraPrecision::VALUE, RoundingMode rounding = RoundingMode::Nearest) : mpfr_precision(precision), mpfr_rounding(get_mpfr_rounding_mode(rounding)) { @@ -134,6 +138,12 @@ public: mpfr_set(value, other.value, mpfr_rounding); } + MPFRNumber(const MPFRNumber &other, unsigned int precision) + : mpfr_precision(precision), mpfr_rounding(other.mpfr_rounding) { + mpfr_init2(value, mpfr_precision); + mpfr_set(value, other.value, mpfr_rounding); + } + ~MPFRNumber() { mpfr_clear(value); } MPFRNumber &operator=(const MPFRNumber &rhs) { @@ -229,6 +239,36 @@ public: return result; } + MPFRNumber exp2m1() const { + // TODO: Only use mpfr_exp2m1 once CI and buildbots get MPFR >= 4.2.0. +#if MPFR_VERSION_MAJOR > 4 || \ + (MPFR_VERSION_MAJOR == 4 && MPFR_VERSION_MINOR >= 2) + MPFRNumber result(*this); + mpfr_exp2m1(result.value, value, mpfr_rounding); + return result; +#else + unsigned int prec = mpfr_precision * 3; + MPFRNumber result(*this, prec); + + float f = mpfr_get_flt(abs().value, mpfr_rounding); + if (f > 0.5f && f < 0x1.0p30f) { + mpfr_exp2(result.value, value, mpfr_rounding); + mpfr_sub_ui(result.value, result.value, 1, mpfr_rounding); + return result; + } + + MPFRNumber ln2(2.0f, prec); + // log(2) + mpfr_log(ln2.value, ln2.value, mpfr_rounding); + // x * log(2) + mpfr_mul(result.value, value, ln2.value, mpfr_rounding); + // e^(x * log(2)) - 1 + int ex = mpfr_expm1(result.value, result.value, mpfr_rounding); + mpfr_subnormalize(result.value, ex, mpfr_rounding); + return result; +#endif + } + MPFRNumber exp10() const { MPFRNumber result(*this); mpfr_exp10(result.value, value, mpfr_rounding); @@ -570,6 +610,8 @@ unary_operation(Operation op, InputType input, unsigned int precision, return mpfrInput.exp(); case Operation::Exp2: return mpfrInput.exp2(); + case Operation::Exp2m1: + return mpfrInput.exp2m1(); case Operation::Exp10: return mpfrInput.exp10(); case Operation::Expm1: diff --git a/libc/utils/MPFRWrapper/MPFRUtils.h b/libc/utils/MPFRWrapper/MPFRUtils.h index d5ff590..0a41ac6 100644 --- a/libc/utils/MPFRWrapper/MPFRUtils.h +++ b/libc/utils/MPFRWrapper/MPFRUtils.h @@ -37,6 +37,7 @@ enum class Operation : int { Erf, Exp, Exp2, + Exp2m1, Exp10, Expm1, Floor, -- cgit v1.1 From ff56584ee9c8a6d5430c0ba461540ccb6696ebc6 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Thu, 4 Apr 2024 14:25:54 +0200 Subject: [LLD][COFF] Use getMachineArchType in LinkerDriver::getArch. 
(#87499) Adds support for ARM64EC, which should use the same search paths as ARM64. It's similar to #87370 and #87495. The test is based on the existing x86 test. Generally ARM64EC libraries are shipped together with native ARM64 libraries (using ECSYMBOLS section mechanism). getMachineArchType uses Triple::thumb, while the existing implementation uses Triple::arm. It's ultimately passed to MSVCPaths.cpp functions, so modify them to accept both forms. --- lld/COFF/Driver.cpp | 13 +------------ lld/test/COFF/print-search-paths-arm64.s | 24 ++++++++++++++++++++++++ llvm/lib/WindowsDriver/MSVCPaths.cpp | 4 ++++ 3 files changed, 29 insertions(+), 12 deletions(-) create mode 100644 lld/test/COFF/print-search-paths-arm64.s diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 2b1d4ab..ea37f8d 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -157,18 +157,7 @@ StringRef LinkerDriver::mangle(StringRef sym) { } llvm::Triple::ArchType LinkerDriver::getArch() { - switch (ctx.config.machine) { - case I386: - return llvm::Triple::ArchType::x86; - case AMD64: - return llvm::Triple::ArchType::x86_64; - case ARMNT: - return llvm::Triple::ArchType::arm; - case ARM64: - return llvm::Triple::ArchType::aarch64; - default: - return llvm::Triple::ArchType::UnknownArch; - } + return getMachineArchType(ctx.config.machine); } bool LinkerDriver::findUnderscoreMangle(StringRef sym) { diff --git a/lld/test/COFF/print-search-paths-arm64.s b/lld/test/COFF/print-search-paths-arm64.s new file mode 100644 index 0000000..fb5c889 --- /dev/null +++ b/lld/test/COFF/print-search-paths-arm64.s @@ -0,0 +1,24 @@ +# REQUIRES: aarch64 + +# RUN: llvm-mc -triple aarch64-windows-msvc %s -filetype=obj -o %t.aarch64.obj +# RUN: lld-link -dll -noentry -winsysroot:%t.dir/sysroot -vctoolsversion:1.1.1.1 -winsdkversion:10.0.1 -libpath:custom-dir \ +# RUN: %t.aarch64.obj -print-search-paths | FileCheck -DSYSROOT=%t.dir %s + +# RUN: llvm-mc -triple arm64ec-windows-msvc %s -filetype=obj -o %t.arm64ec.obj +# RUN: lld-link -dll -noentry -winsysroot:%t.dir/sysroot -vctoolsversion:1.1.1.1 -winsdkversion:10.0.1 -libpath:custom-dir \ +# RUN: %t.arm64ec.obj -print-search-paths -machine:arm64ec | FileCheck -DSYSROOT=%t.dir %s + +# CHECK: Library search paths: +# CHECK-NEXT: (cwd) +# CHECK-NEXT: custom-dir +# CHECK-NEXT: [[CPATH:.*]]lib{{[/\\]}}clang{{[/\\]}}{{[0-9]+}}{{[/\\]}}lib{{[/\\]}}windows +# CHECK-NEXT: [[CPATH]]lib{{[/\\]}}clang{{[/\\]}}{{[0-9]+}}{{[/\\]}}lib +# CHECK-NEXT: [[CPATH]]lib +# CHECK-NEXT: [[SYSROOT]]{{[/\\]}}sysroot{{[/\\]}}DIA SDK{{[/\\]}}lib{{[/\\]}}arm64 +# CHECK-NEXT: [[SYSROOT]]{{[/\\]}}sysroot{{[/\\]}}VC{{[/\\]}}Tools{{[/\\]}}MSVC{{[/\\]}}1.1.1.1{{[/\\]}}lib{{[/\\]}}arm64 +# CHECK-NEXT: [[SYSROOT]]{{[/\\]}}sysroot{{[/\\]}}VC{{[/\\]}}Tools{{[/\\]}}MSVC{{[/\\]}}1.1.1.1{{[/\\]}}atlmfc{{[/\\]}}lib{{[/\\]}}arm64 +# CHECK-NEXT: [[SYSROOT]]{{[/\\]}}sysroot{{[/\\]}}Windows Kits{{[/\\]}}10{{[/\\]}}Lib{{[/\\]}}10.0.1{{[/\\]}}ucrt{{[/\\]}}arm64 +# CHECK-NEXT: [[SYSROOT]]{{[/\\]}}sysroot{{[/\\]}}Windows Kits{{[/\\]}}10{{[/\\]}}Lib{{[/\\]}}10.0.1{{[/\\]}}um{{[/\\]}}arm64 + + .data + .word 1 diff --git a/llvm/lib/WindowsDriver/MSVCPaths.cpp b/llvm/lib/WindowsDriver/MSVCPaths.cpp index 634cfcb..a7bffbb 100644 --- a/llvm/lib/WindowsDriver/MSVCPaths.cpp +++ b/llvm/lib/WindowsDriver/MSVCPaths.cpp @@ -268,6 +268,7 @@ const char *archToWindowsSDKArch(Triple::ArchType Arch) { case Triple::ArchType::x86_64: return "x64"; case Triple::ArchType::arm: + case Triple::ArchType::thumb: return "arm"; case Triple::ArchType::aarch64: 
return "arm64"; @@ -285,6 +286,7 @@ const char *archToLegacyVCArch(Triple::ArchType Arch) { case Triple::ArchType::x86_64: return "amd64"; case Triple::ArchType::arm: + case Triple::ArchType::thumb: return "arm"; case Triple::ArchType::aarch64: return "arm64"; @@ -300,6 +302,7 @@ const char *archToDevDivInternalArch(Triple::ArchType Arch) { case Triple::ArchType::x86_64: return "amd64"; case Triple::ArchType::arm: + case Triple::ArchType::thumb: return "arm"; case Triple::ArchType::aarch64: return "arm64"; @@ -321,6 +324,7 @@ bool appendArchToWindowsSDKLibPath(int SDKMajor, SmallString<128> LibPath, sys::path::append(LibPath, "x64"); break; case Triple::arm: + case Triple::thumb: // It is not necessary to link against Windows SDK 7.x when targeting ARM. return false; default: -- cgit v1.1 From 22089ae6c591d11143724b4bde418aa067958a8f Mon Sep 17 00:00:00 2001 From: Mehdi Amini Date: Thu, 4 Apr 2024 14:39:02 +0200 Subject: Revert "[flang][runtime] Enable I/O APIs in F18 runtime offload builds." (#87629) Reverts llvm/llvm-project#87543 The pre-merge Windows build is broken. --- flang/include/flang/Runtime/io-api.h | 164 ++++++++++++++-------------- flang/runtime/environment.cpp | 2 - flang/runtime/environment.h | 2 +- flang/runtime/freestanding-tools.h | 19 ---- flang/runtime/io-api.cpp | 204 ++++++++++++++++++----------------- flang/runtime/io-error.cpp | 9 +- flang/runtime/io-error.h | 2 +- flang/runtime/namelist.cpp | 46 ++++---- 8 files changed, 213 insertions(+), 235 deletions(-) diff --git a/flang/include/flang/Runtime/io-api.h b/flang/include/flang/Runtime/io-api.h index 328afc7..1b6c4f5 100644 --- a/flang/include/flang/Runtime/io-api.h +++ b/flang/include/flang/Runtime/io-api.h @@ -92,18 +92,18 @@ constexpr std::size_t RecommendedInternalIoScratchAreaBytes( // Internal I/O to/from character arrays &/or non-default-kind character // requires a descriptor, which is copied. -Cookie IODECL(BeginInternalArrayListOutput)(const Descriptor &, +Cookie IONAME(BeginInternalArrayListOutput)(const Descriptor &, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginInternalArrayListInput)(const Descriptor &, +Cookie IONAME(BeginInternalArrayListInput)(const Descriptor &, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginInternalArrayFormattedOutput)(const Descriptor &, +Cookie IONAME(BeginInternalArrayFormattedOutput)(const Descriptor &, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor = nullptr, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginInternalArrayFormattedInput)(const Descriptor &, +Cookie IONAME(BeginInternalArrayFormattedInput)(const Descriptor &, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor = nullptr, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, @@ -111,20 +111,20 @@ Cookie IODECL(BeginInternalArrayFormattedInput)(const Descriptor &, // Internal I/O to/from a default-kind character scalar can avoid a // descriptor. 
-Cookie IODECL(BeginInternalListOutput)(char *internal, +Cookie IONAME(BeginInternalListOutput)(char *internal, std::size_t internalLength, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginInternalListInput)(const char *internal, +Cookie IONAME(BeginInternalListInput)(const char *internal, std::size_t internalLength, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginInternalFormattedOutput)(char *internal, +Cookie IONAME(BeginInternalFormattedOutput)(char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor = nullptr, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginInternalFormattedInput)(const char *internal, +Cookie IONAME(BeginInternalFormattedInput)(const char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor = nullptr, void **scratchArea = nullptr, std::size_t scratchBytes = 0, const char *sourceFile = nullptr, @@ -139,63 +139,63 @@ Cookie IODECL(BeginInternalFormattedInput)(const char *internal, // If handleError is false, and the unit number is out of range, the program // will be terminated. Otherwise, if unit is out of range, a nonzero Iostat // code is returned and ioMsg is set if it is not a nullptr. -enum Iostat IODECL(CheckUnitNumberInRange64)(std::int64_t unit, +enum Iostat IONAME(CheckUnitNumberInRange64)(std::int64_t unit, bool handleError, char *ioMsg = nullptr, std::size_t ioMsgLength = 0, const char *sourceFile = nullptr, int sourceLine = 0); -enum Iostat IODECL(CheckUnitNumberInRange128)(common::int128_t unit, +enum Iostat IONAME(CheckUnitNumberInRange128)(common::int128_t unit, bool handleError, char *ioMsg = nullptr, std::size_t ioMsgLength = 0, const char *sourceFile = nullptr, int sourceLine = 0); // External synchronous I/O initiation Cookie IODECL(BeginExternalListOutput)(ExternalUnit = DefaultOutputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginExternalListInput)(ExternalUnit = DefaultInputUnit, +Cookie IONAME(BeginExternalListInput)(ExternalUnit = DefaultInputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginExternalFormattedOutput)(const char *format, std::size_t, +Cookie IONAME(BeginExternalFormattedOutput)(const char *format, std::size_t, const Descriptor *formatDescriptor = nullptr, ExternalUnit = DefaultOutputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginExternalFormattedInput)(const char *format, std::size_t, +Cookie IONAME(BeginExternalFormattedInput)(const char *format, std::size_t, const Descriptor *formatDescriptor = nullptr, ExternalUnit = DefaultInputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginUnformattedOutput)(ExternalUnit = DefaultOutputUnit, +Cookie IONAME(BeginUnformattedOutput)(ExternalUnit = DefaultOutputUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginUnformattedInput)(ExternalUnit = DefaultInputUnit, +Cookie IONAME(BeginUnformattedInput)(ExternalUnit = DefaultInputUnit, const char *sourceFile = nullptr, int sourceLine = 0); // WAIT(ID=) -Cookie IODECL(BeginWait)(ExternalUnit, AsynchronousId, +Cookie IONAME(BeginWait)(ExternalUnit, AsynchronousId, const char *sourceFile = nullptr, int 
sourceLine = 0); // WAIT(no ID=) -Cookie IODECL(BeginWaitAll)( +Cookie IONAME(BeginWaitAll)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); // Other I/O statements -Cookie IODECL(BeginClose)( +Cookie IONAME(BeginClose)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginFlush)( +Cookie IONAME(BeginFlush)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginBackspace)( +Cookie IONAME(BeginBackspace)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginEndfile)( +Cookie IONAME(BeginEndfile)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginRewind)( +Cookie IONAME(BeginRewind)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); // OPEN(UNIT=) and OPEN(NEWUNIT=) have distinct interfaces. -Cookie IODECL(BeginOpenUnit)( +Cookie IONAME(BeginOpenUnit)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginOpenNewUnit)( +Cookie IONAME(BeginOpenNewUnit)( const char *sourceFile = nullptr, int sourceLine = 0); // The variant forms of INQUIRE() statements have distinct interfaces. // BeginInquireIoLength() is basically a no-op output statement. -Cookie IODECL(BeginInquireUnit)( +Cookie IONAME(BeginInquireUnit)( ExternalUnit, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginInquireFile)(const char *, std::size_t, +Cookie IONAME(BeginInquireFile)(const char *, std::size_t, const char *sourceFile = nullptr, int sourceLine = 0); -Cookie IODECL(BeginInquireIoLength)( +Cookie IONAME(BeginInquireIoLength)( const char *sourceFile = nullptr, int sourceLine = 0); // If an I/O statement has any IOSTAT=, ERR=, END=, or EOR= specifiers, @@ -214,33 +214,33 @@ Cookie IODECL(BeginInquireIoLength)( // } // } // if (EndIoStatement(cookie) == FORTRAN_RUTIME_IOSTAT_END) goto label666; -void IODECL(EnableHandlers)(Cookie, bool hasIoStat = false, bool hasErr = false, +void IONAME(EnableHandlers)(Cookie, bool hasIoStat = false, bool hasErr = false, bool hasEnd = false, bool hasEor = false, bool hasIoMsg = false); // ASYNCHRONOUS='YES' or 'NO' on READ/WRITE/OPEN // Use GetAsynchronousId() to handle ID=. -bool IODECL(SetAsynchronous)(Cookie, const char *, std::size_t); +bool IONAME(SetAsynchronous)(Cookie, const char *, std::size_t); // Control list options. These return false on a error that the // Begin...() call has specified will be handled by the caller. // The interfaces that pass a default-kind CHARACTER argument // are limited to passing specific case-insensitive keyword values. 
// ADVANCE=YES, NO -bool IODECL(SetAdvance)(Cookie, const char *, std::size_t); +bool IONAME(SetAdvance)(Cookie, const char *, std::size_t); // BLANK=NULL, ZERO -bool IODECL(SetBlank)(Cookie, const char *, std::size_t); +bool IONAME(SetBlank)(Cookie, const char *, std::size_t); // DECIMAL=COMMA, POINT -bool IODECL(SetDecimal)(Cookie, const char *, std::size_t); +bool IONAME(SetDecimal)(Cookie, const char *, std::size_t); // DELIM=APOSTROPHE, QUOTE, NONE -bool IODECL(SetDelim)(Cookie, const char *, std::size_t); +bool IONAME(SetDelim)(Cookie, const char *, std::size_t); // PAD=YES, NO -bool IODECL(SetPad)(Cookie, const char *, std::size_t); -bool IODECL(SetPos)(Cookie, std::int64_t); -bool IODECL(SetRec)(Cookie, std::int64_t); +bool IONAME(SetPad)(Cookie, const char *, std::size_t); +bool IONAME(SetPos)(Cookie, std::int64_t); +bool IONAME(SetRec)(Cookie, std::int64_t); // ROUND=UP, DOWN, ZERO, NEAREST, COMPATIBLE, PROCESSOR_DEFINED -bool IODECL(SetRound)(Cookie, const char *, std::size_t); +bool IONAME(SetRound)(Cookie, const char *, std::size_t); // SIGN=PLUS, SUPPRESS, PROCESSOR_DEFINED -bool IODECL(SetSign)(Cookie, const char *, std::size_t); +bool IONAME(SetSign)(Cookie, const char *, std::size_t); // Data item transfer for modes other than NAMELIST: // Any data object that can be passed as an actual argument without the @@ -256,34 +256,34 @@ bool IODECL(SetSign)(Cookie, const char *, std::size_t); // Once the statement has encountered an error, all following items will be // ignored and also return false; but compiled code should check for errors // and avoid the following items when they might crash. -bool IODECL(OutputDescriptor)(Cookie, const Descriptor &); -bool IODECL(InputDescriptor)(Cookie, const Descriptor &); +bool IONAME(OutputDescriptor)(Cookie, const Descriptor &); +bool IONAME(InputDescriptor)(Cookie, const Descriptor &); // Formatted (including list directed) I/O data items -bool IODECL(OutputInteger8)(Cookie, std::int8_t); -bool IODECL(OutputInteger16)(Cookie, std::int16_t); +bool IONAME(OutputInteger8)(Cookie, std::int8_t); +bool IONAME(OutputInteger16)(Cookie, std::int16_t); bool IODECL(OutputInteger32)(Cookie, std::int32_t); -bool IODECL(OutputInteger64)(Cookie, std::int64_t); -bool IODECL(OutputInteger128)(Cookie, common::int128_t); -bool IODECL(InputInteger)(Cookie, std::int64_t &, int kind = 8); -bool IODECL(OutputReal32)(Cookie, float); -bool IODECL(InputReal32)(Cookie, float &); -bool IODECL(OutputReal64)(Cookie, double); -bool IODECL(InputReal64)(Cookie, double &); -bool IODECL(OutputComplex32)(Cookie, float, float); -bool IODECL(InputComplex32)(Cookie, float[2]); -bool IODECL(OutputComplex64)(Cookie, double, double); -bool IODECL(InputComplex64)(Cookie, double[2]); -bool IODECL(OutputCharacter)(Cookie, const char *, std::size_t, int kind = 1); -bool IODECL(OutputAscii)(Cookie, const char *, std::size_t); -bool IODECL(InputCharacter)(Cookie, char *, std::size_t, int kind = 1); -bool IODECL(InputAscii)(Cookie, char *, std::size_t); -bool IODECL(OutputLogical)(Cookie, bool); -bool IODECL(InputLogical)(Cookie, bool &); +bool IONAME(OutputInteger64)(Cookie, std::int64_t); +bool IONAME(OutputInteger128)(Cookie, common::int128_t); +bool IONAME(InputInteger)(Cookie, std::int64_t &, int kind = 8); +bool IONAME(OutputReal32)(Cookie, float); +bool IONAME(InputReal32)(Cookie, float &); +bool IONAME(OutputReal64)(Cookie, double); +bool IONAME(InputReal64)(Cookie, double &); +bool IONAME(OutputComplex32)(Cookie, float, float); +bool IONAME(InputComplex32)(Cookie, float[2]); 
+bool IONAME(OutputComplex64)(Cookie, double, double); +bool IONAME(InputComplex64)(Cookie, double[2]); +bool IONAME(OutputCharacter)(Cookie, const char *, std::size_t, int kind = 1); +bool IONAME(OutputAscii)(Cookie, const char *, std::size_t); +bool IONAME(InputCharacter)(Cookie, char *, std::size_t, int kind = 1); +bool IONAME(InputAscii)(Cookie, char *, std::size_t); +bool IONAME(OutputLogical)(Cookie, bool); +bool IONAME(InputLogical)(Cookie, bool &); // NAMELIST I/O must be the only data item in an (otherwise) // list-directed I/O statement. -bool IODECL(OutputNamelist)(Cookie, const NamelistGroup &); -bool IODECL(InputNamelist)(Cookie, const NamelistGroup &); +bool IONAME(OutputNamelist)(Cookie, const NamelistGroup &); +bool IONAME(InputNamelist)(Cookie, const NamelistGroup &); // When an I/O list item has a derived type with a specific defined // I/O subroutine of the appropriate generic kind for the active @@ -294,9 +294,9 @@ bool IODECL(InputNamelist)(Cookie, const NamelistGroup &); // made such a generic interface inaccessible), these data item transfer // APIs enable the I/O runtime to make the right calls to defined I/O // subroutines. -bool IODECL(OutputDerivedType)( +bool IONAME(OutputDerivedType)( Cookie, const Descriptor &, const NonTbpDefinedIoTable *); -bool IODECL(InputDerivedType)( +bool IONAME(InputDerivedType)( Cookie, const Descriptor &, const NonTbpDefinedIoTable *); // Additional specifier interfaces for the connection-list of @@ -304,56 +304,56 @@ bool IODECL(InputDerivedType)( // SetDelim(), GetIoMsg(), SetPad(), SetRound(), SetSign(), // & SetAsynchronous() are also acceptable for OPEN. // ACCESS=SEQUENTIAL, DIRECT, STREAM -bool IODECL(SetAccess)(Cookie, const char *, std::size_t); +bool IONAME(SetAccess)(Cookie, const char *, std::size_t); // ACTION=READ, WRITE, or READWRITE -bool IODECL(SetAction)(Cookie, const char *, std::size_t); +bool IONAME(SetAction)(Cookie, const char *, std::size_t); // CARRIAGECONTROL=LIST, FORTRAN, NONE -bool IODECL(SetCarriagecontrol)(Cookie, const char *, std::size_t); +bool IONAME(SetCarriagecontrol)(Cookie, const char *, std::size_t); // CONVERT=NATIVE, LITTLE_ENDIAN, BIG_ENDIAN, or SWAP -bool IODECL(SetConvert)(Cookie, const char *, std::size_t); +bool IONAME(SetConvert)(Cookie, const char *, std::size_t); // ENCODING=UTF-8, DEFAULT -bool IODECL(SetEncoding)(Cookie, const char *, std::size_t); +bool IONAME(SetEncoding)(Cookie, const char *, std::size_t); // FORM=FORMATTED, UNFORMATTED -bool IODECL(SetForm)(Cookie, const char *, std::size_t); +bool IONAME(SetForm)(Cookie, const char *, std::size_t); // POSITION=ASIS, REWIND, APPEND -bool IODECL(SetPosition)(Cookie, const char *, std::size_t); -bool IODECL(SetRecl)(Cookie, std::size_t); // RECL= +bool IONAME(SetPosition)(Cookie, const char *, std::size_t); +bool IONAME(SetRecl)(Cookie, std::size_t); // RECL= // STATUS can be set during an OPEN or CLOSE statement. 
// For OPEN: STATUS=OLD, NEW, SCRATCH, REPLACE, UNKNOWN // For CLOSE: STATUS=KEEP, DELETE -bool IODECL(SetStatus)(Cookie, const char *, std::size_t); +bool IONAME(SetStatus)(Cookie, const char *, std::size_t); -bool IODECL(SetFile)(Cookie, const char *, std::size_t chars); +bool IONAME(SetFile)(Cookie, const char *, std::size_t chars); // Acquires the runtime-created unit number for OPEN(NEWUNIT=) -bool IODECL(GetNewUnit)(Cookie, int &, int kind = 4); +bool IONAME(GetNewUnit)(Cookie, int &, int kind = 4); // READ(SIZE=), after all input items -std::size_t IODECL(GetSize)(Cookie); +std::size_t IONAME(GetSize)(Cookie); // INQUIRE(IOLENGTH=), after all output items -std::size_t IODECL(GetIoLength)(Cookie); +std::size_t IONAME(GetIoLength)(Cookie); // GetIoMsg() does not modify its argument unless an error or // end-of-record/file condition is present. -void IODECL(GetIoMsg)(Cookie, char *, std::size_t); // IOMSG= +void IONAME(GetIoMsg)(Cookie, char *, std::size_t); // IOMSG= // Defines ID= on READ/WRITE(ASYNCHRONOUS='YES') -AsynchronousId IODECL(GetAsynchronousId)(Cookie); +AsynchronousId IONAME(GetAsynchronousId)(Cookie); // INQUIRE() specifiers are mostly identified by their NUL-terminated // case-insensitive names. // ACCESS, ACTION, ASYNCHRONOUS, BLANK, CONVERT, DECIMAL, DELIM, DIRECT, // ENCODING, FORM, FORMATTED, NAME, PAD, POSITION, READ, READWRITE, ROUND, // SEQUENTIAL, SIGN, STREAM, UNFORMATTED, WRITE: -bool IODECL(InquireCharacter)(Cookie, InquiryKeywordHash, char *, std::size_t); +bool IONAME(InquireCharacter)(Cookie, InquiryKeywordHash, char *, std::size_t); // EXIST, NAMED, OPENED, and PENDING (without ID): -bool IODECL(InquireLogical)(Cookie, InquiryKeywordHash, bool &); +bool IONAME(InquireLogical)(Cookie, InquiryKeywordHash, bool &); // PENDING with ID -bool IODECL(InquirePendingId)(Cookie, AsynchronousId, bool &); +bool IONAME(InquirePendingId)(Cookie, AsynchronousId, bool &); // NEXTREC, NUMBER, POS, RECL, SIZE -bool IODECL(InquireInteger64)( +bool IONAME(InquireInteger64)( Cookie, InquiryKeywordHash, std::int64_t &, int kind = 8); // This function must be called to end an I/O statement, and its diff --git a/flang/runtime/environment.cpp b/flang/runtime/environment.cpp index b2c9665..b74067a 100644 --- a/flang/runtime/environment.cpp +++ b/flang/runtime/environment.cpp @@ -49,7 +49,6 @@ static void SetEnvironmentDefaults(const EnvironmentDefaultList *envDefaults) { } } -RT_OFFLOAD_API_GROUP_BEGIN Fortran::common::optional GetConvertFromString( const char *x, std::size_t n) { static const char *keywords[]{ @@ -69,7 +68,6 @@ Fortran::common::optional GetConvertFromString( return Fortran::common::nullopt; } } -RT_OFFLOAD_API_GROUP_END void ExecutionEnvironment::Configure(int ac, const char *av[], const char *env[], const EnvironmentDefaultList *envDefaults) { diff --git a/flang/runtime/environment.h b/flang/runtime/environment.h index b8b9f10..6c56993 100644 --- a/flang/runtime/environment.h +++ b/flang/runtime/environment.h @@ -31,7 +31,7 @@ RT_OFFLOAD_VAR_GROUP_END // External unformatted I/O data conversions enum class Convert { Unknown, Native, LittleEndian, BigEndian, Swap }; -RT_API_ATTRS Fortran::common::optional GetConvertFromString( +Fortran::common::optional GetConvertFromString( const char *, std::size_t); struct ExecutionEnvironment { diff --git a/flang/runtime/freestanding-tools.h b/flang/runtime/freestanding-tools.h index 9089dc6..451bf13 100644 --- a/flang/runtime/freestanding-tools.h +++ b/flang/runtime/freestanding-tools.h @@ -52,11 +52,6 @@ #define 
STD_STRCPY_UNSUPPORTED 1 #endif -#if !defined(STD_STRCMP_UNSUPPORTED) && \ - (defined(__CUDACC__) || defined(__CUDA__)) && defined(__CUDA_ARCH__) -#define STD_STRCMP_UNSUPPORTED 1 -#endif - namespace Fortran::runtime { #if STD_FILL_N_UNSUPPORTED @@ -181,19 +176,5 @@ static inline RT_API_ATTRS char *strcpy(char *dest, const char *src) { using std::strcpy; #endif // !STD_STRCPY_UNSUPPORTED -#if STD_STRCMP_UNSUPPORTED -// Provides alternative implementation for std::strcmp(), if -// it is not supported. -static inline RT_API_ATTRS int strcmp(const char *lhs, const char *rhs) { - while (*lhs != '\0' && *lhs == *rhs) { - ++lhs; - ++rhs; - } - return static_cast(*lhs) - static_cast(*rhs); -} -#else // !STD_STRCMP_UNSUPPORTED -using std::strcmp; -#endif // !STD_STRCMP_UNSUPPORTED - } // namespace Fortran::runtime #endif // FORTRAN_RUNTIME_FREESTANDING_TOOLS_H_ diff --git a/flang/runtime/io-api.cpp b/flang/runtime/io-api.cpp index ccb5b57..3a86c9f 100644 --- a/flang/runtime/io-api.cpp +++ b/flang/runtime/io-api.cpp @@ -25,9 +25,8 @@ #include namespace Fortran::runtime::io { -RT_EXT_API_GROUP_BEGIN -RT_API_ATTRS const char *InquiryKeywordHashDecode( +const char *InquiryKeywordHashDecode( char *buffer, std::size_t n, InquiryKeywordHash hash) { if (n < 1) { return nullptr; @@ -45,7 +44,7 @@ RT_API_ATTRS const char *InquiryKeywordHashDecode( } template -RT_API_ATTRS Cookie BeginInternalArrayListIO(const Descriptor &descriptor, +Cookie BeginInternalArrayListIO(const Descriptor &descriptor, void ** /*scratchArea*/, std::size_t /*scratchBytes*/, const char *sourceFile, int sourceLine) { Terminator oom{sourceFile, sourceLine}; @@ -55,14 +54,14 @@ RT_API_ATTRS Cookie BeginInternalArrayListIO(const Descriptor &descriptor, ->ioStatementState(); } -Cookie IODEF(BeginInternalArrayListOutput)(const Descriptor &descriptor, +Cookie IONAME(BeginInternalArrayListOutput)(const Descriptor &descriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { return BeginInternalArrayListIO( descriptor, scratchArea, scratchBytes, sourceFile, sourceLine); } -Cookie IODEF(BeginInternalArrayListInput)(const Descriptor &descriptor, +Cookie IONAME(BeginInternalArrayListInput)(const Descriptor &descriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { return BeginInternalArrayListIO( @@ -70,7 +69,7 @@ Cookie IODEF(BeginInternalArrayListInput)(const Descriptor &descriptor, } template -RT_API_ATTRS Cookie BeginInternalArrayFormattedIO(const Descriptor &descriptor, +Cookie BeginInternalArrayFormattedIO(const Descriptor &descriptor, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void ** /*scratchArea*/, std::size_t /*scratchBytes*/, const char *sourceFile, int sourceLine) { @@ -81,7 +80,7 @@ RT_API_ATTRS Cookie BeginInternalArrayFormattedIO(const Descriptor &descriptor, ->ioStatementState(); } -Cookie IODEF(BeginInternalArrayFormattedOutput)(const Descriptor &descriptor, +Cookie IONAME(BeginInternalArrayFormattedOutput)(const Descriptor &descriptor, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { @@ -90,7 +89,7 @@ Cookie IODEF(BeginInternalArrayFormattedOutput)(const Descriptor &descriptor, sourceLine); } -Cookie IODEF(BeginInternalArrayFormattedInput)(const Descriptor &descriptor, +Cookie IONAME(BeginInternalArrayFormattedInput)(const Descriptor &descriptor, const char *format, std::size_t 
formatLength, const Descriptor *formatDescriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { @@ -111,14 +110,14 @@ RT_API_ATTRS Cookie BeginInternalListIO( ->ioStatementState(); } -Cookie IODEF(BeginInternalListOutput)(char *internal, +Cookie IONAME(BeginInternalListOutput)(char *internal, std::size_t internalLength, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { return BeginInternalListIO(internal, internalLength, scratchArea, scratchBytes, sourceFile, sourceLine); } -Cookie IODEF(BeginInternalListInput)(const char *internal, +Cookie IONAME(BeginInternalListInput)(const char *internal, std::size_t internalLength, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { return BeginInternalListIO(internal, internalLength, @@ -126,7 +125,7 @@ Cookie IODEF(BeginInternalListInput)(const char *internal, } template -RT_API_ATTRS Cookie BeginInternalFormattedIO( +Cookie BeginInternalFormattedIO( std::conditional_t *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void ** /*scratchArea*/, @@ -139,7 +138,7 @@ RT_API_ATTRS Cookie BeginInternalFormattedIO( ->ioStatementState(); } -Cookie IODEF(BeginInternalFormattedOutput)(char *internal, +Cookie IONAME(BeginInternalFormattedOutput)(char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { @@ -148,7 +147,7 @@ Cookie IODEF(BeginInternalFormattedOutput)(char *internal, sourceFile, sourceLine); } -Cookie IODEF(BeginInternalFormattedInput)(const char *internal, +Cookie IONAME(BeginInternalFormattedInput)(const char *internal, std::size_t internalLength, const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, void **scratchArea, std::size_t scratchBytes, const char *sourceFile, int sourceLine) { @@ -228,22 +227,24 @@ RT_API_ATTRS Cookie BeginExternalListIO( } } +RT_EXT_API_GROUP_BEGIN Cookie IODEF(BeginExternalListOutput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalListIO( unitNumber, sourceFile, sourceLine); } +RT_EXT_API_GROUP_END -Cookie IODEF(BeginExternalListInput)( +Cookie IONAME(BeginExternalListInput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalListIO( unitNumber, sourceFile, sourceLine); } template -RT_API_ATTRS Cookie BeginExternalFormattedIO(const char *format, - std::size_t formatLength, const Descriptor *formatDescriptor, - ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { +Cookie BeginExternalFormattedIO(const char *format, std::size_t formatLength, + const Descriptor *formatDescriptor, ExternalUnit unitNumber, + const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; Cookie errorCookie{nullptr}; ExternalFileUnit *unit{GetOrCreateUnit( @@ -285,14 +286,14 @@ RT_API_ATTRS Cookie BeginExternalFormattedIO(const char *format, } } -Cookie IODEF(BeginExternalFormattedOutput)(const char *format, +Cookie IONAME(BeginExternalFormattedOutput)(const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalFormattedIO(format, formatLength, formatDescriptor, unitNumber, sourceFile, sourceLine); } -Cookie IODEF(BeginExternalFormattedInput)(const char 
*format, +Cookie IONAME(BeginExternalFormattedInput)(const char *format, std::size_t formatLength, const Descriptor *formatDescriptor, ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginExternalFormattedIO(format, formatLength, @@ -300,7 +301,7 @@ Cookie IODEF(BeginExternalFormattedInput)(const char *format, } template -RT_API_ATTRS Cookie BeginUnformattedIO( +Cookie BeginUnformattedIO( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; Cookie errorCookie{nullptr}; @@ -351,19 +352,19 @@ RT_API_ATTRS Cookie BeginUnformattedIO( } } -Cookie IODEF(BeginUnformattedOutput)( +Cookie IONAME(BeginUnformattedOutput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginUnformattedIO( unitNumber, sourceFile, sourceLine); } -Cookie IODEF(BeginUnformattedInput)( +Cookie IONAME(BeginUnformattedInput)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return BeginUnformattedIO( unitNumber, sourceFile, sourceLine); } -Cookie IODEF(BeginOpenUnit)( // OPEN(without NEWUNIT=) +Cookie IONAME(BeginOpenUnit)( // OPEN(without NEWUNIT=) ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; bool wasExtant{false}; @@ -383,7 +384,7 @@ Cookie IODEF(BeginOpenUnit)( // OPEN(without NEWUNIT=) } } -Cookie IODEF(BeginOpenNewUnit)( // OPEN(NEWUNIT=j) +Cookie IONAME(BeginOpenNewUnit)( // OPEN(NEWUNIT=j) const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; ExternalFileUnit &unit{ @@ -393,7 +394,7 @@ Cookie IODEF(BeginOpenNewUnit)( // OPEN(NEWUNIT=j) sourceLine); } -Cookie IODEF(BeginWait)(ExternalUnit unitNumber, AsynchronousId id, +Cookie IONAME(BeginWait)(ExternalUnit unitNumber, AsynchronousId id, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -409,12 +410,12 @@ Cookie IODEF(BeginWait)(ExternalUnit unitNumber, AsynchronousId id, terminator, unitNumber, id == 0 ? 
IostatOk : IostatBadWaitUnit); } } -Cookie IODEF(BeginWaitAll)( +Cookie IONAME(BeginWaitAll)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { return IONAME(BeginWait)(unitNumber, 0 /*no ID=*/, sourceFile, sourceLine); } -Cookie IODEF(BeginClose)( +Cookie IONAME(BeginClose)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -433,7 +434,7 @@ Cookie IODEF(BeginClose)( } } -Cookie IODEF(BeginFlush)( +Cookie IONAME(BeginFlush)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -451,7 +452,7 @@ Cookie IODEF(BeginFlush)( } } -Cookie IODEF(BeginBackspace)( +Cookie IONAME(BeginBackspace)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -469,7 +470,7 @@ Cookie IODEF(BeginBackspace)( } } -Cookie IODEF(BeginEndfile)( +Cookie IONAME(BeginEndfile)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; Cookie errorCookie{nullptr}; @@ -489,7 +490,7 @@ Cookie IODEF(BeginEndfile)( } } -Cookie IODEF(BeginRewind)( +Cookie IONAME(BeginRewind)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; Cookie errorCookie{nullptr}; @@ -509,7 +510,7 @@ Cookie IODEF(BeginRewind)( } } -Cookie IODEF(BeginInquireUnit)( +Cookie IONAME(BeginInquireUnit)( ExternalUnit unitNumber, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp(unitNumber)}) { @@ -529,14 +530,14 @@ Cookie IODEF(BeginInquireUnit)( } } -Cookie IODEF(BeginInquireFile)(const char *path, std::size_t pathLength, +Cookie IONAME(BeginInquireFile)(const char *path, std::size_t pathLength, const char *sourceFile, int sourceLine) { Terminator terminator{sourceFile, sourceLine}; auto trimmed{SaveDefaultCharacter( path, TrimTrailingSpaces(path, pathLength), terminator)}; if (ExternalFileUnit * unit{ExternalFileUnit::LookUp( - trimmed.get(), Fortran::runtime::strlen(trimmed.get()))}) { + trimmed.get(), std::strlen(trimmed.get()))}) { // INQUIRE(FILE=) to a connected unit if (ChildIo * child{unit->GetChildIo()}) { return &child->BeginIoStatement( @@ -553,7 +554,7 @@ Cookie IODEF(BeginInquireFile)(const char *path, std::size_t pathLength, } } -Cookie IODEF(BeginInquireIoLength)(const char *sourceFile, int sourceLine) { +Cookie IONAME(BeginInquireIoLength)(const char *sourceFile, int sourceLine) { Terminator oom{sourceFile, sourceLine}; return &New{oom}(sourceFile, sourceLine) .release() @@ -562,7 +563,7 @@ Cookie IODEF(BeginInquireIoLength)(const char *sourceFile, int sourceLine) { // Control list items -void IODEF(EnableHandlers)(Cookie cookie, bool hasIoStat, bool hasErr, +void IONAME(EnableHandlers)(Cookie cookie, bool hasIoStat, bool hasErr, bool hasEnd, bool hasEor, bool hasIoMsg) { IoErrorHandler &handler{cookie->GetIoErrorHandler()}; if (hasIoStat) { @@ -582,8 +583,8 @@ void IODEF(EnableHandlers)(Cookie cookie, bool hasIoStat, bool hasErr, } } -static RT_API_ATTRS bool YesOrNo(const char *keyword, std::size_t length, - const char *what, IoErrorHandler &handler) { +static bool YesOrNo(const char *keyword, std::size_t length, const char 
*what, + IoErrorHandler &handler) { static const char *keywords[]{"YES", "NO", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { case 0: @@ -597,7 +598,8 @@ static RT_API_ATTRS bool YesOrNo(const char *keyword, std::size_t length, } } -bool IODEF(SetAdvance)(Cookie cookie, const char *keyword, std::size_t length) { +bool IONAME(SetAdvance)( + Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; bool nonAdvancing{!YesOrNo(keyword, length, "ADVANCE", handler)}; @@ -614,7 +616,7 @@ bool IODEF(SetAdvance)(Cookie cookie, const char *keyword, std::size_t length) { return !handler.InError(); } -bool IODEF(SetBlank)(Cookie cookie, const char *keyword, std::size_t length) { +bool IONAME(SetBlank)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{"NULL", "ZERO", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { @@ -631,7 +633,8 @@ bool IODEF(SetBlank)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IODEF(SetDecimal)(Cookie cookie, const char *keyword, std::size_t length) { +bool IONAME(SetDecimal)( + Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{"COMMA", "POINT", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { @@ -648,7 +651,7 @@ bool IODEF(SetDecimal)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IODEF(SetDelim)(Cookie cookie, const char *keyword, std::size_t length) { +bool IONAME(SetDelim)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{"APOSTROPHE", "QUOTE", "NONE", nullptr}; switch (IdentifyValue(keyword, length, keywords)) { @@ -668,14 +671,14 @@ bool IODEF(SetDelim)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IODEF(SetPad)(Cookie cookie, const char *keyword, std::size_t length) { +bool IONAME(SetPad)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; io.mutableModes().pad = YesOrNo(keyword, length, "PAD", handler); return !handler.InError(); } -bool IODEF(SetPos)(Cookie cookie, std::int64_t pos) { +bool IONAME(SetPos)(Cookie cookie, std::int64_t pos) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (auto *unit{io.GetExternalFileUnit()}) { @@ -686,7 +689,7 @@ bool IODEF(SetPos)(Cookie cookie, std::int64_t pos) { return false; } -bool IODEF(SetRec)(Cookie cookie, std::int64_t rec) { +bool IONAME(SetRec)(Cookie cookie, std::int64_t rec) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (auto *unit{io.GetExternalFileUnit()}) { @@ -702,7 +705,7 @@ bool IODEF(SetRec)(Cookie cookie, std::int64_t rec) { return true; } -bool IODEF(SetRound)(Cookie cookie, const char *keyword, std::size_t length) { +bool IONAME(SetRound)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{"UP", "DOWN", "ZERO", "NEAREST", "COMPATIBLE", "PROCESSOR_DEFINED", nullptr}; @@ -732,7 +735,7 @@ bool IODEF(SetRound)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IODEF(SetSign)(Cookie cookie, const char *keyword, std::size_t length) { +bool IONAME(SetSign)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; static const char *keywords[]{ "PLUS", "SUPPRESS", 
"PROCESSOR_DEFINED", nullptr}; @@ -751,7 +754,7 @@ bool IODEF(SetSign)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IODEF(SetAccess)(Cookie cookie, const char *keyword, std::size_t length) { +bool IONAME(SetAccess)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -787,7 +790,7 @@ bool IODEF(SetAccess)(Cookie cookie, const char *keyword, std::size_t length) { return true; } -bool IODEF(SetAction)(Cookie cookie, const char *keyword, std::size_t length) { +bool IONAME(SetAction)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -829,7 +832,7 @@ bool IODEF(SetAction)(Cookie cookie, const char *keyword, std::size_t length) { return true; } -bool IODEF(SetAsynchronous)( +bool IONAME(SetAsynchronous)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; @@ -856,7 +859,7 @@ bool IODEF(SetAsynchronous)( return !handler.InError(); } -bool IODEF(SetCarriagecontrol)( +bool IONAME(SetCarriagecontrol)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; @@ -888,7 +891,8 @@ bool IODEF(SetCarriagecontrol)( } } -bool IODEF(SetConvert)(Cookie cookie, const char *keyword, std::size_t length) { +bool IONAME(SetConvert)( + Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -912,7 +916,7 @@ bool IODEF(SetConvert)(Cookie cookie, const char *keyword, std::size_t length) { } } -bool IODEF(SetEncoding)( +bool IONAME(SetEncoding)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; @@ -944,7 +948,7 @@ bool IODEF(SetEncoding)( return true; } -bool IODEF(SetForm)(Cookie cookie, const char *keyword, std::size_t length) { +bool IONAME(SetForm)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -972,7 +976,7 @@ bool IODEF(SetForm)(Cookie cookie, const char *keyword, std::size_t length) { return true; } -bool IODEF(SetPosition)( +bool IONAME(SetPosition)( Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; @@ -1005,7 +1009,7 @@ bool IODEF(SetPosition)( return true; } -bool IODEF(SetRecl)(Cookie cookie, std::size_t n) { +bool IONAME(SetRecl)(Cookie cookie, std::size_t n) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -1032,7 +1036,7 @@ bool IODEF(SetRecl)(Cookie cookie, std::size_t n) { } } -bool IODEF(SetStatus)(Cookie cookie, const char *keyword, std::size_t length) { +bool IONAME(SetStatus)(Cookie cookie, const char *keyword, std::size_t length) { IoStatementState &io{*cookie}; if (auto *open{io.get_if()}) { if (open->completedOperation()) { @@ -1086,7 +1090,7 @@ bool IODEF(SetStatus)(Cookie cookie, const char *keyword, std::size_t length) { "SetStatus() called when not in an OPEN or CLOSE statement"); } -bool IODEF(SetFile)(Cookie cookie, const char *path, std::size_t chars) { +bool IONAME(SetFile)(Cookie cookie, const char *path, std::size_t chars) { IoStatementState &io{*cookie}; if (auto *open{io.get_if()}) { if (open->completedOperation()) { @@ -1103,7 +1107,7 @@ bool IODEF(SetFile)(Cookie cookie, const char *path, std::size_t chars) { return false; } -bool IODEF(GetNewUnit)(Cookie 
cookie, int &unit, int kind) { +bool IONAME(GetNewUnit)(Cookie cookie, int &unit, int kind) { IoStatementState &io{*cookie}; auto *open{io.get_if()}; if (!open) { @@ -1131,15 +1135,15 @@ bool IODEF(GetNewUnit)(Cookie cookie, int &unit, int kind) { // Data transfers -bool IODEF(OutputDescriptor)(Cookie cookie, const Descriptor &descriptor) { +bool IONAME(OutputDescriptor)(Cookie cookie, const Descriptor &descriptor) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(InputDescriptor)(Cookie cookie, const Descriptor &descriptor) { +bool IONAME(InputDescriptor)(Cookie cookie, const Descriptor &descriptor) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(OutputInteger8)(Cookie cookie, std::int8_t n) { +bool IONAME(OutputInteger8)(Cookie cookie, std::int8_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger8")) { return false; } @@ -1150,7 +1154,7 @@ bool IODEF(OutputInteger8)(Cookie cookie, std::int8_t n) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(OutputInteger16)(Cookie cookie, std::int16_t n) { +bool IONAME(OutputInteger16)(Cookie cookie, std::int16_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger16")) { return false; } @@ -1161,6 +1165,7 @@ bool IODEF(OutputInteger16)(Cookie cookie, std::int16_t n) { return descr::DescriptorIO(*cookie, descriptor); } +RT_EXT_API_GROUP_BEGIN bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger32")) { return false; @@ -1171,8 +1176,9 @@ bool IODEF(OutputInteger32)(Cookie cookie, std::int32_t n) { TypeCategory::Integer, 4, reinterpret_cast(&n), 0); return descr::DescriptorIO(*cookie, descriptor); } +RT_EXT_API_GROUP_END -bool IODEF(OutputInteger64)(Cookie cookie, std::int64_t n) { +bool IONAME(OutputInteger64)(Cookie cookie, std::int64_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger64")) { return false; } @@ -1184,7 +1190,7 @@ bool IODEF(OutputInteger64)(Cookie cookie, std::int64_t n) { } #ifdef __SIZEOF_INT128__ -bool IODEF(OutputInteger128)(Cookie cookie, common::int128_t n) { +bool IONAME(OutputInteger128)(Cookie cookie, common::int128_t n) { if (!cookie->CheckFormattedStmtType("OutputInteger128")) { return false; } @@ -1196,7 +1202,7 @@ bool IODEF(OutputInteger128)(Cookie cookie, common::int128_t n) { } #endif -bool IODEF(InputInteger)(Cookie cookie, std::int64_t &n, int kind) { +bool IONAME(InputInteger)(Cookie cookie, std::int64_t &n, int kind) { if (!cookie->CheckFormattedStmtType("InputInteger")) { return false; } @@ -1207,7 +1213,7 @@ bool IODEF(InputInteger)(Cookie cookie, std::int64_t &n, int kind) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(OutputReal32)(Cookie cookie, float x) { +bool IONAME(OutputReal32)(Cookie cookie, float x) { if (!cookie->CheckFormattedStmtType("OutputReal32")) { return false; } @@ -1217,7 +1223,7 @@ bool IODEF(OutputReal32)(Cookie cookie, float x) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(OutputReal64)(Cookie cookie, double x) { +bool IONAME(OutputReal64)(Cookie cookie, double x) { if (!cookie->CheckFormattedStmtType("OutputReal64")) { return false; } @@ -1227,7 +1233,7 @@ bool IODEF(OutputReal64)(Cookie cookie, double x) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(InputReal32)(Cookie cookie, float &x) { +bool IONAME(InputReal32)(Cookie cookie, float &x) { if (!cookie->CheckFormattedStmtType("InputReal32")) { return false; } @@ -1237,7 +1243,7 @@ bool IODEF(InputReal32)(Cookie cookie, float &x) { return 
descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(InputReal64)(Cookie cookie, double &x) { +bool IONAME(InputReal64)(Cookie cookie, double &x) { if (!cookie->CheckFormattedStmtType("InputReal64")) { return false; } @@ -1247,7 +1253,7 @@ bool IODEF(InputReal64)(Cookie cookie, double &x) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(OutputComplex32)(Cookie cookie, float r, float i) { +bool IONAME(OutputComplex32)(Cookie cookie, float r, float i) { if (!cookie->CheckFormattedStmtType("OutputComplex32")) { return false; } @@ -1259,7 +1265,7 @@ bool IODEF(OutputComplex32)(Cookie cookie, float r, float i) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(OutputComplex64)(Cookie cookie, double r, double i) { +bool IONAME(OutputComplex64)(Cookie cookie, double r, double i) { if (!cookie->CheckFormattedStmtType("OutputComplex64")) { return false; } @@ -1271,7 +1277,7 @@ bool IODEF(OutputComplex64)(Cookie cookie, double r, double i) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(InputComplex32)(Cookie cookie, float z[2]) { +bool IONAME(InputComplex32)(Cookie cookie, float z[2]) { if (!cookie->CheckFormattedStmtType("InputComplex32")) { return false; } @@ -1282,7 +1288,7 @@ bool IODEF(InputComplex32)(Cookie cookie, float z[2]) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(InputComplex64)(Cookie cookie, double z[2]) { +bool IONAME(InputComplex64)(Cookie cookie, double z[2]) { if (!cookie->CheckFormattedStmtType("InputComplex64")) { return false; } @@ -1293,7 +1299,7 @@ bool IODEF(InputComplex64)(Cookie cookie, double z[2]) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(OutputCharacter)( +bool IONAME(OutputCharacter)( Cookie cookie, const char *x, std::size_t length, int kind) { if (!cookie->CheckFormattedStmtType("OutputCharacter")) { return false; @@ -1305,11 +1311,11 @@ bool IODEF(OutputCharacter)( return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(OutputAscii)(Cookie cookie, const char *x, std::size_t length) { +bool IONAME(OutputAscii)(Cookie cookie, const char *x, std::size_t length) { return IONAME(OutputCharacter(cookie, x, length, 1)); } -bool IODEF(InputCharacter)( +bool IONAME(InputCharacter)( Cookie cookie, char *x, std::size_t length, int kind) { if (!cookie->CheckFormattedStmtType("InputCharacter")) { return false; @@ -1320,11 +1326,11 @@ bool IODEF(InputCharacter)( return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(InputAscii)(Cookie cookie, char *x, std::size_t length) { +bool IONAME(InputAscii)(Cookie cookie, char *x, std::size_t length) { return IONAME(InputCharacter)(cookie, x, length, 1); } -bool IODEF(OutputLogical)(Cookie cookie, bool truth) { +bool IONAME(OutputLogical)(Cookie cookie, bool truth) { if (!cookie->CheckFormattedStmtType("OutputLogical")) { return false; } @@ -1335,7 +1341,7 @@ bool IODEF(OutputLogical)(Cookie cookie, bool truth) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(InputLogical)(Cookie cookie, bool &truth) { +bool IONAME(InputLogical)(Cookie cookie, bool &truth) { if (!cookie->CheckFormattedStmtType("InputLogical")) { return false; } @@ -1346,17 +1352,17 @@ bool IODEF(InputLogical)(Cookie cookie, bool &truth) { return descr::DescriptorIO(*cookie, descriptor); } -bool IODEF(OutputDerivedType)(Cookie cookie, const Descriptor &descriptor, +bool IONAME(OutputDerivedType)(Cookie cookie, const Descriptor &descriptor, const NonTbpDefinedIoTable *table) { return descr::DescriptorIO(*cookie, descriptor, table); } -bool 
IODEF(InputDerivedType)(Cookie cookie, const Descriptor &descriptor, +bool IONAME(InputDerivedType)(Cookie cookie, const Descriptor &descriptor, const NonTbpDefinedIoTable *table) { return descr::DescriptorIO(*cookie, descriptor, table); } -std::size_t IODEF(GetSize)(Cookie cookie) { +std::size_t IONAME(GetSize)(Cookie cookie) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (!handler.InError()) { @@ -1373,7 +1379,7 @@ std::size_t IODEF(GetSize)(Cookie cookie) { return 0; } -std::size_t IODEF(GetIoLength)(Cookie cookie) { +std::size_t IONAME(GetIoLength)(Cookie cookie) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (!handler.InError()) { @@ -1389,7 +1395,7 @@ std::size_t IODEF(GetIoLength)(Cookie cookie) { return 0; } -void IODEF(GetIoMsg)(Cookie cookie, char *msg, std::size_t length) { +void IONAME(GetIoMsg)(Cookie cookie, char *msg, std::size_t length) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (!handler.InError()) { @@ -1400,7 +1406,7 @@ void IODEF(GetIoMsg)(Cookie cookie, char *msg, std::size_t length) { } } -AsynchronousId IODEF(GetAsynchronousId)(Cookie cookie) { +AsynchronousId IONAME(GetAsynchronousId)(Cookie cookie) { IoStatementState &io{*cookie}; IoErrorHandler &handler{io.GetIoErrorHandler()}; if (auto *ext{io.get_if()}) { @@ -1413,24 +1419,24 @@ AsynchronousId IODEF(GetAsynchronousId)(Cookie cookie) { return 0; } -bool IODEF(InquireCharacter)(Cookie cookie, InquiryKeywordHash inquiry, +bool IONAME(InquireCharacter)(Cookie cookie, InquiryKeywordHash inquiry, char *result, std::size_t length) { IoStatementState &io{*cookie}; return io.Inquire(inquiry, result, length); } -bool IODEF(InquireLogical)( +bool IONAME(InquireLogical)( Cookie cookie, InquiryKeywordHash inquiry, bool &result) { IoStatementState &io{*cookie}; return io.Inquire(inquiry, result); } -bool IODEF(InquirePendingId)(Cookie cookie, AsynchronousId id, bool &result) { +bool IONAME(InquirePendingId)(Cookie cookie, AsynchronousId id, bool &result) { IoStatementState &io{*cookie}; return io.Inquire(HashInquiryKeyword("PENDING"), id, result); } -bool IODEF(InquireInteger64)( +bool IONAME(InquireInteger64)( Cookie cookie, InquiryKeywordHash inquiry, std::int64_t &result, int kind) { IoStatementState &io{*cookie}; std::int64_t n{0}; // safe "undefined" value @@ -1446,15 +1452,17 @@ bool IODEF(InquireInteger64)( return false; } +RT_EXT_API_GROUP_BEGIN enum Iostat IODEF(EndIoStatement)(Cookie cookie) { IoStatementState &io{*cookie}; return static_cast(io.EndIoStatement()); } +RT_EXT_API_GROUP_END template -static RT_API_ATTRS enum Iostat CheckUnitNumberInRangeImpl(INT unit, - bool handleError, char *ioMsg, std::size_t ioMsgLength, - const char *sourceFile, int sourceLine) { +static enum Iostat CheckUnitNumberInRangeImpl(INT unit, bool handleError, + char *ioMsg, std::size_t ioMsgLength, const char *sourceFile, + int sourceLine) { static_assert(sizeof(INT) >= sizeof(ExternalUnit), "only intended to be used when the INT to ExternalUnit conversion is " "narrowing"); @@ -1486,15 +1494,15 @@ static RT_API_ATTRS enum Iostat CheckUnitNumberInRangeImpl(INT unit, return IostatOk; } -enum Iostat IODEF(CheckUnitNumberInRange64)(std::int64_t unit, bool handleError, - char *ioMsg, std::size_t ioMsgLength, const char *sourceFile, - int sourceLine) { +enum Iostat IONAME(CheckUnitNumberInRange64)(std::int64_t unit, + bool handleError, char *ioMsg, std::size_t ioMsgLength, + const char *sourceFile, int sourceLine) { return 
CheckUnitNumberInRangeImpl( unit, handleError, ioMsg, ioMsgLength, sourceFile, sourceLine); } #ifdef __SIZEOF_INT128__ -enum Iostat IODEF(CheckUnitNumberInRange128)(common::int128_t unit, +enum Iostat IONAME(CheckUnitNumberInRange128)(common::int128_t unit, bool handleError, char *ioMsg, std::size_t ioMsgLength, const char *sourceFile, int sourceLine) { return CheckUnitNumberInRangeImpl( @@ -1517,5 +1525,3 @@ void std::__libcpp_verbose_abort(char const *format, ...) { std::abort(); } #endif - -RT_EXT_API_GROUP_END diff --git a/flang/runtime/io-error.cpp b/flang/runtime/io-error.cpp index 7a90966..b006b82f 100644 --- a/flang/runtime/io-error.cpp +++ b/flang/runtime/io-error.cpp @@ -109,6 +109,8 @@ void IoErrorHandler::SignalPendingError() { SignalError(error); } +RT_OFFLOAD_API_GROUP_END + void IoErrorHandler::SignalErrno() { SignalError(errno); } bool IoErrorHandler::GetIoMsg(char *buffer, std::size_t bufferLength) { @@ -125,10 +127,7 @@ bool IoErrorHandler::GetIoMsg(char *buffer, std::size_t bufferLength) { // in LLVM v9.0.1 with inadequate modification for Fortran, // since rectified. bool ok{false}; -#if defined(RT_DEVICE_COMPILATION) - // strerror_r is not available on device. - msg = "errno description is not available on device"; -#elif HAVE_STRERROR_R +#if HAVE_STRERROR_R // strerror_r is thread-safe. #if defined(__GLIBC__) && defined(_GNU_SOURCE) // glibc defines its own incompatible version of strerror_r @@ -158,6 +157,4 @@ bool IoErrorHandler::GetIoMsg(char *buffer, std::size_t bufferLength) { return false; } } - -RT_OFFLOAD_API_GROUP_END } // namespace Fortran::runtime::io diff --git a/flang/runtime/io-error.h b/flang/runtime/io-error.h index 426573e..0fe11c9 100644 --- a/flang/runtime/io-error.h +++ b/flang/runtime/io-error.h @@ -61,7 +61,7 @@ public: RT_API_ATTRS void SignalPendingError(); RT_API_ATTRS int GetIoStat() const { return ioStat_; } - RT_API_ATTRS bool GetIoMsg(char *, std::size_t); + bool GetIoMsg(char *, std::size_t); private: enum Flag : std::uint8_t { diff --git a/flang/runtime/namelist.cpp b/flang/runtime/namelist.cpp index b9eed21..b502d41 100644 --- a/flang/runtime/namelist.cpp +++ b/flang/runtime/namelist.cpp @@ -17,20 +17,16 @@ namespace Fortran::runtime::io { -RT_VAR_GROUP_BEGIN // Max size of a group, symbol or component identifier that can appear in // NAMELIST input, plus a byte for NUL termination. -static constexpr RT_CONST_VAR_ATTRS std::size_t nameBufferSize{201}; -RT_VAR_GROUP_END +static constexpr std::size_t nameBufferSize{201}; -RT_OFFLOAD_API_GROUP_BEGIN - -static inline RT_API_ATTRS char32_t GetComma(IoStatementState &io) { +static inline char32_t GetComma(IoStatementState &io) { return io.mutableModes().editingFlags & decimalComma ? 
char32_t{';'} : char32_t{','}; } -bool IODEF(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { +bool IONAME(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { IoStatementState &io{*cookie}; io.CheckFormattedStmtType("OutputNamelist"); io.mutableModes().inNamelist = true; @@ -44,8 +40,7 @@ bool IODEF(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { if ((connection.NeedAdvance(prefixLen) && !(io.AdvanceRecord() && EmitAscii(io, " ", 1))) || !EmitAscii(io, prefix, prefixLen) || - (connection.NeedAdvance( - Fortran::runtime::strlen(str) + (suffix != ' ')) && + (connection.NeedAdvance(std::strlen(str) + (suffix != ' ')) && !(io.AdvanceRecord() && EmitAscii(io, " ", 1)))) { return false; } @@ -89,20 +84,20 @@ bool IODEF(OutputNamelist)(Cookie cookie, const NamelistGroup &group) { return EmitUpperCase("/", 1, "", ' '); } -static constexpr RT_API_ATTRS bool IsLegalIdStart(char32_t ch) { +static constexpr bool IsLegalIdStart(char32_t ch) { return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_' || ch == '@'; } -static constexpr RT_API_ATTRS bool IsLegalIdChar(char32_t ch) { +static constexpr bool IsLegalIdChar(char32_t ch) { return IsLegalIdStart(ch) || (ch >= '0' && ch <= '9'); } -static constexpr RT_API_ATTRS char NormalizeIdChar(char32_t ch) { +static constexpr char NormalizeIdChar(char32_t ch) { return static_cast(ch >= 'A' && ch <= 'Z' ? ch - 'A' + 'a' : ch); } -static RT_API_ATTRS bool GetLowerCaseName( +static bool GetLowerCaseName( IoStatementState &io, char buffer[], std::size_t maxLength) { std::size_t byteLength{0}; if (auto ch{io.GetNextNonBlank(byteLength)}) { @@ -124,7 +119,7 @@ static RT_API_ATTRS bool GetLowerCaseName( return false; } -static RT_API_ATTRS Fortran::common::optional GetSubscriptValue( +static Fortran::common::optional GetSubscriptValue( IoStatementState &io) { Fortran::common::optional value; std::size_t byteCount{0}; @@ -157,8 +152,8 @@ static RT_API_ATTRS Fortran::common::optional GetSubscriptValue( return value; } -static RT_API_ATTRS bool HandleSubscripts(IoStatementState &io, - Descriptor &desc, const Descriptor &source, const char *name) { +static bool HandleSubscripts(IoStatementState &io, Descriptor &desc, + const Descriptor &source, const char *name) { IoErrorHandler &handler{io.GetIoErrorHandler()}; // Allow for blanks in subscripts; they're nonstandard, but not // ambiguous within the parentheses. 
@@ -257,7 +252,7 @@ static RT_API_ATTRS bool HandleSubscripts(IoStatementState &io, return false; } -static RT_API_ATTRS void StorageSequenceExtension( +static void StorageSequenceExtension( Descriptor &desc, const Descriptor &source) { // Support the near-universal extension of NAMELIST input into a // designatable storage sequence identified by its initial scalar array @@ -279,7 +274,7 @@ static RT_API_ATTRS void StorageSequenceExtension( } } -static RT_API_ATTRS bool HandleSubstring( +static bool HandleSubstring( IoStatementState &io, Descriptor &desc, const char *name) { IoErrorHandler &handler{io.GetIoErrorHandler()}; auto pair{desc.type().GetCategoryAndKind()}; @@ -340,7 +335,7 @@ static RT_API_ATTRS bool HandleSubstring( return false; } -static RT_API_ATTRS bool HandleComponent(IoStatementState &io, Descriptor &desc, +static bool HandleComponent(IoStatementState &io, Descriptor &desc, const Descriptor &source, const char *name) { IoErrorHandler &handler{io.GetIoErrorHandler()}; char compName[nameBufferSize]; @@ -349,8 +344,7 @@ static RT_API_ATTRS bool HandleComponent(IoStatementState &io, Descriptor &desc, if (const typeInfo::DerivedType * type{addendum ? addendum->derivedType() : nullptr}) { if (const typeInfo::Component * - comp{type->FindDataComponent( - compName, Fortran::runtime::strlen(compName))}) { + comp{type->FindDataComponent(compName, std::strlen(compName))}) { bool createdDesc{false}; if (comp->rank() > 0 && source.rank() > 0) { // If base and component are both arrays, the component name @@ -414,7 +408,7 @@ static RT_API_ATTRS bool HandleComponent(IoStatementState &io, Descriptor &desc, // Advance to the terminal '/' of a namelist group or leading '&'/'$' // of the next. -static RT_API_ATTRS void SkipNamelistGroup(IoStatementState &io) { +static void SkipNamelistGroup(IoStatementState &io) { std::size_t byteCount{0}; while (auto ch{io.GetNextNonBlank(byteCount)}) { io.HandleRelativePosition(byteCount); @@ -437,7 +431,7 @@ static RT_API_ATTRS void SkipNamelistGroup(IoStatementState &io) { } } -bool IODEF(InputNamelist)(Cookie cookie, const NamelistGroup &group) { +bool IONAME(InputNamelist)(Cookie cookie, const NamelistGroup &group) { IoStatementState &io{*cookie}; io.CheckFormattedStmtType("InputNamelist"); io.mutableModes().inNamelist = true; @@ -476,7 +470,7 @@ bool IODEF(InputNamelist)(Cookie cookie, const NamelistGroup &group) { handler.SignalError("NAMELIST input group has no name"); return false; } - if (Fortran::runtime::strcmp(group.groupName, name) == 0) { + if (std::strcmp(group.groupName, name) == 0) { break; // found it } SkipNamelistGroup(io); @@ -495,7 +489,7 @@ bool IODEF(InputNamelist)(Cookie cookie, const NamelistGroup &group) { } std::size_t itemIndex{0}; for (; itemIndex < group.items; ++itemIndex) { - if (Fortran::runtime::strcmp(name, group.item[itemIndex].name) == 0) { + if (std::strcmp(name, group.item[itemIndex].name) == 0) { break; } } @@ -596,6 +590,8 @@ bool IODEF(InputNamelist)(Cookie cookie, const NamelistGroup &group) { return true; } +RT_OFFLOAD_API_GROUP_BEGIN + bool IsNamelistNameOrSlash(IoStatementState &io) { if (auto *listInput{ io.get_if>()}) { -- cgit v1.1 From bbd259af0a4cc438dd02d5ee632cb2dc1def1f6a Mon Sep 17 00:00:00 2001 From: Yitzhak Mandelbaum Date: Thu, 4 Apr 2024 08:39:51 -0400 Subject: [clang][dataflow] Refactor `widen` API to be explicit about change effect. (#87233) The previous API relied on pointer equality of inputs and outputs to signal whether a change occured. This was too subtle and led to bugs in practice. 
It was also very limiting: the override could not return an equivalent (but not identical) value. --- .../Analysis/FlowSensitive/DataflowEnvironment.h | 50 +++++++++------ .../clang/Analysis/FlowSensitive/DataflowLattice.h | 8 +-- .../Analysis/FlowSensitive/DataflowEnvironment.cpp | 49 +++++++-------- .../TypeErasedDataflowAnalysisTest.cpp | 72 +++++++++++++++------- 4 files changed, 111 insertions(+), 68 deletions(-) diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h index c30bccd..9a65f76 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h @@ -43,6 +43,15 @@ enum class ComparisonResult { Unknown, }; +/// The result of a `widen` operation. +struct WidenResult { + /// Non-null pointer to a potentially widened version of the input value. + Value *V; + /// Whether `V` represents a "change" (that is, a different value) with + /// respect to the previous value in the sequence. + LatticeEffect Effect; +}; + /// Holds the state of the program (store and heap) at a given program point. /// /// WARNING: Symbolic values that are created by the environment for static @@ -104,14 +113,17 @@ public: /// serve as a comparison operation, by indicating whether the widened value /// is equivalent to the previous value. /// - /// Returns either: - /// - /// `nullptr`, if this value is not of interest to the model, or - /// - /// `&Prev`, if the widened value is equivalent to `Prev`, or - /// - /// A non-null value that approximates `Current`. `Prev` is available to - /// inform the chosen approximation. + /// Returns one of the folowing: + /// * `std::nullopt`, if this value is not of interest to the + /// model. + /// * A `WidenResult` with: + /// * A non-null `Value *` that points either to `Current` or a widened + /// version of `Current`. This value must be consistent with + /// the flow condition of `CurrentEnv`. We particularly caution + /// against using `Prev`, which is rarely consistent. + /// * A `LatticeEffect` indicating whether the value should be + /// considered a new value (`Changed`) or one *equivalent* (if not + /// necessarily equal) to `Prev` (`Unchanged`). /// /// `PrevEnv` and `CurrentEnv` can be used to query child values and path /// condition implications of `Prev` and `Current`, respectively. @@ -122,17 +134,19 @@ public: /// /// `Prev` and `Current` must be assigned to the same storage location in /// `PrevEnv` and `CurrentEnv`, respectively. - virtual Value *widen(QualType Type, Value &Prev, const Environment &PrevEnv, - Value &Current, Environment &CurrentEnv) { + virtual std::optional widen(QualType Type, Value &Prev, + const Environment &PrevEnv, + Value &Current, + Environment &CurrentEnv) { // The default implementation reduces to just comparison, since comparison // is required by the API, even if no widening is performed. 
switch (compare(Type, Prev, PrevEnv, Current, CurrentEnv)) { - case ComparisonResult::Same: - return &Prev; - case ComparisonResult::Different: - return &Current; - case ComparisonResult::Unknown: - return nullptr; + case ComparisonResult::Unknown: + return std::nullopt; + case ComparisonResult::Same: + return WidenResult{&Current, LatticeEffect::Unchanged}; + case ComparisonResult::Different: + return WidenResult{&Current, LatticeEffect::Changed}; } llvm_unreachable("all cases in switch covered"); } @@ -236,8 +250,8 @@ public: /// /// `PrevEnv` must be the immediate previous version of the environment. /// `PrevEnv` and `this` must use the same `DataflowAnalysisContext`. - LatticeJoinEffect widen(const Environment &PrevEnv, - Environment::ValueModel &Model); + LatticeEffect widen(const Environment &PrevEnv, + Environment::ValueModel &Model); // FIXME: Rename `createOrGetStorageLocation` to `getOrCreateStorageLocation`, // `getStableStorageLocation`, or something more appropriate. diff --git a/clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h b/clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h index 0c81e2f..b262732 100644 --- a/clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h +++ b/clang/include/clang/Analysis/FlowSensitive/DataflowLattice.h @@ -17,13 +17,13 @@ namespace clang { namespace dataflow { -/// Effect indicating whether a lattice join operation resulted in a new value. -// FIXME: Rename to `LatticeEffect` since `widen` uses it as well, and we are -// likely removing it from `join`. -enum class LatticeJoinEffect { +/// Effect indicating whether a lattice operation resulted in a new value. +enum class LatticeEffect { Unchanged, Changed, }; +// DEPRECATED. Use `LatticeEffect`. +using LatticeJoinEffect = LatticeEffect; } // namespace dataflow } // namespace clang diff --git a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp index f729d67..70ac076 100644 --- a/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp +++ b/clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp @@ -166,17 +166,16 @@ static Value *joinDistinctValues(QualType Type, Value &Val1, return JoinedVal; } -// When widening does not change `Current`, return value will equal `&Prev`. -static Value &widenDistinctValues(QualType Type, Value &Prev, - const Environment &PrevEnv, Value &Current, - Environment &CurrentEnv, - Environment::ValueModel &Model) { +static WidenResult widenDistinctValues(QualType Type, Value &Prev, + const Environment &PrevEnv, + Value &Current, Environment &CurrentEnv, + Environment::ValueModel &Model) { // Boolean-model widening. if (auto *PrevBool = dyn_cast(&Prev)) { - // If previous value was already Top, re-use that to (implicitly) indicate - // that no change occurred. if (isa(Prev)) - return Prev; + // Safe to return `Prev` here, because Top is never dependent on the + // environment. + return {&Prev, LatticeEffect::Unchanged}; // We may need to widen to Top, but before we do so, check whether both // values are implied to be either true or false in the current environment. 
@@ -185,22 +184,24 @@ static Value &widenDistinctValues(QualType Type, Value &Prev, bool TruePrev = PrevEnv.proves(PrevBool->formula()); bool TrueCur = CurrentEnv.proves(CurBool.formula()); if (TruePrev && TrueCur) - return CurrentEnv.getBoolLiteralValue(true); + return {&CurrentEnv.getBoolLiteralValue(true), LatticeEffect::Unchanged}; if (!TruePrev && !TrueCur && PrevEnv.proves(PrevEnv.arena().makeNot(PrevBool->formula())) && CurrentEnv.proves(CurrentEnv.arena().makeNot(CurBool.formula()))) - return CurrentEnv.getBoolLiteralValue(false); + return {&CurrentEnv.getBoolLiteralValue(false), LatticeEffect::Unchanged}; - return CurrentEnv.makeTopBoolValue(); + return {&CurrentEnv.makeTopBoolValue(), LatticeEffect::Changed}; } // FIXME: Add other built-in model widening. // Custom-model widening. - if (auto *W = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) - return *W; + if (auto Result = Model.widen(Type, Prev, PrevEnv, Current, CurrentEnv)) + return *Result; - return equateUnknownValues(Prev.getKind()) ? Prev : Current; + return {&Current, equateUnknownValues(Prev.getKind()) + ? LatticeEffect::Unchanged + : LatticeEffect::Changed}; } // Returns whether the values in `Map1` and `Map2` compare equal for those @@ -271,7 +272,7 @@ llvm::MapVector widenKeyToValueMap(const llvm::MapVector &CurMap, const llvm::MapVector &PrevMap, Environment &CurEnv, const Environment &PrevEnv, - Environment::ValueModel &Model, LatticeJoinEffect &Effect) { + Environment::ValueModel &Model, LatticeEffect &Effect) { llvm::MapVector WidenedMap; for (auto &Entry : CurMap) { Key K = Entry.first; @@ -290,11 +291,11 @@ widenKeyToValueMap(const llvm::MapVector &CurMap, continue; } - Value &WidenedVal = widenDistinctValues(K->getType(), *PrevIt->second, - PrevEnv, *Val, CurEnv, Model); - WidenedMap.insert({K, &WidenedVal}); - if (&WidenedVal != PrevIt->second) - Effect = LatticeJoinEffect::Changed; + auto [WidenedVal, ValEffect] = widenDistinctValues( + K->getType(), *PrevIt->second, PrevEnv, *Val, CurEnv, Model); + WidenedMap.insert({K, WidenedVal}); + if (ValEffect == LatticeEffect::Changed) + Effect = LatticeEffect::Changed; } return WidenedMap; @@ -617,15 +618,15 @@ bool Environment::equivalentTo(const Environment &Other, return true; } -LatticeJoinEffect Environment::widen(const Environment &PrevEnv, - Environment::ValueModel &Model) { +LatticeEffect Environment::widen(const Environment &PrevEnv, + Environment::ValueModel &Model) { assert(DACtx == PrevEnv.DACtx); assert(ReturnVal == PrevEnv.ReturnVal); assert(ReturnLoc == PrevEnv.ReturnLoc); assert(ThisPointeeLoc == PrevEnv.ThisPointeeLoc); assert(CallStack == PrevEnv.CallStack); - auto Effect = LatticeJoinEffect::Unchanged; + auto Effect = LatticeEffect::Unchanged; // By the API, `PrevEnv` is a previous version of the environment for the same // block, so we have some guarantees about its shape. 
In particular, it will @@ -646,7 +647,7 @@ LatticeJoinEffect Environment::widen(const Environment &PrevEnv, ExprToLoc.size() != PrevEnv.ExprToLoc.size() || ExprToVal.size() != PrevEnv.ExprToVal.size() || LocToVal.size() != PrevEnv.LocToVal.size()) - Effect = LatticeJoinEffect::Changed; + Effect = LatticeEffect::Changed; return Effect; } diff --git a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp index bea00ab..b0b579d 100644 --- a/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp +++ b/clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp @@ -805,6 +805,25 @@ public: else JoinedVal.setProperty("is_null", JoinedEnv.makeTopBoolValue()); } + + std::optional widen(QualType Type, Value &Prev, + const Environment &PrevEnv, Value &Current, + Environment &CurrentEnv) override { + switch (compare(Type, Prev, PrevEnv, Current, CurrentEnv)) { + case ComparisonResult::Same: + return WidenResult{&Current, LatticeJoinEffect::Unchanged}; + case ComparisonResult::Different: { + auto &CurPtr = cast(Current); + auto &WidenedPtr = + CurrentEnv.create(CurPtr.getPointeeLoc()); + WidenedPtr.setProperty("is_null", CurrentEnv.makeTopBoolValue()); + return WidenResult{&WidenedPtr, LatticeJoinEffect::Changed}; + } + case ComparisonResult::Unknown: + return std::nullopt; + } + llvm_unreachable("all cases in switch covered"); + } }; class WideningTest : public Test { @@ -846,7 +865,6 @@ TEST_F(WideningTest, JoinDistinctValuesWithDistinctProperties) { Code, [](const llvm::StringMap> &Results, ASTContext &ASTCtx) { - ASSERT_THAT(Results.keys(), UnorderedElementsAre("p1", "p2", "p3")); const Environment &Env1 = getEnvironmentAtAnnotation(Results, "p1"); const Environment &Env2 = getEnvironmentAtAnnotation(Results, "p2"); const Environment &Env3 = getEnvironmentAtAnnotation(Results, "p3"); @@ -889,8 +907,6 @@ TEST_F(WideningTest, JoinDistinctValuesWithSameProperties) { Code, [](const llvm::StringMap> &Results, ASTContext &ASTCtx) { - ASSERT_THAT(Results.keys(), - UnorderedElementsAre("p1", "p2", "p3", "p4")); const Environment &Env1 = getEnvironmentAtAnnotation(Results, "p1"); const Environment &Env2 = getEnvironmentAtAnnotation(Results, "p2"); const Environment &Env3 = getEnvironmentAtAnnotation(Results, "p3"); @@ -929,19 +945,11 @@ TEST_F(WideningTest, DistinctPointersToTheSameLocationAreEquivalent) { Code, [](const llvm::StringMap> &Results, ASTContext &ASTCtx) { - ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); - - const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); - ASSERT_THAT(FooDecl, NotNull()); - - const ValueDecl *BarDecl = findValueDecl(ASTCtx, "Bar"); - ASSERT_THAT(BarDecl, NotNull()); - - const auto *FooLoc = - cast(Env.getStorageLocation(*FooDecl)); - const auto *BarVal = cast(Env.getValue(*BarDecl)); - EXPECT_EQ(&BarVal->getPointeeLoc(), FooLoc); + const auto &FooLoc = + getLocForDecl(ASTCtx, Env, "Foo"); + const auto &BarVal = getValueForDecl(ASTCtx, Env, "Bar"); + EXPECT_EQ(&BarVal.getPointeeLoc(), &FooLoc); }); } @@ -963,18 +971,38 @@ TEST_F(WideningTest, DistinctValuesWithSamePropertiesAreEquivalent) { Code, [](const llvm::StringMap> &Results, ASTContext &ASTCtx) { - ASSERT_THAT(Results.keys(), UnorderedElementsAre("p")); const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); - - const ValueDecl *FooDecl = findValueDecl(ASTCtx, "Foo"); - ASSERT_THAT(FooDecl, 
NotNull()); - - const auto *FooVal = Env.getValue(*FooDecl); - EXPECT_EQ(FooVal->getProperty("is_null"), + const auto &FooVal = getValueForDecl(ASTCtx, Env, "Foo"); + EXPECT_EQ(FooVal.getProperty("is_null"), &Env.getBoolLiteralValue(false)); }); } +TEST_F(WideningTest, DistinctValuesWithDifferentPropertiesWidenedToTop) { + std::string Code = R"( + void target(bool Cond) { + int *Foo; + int i = 0; + Foo = nullptr; + while (Cond) { + Foo = &i; + } + (void)0; + /*[[p]]*/ + } + )"; + runDataflow( + Code, + [](const llvm::StringMap> &Results, + ASTContext &ASTCtx) { + const Environment &Env = getEnvironmentAtAnnotation(Results, "p"); + const auto &FooVal = getValueForDecl(ASTCtx, Env, "Foo"); + ASSERT_THAT(FooVal.getProperty("is_null"), NotNull()); + EXPECT_TRUE(areEquivalentValues(*FooVal.getProperty("is_null"), + Env.makeTopBoolValue())); + }); +} + class FlowConditionTest : public Test { protected: template -- cgit v1.1 From 7b5255297dca377a37c8df066e9d9749ab96cfad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Andrzej=20Warzy=C5=84ski?= Date: Thu, 4 Apr 2024 13:40:08 +0100 Subject: [mlir][test] Make SME e2e tests require an emulator (#86489) Integration tests for ArmSME require an emulator (there's no hardware available). Make sure that CMake complains if `MLIR_RUN_ARM_SME_TESTS` is set while `ARM_EMULATOR_EXECUTABLE` is empty. I'm also adding a note in the docs for future reference. --- mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake | 101 +++++++++++++++++++++ mlir/docs/Dialects/ArmSME.md | 10 +- mlir/test/CMakeLists.txt | 6 ++ 3 files changed, 116 insertions(+), 1 deletion(-) create mode 100644 mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake diff --git a/mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake b/mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake new file mode 100644 index 0000000..fff0424 --- /dev/null +++ b/mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake @@ -0,0 +1,101 @@ +# A collection of helper CMake functions to detect hardware capabilities. At +# the moment these are used when configuring MLIR integration tests. + +# Checks whether the specified hardware capability is supported by the host +# Linux system. This is implemented by checking auxiliary vector feature +# provided by the Linux kernel. +# +# check_hwcap( +# hwcap_spec +# output_var +# ) +# +# hwcap_spec - HWCAP value to check - these are defined in hwcap.h in the Linux +# kernel. +# +# output_var - Output variable to use to save the results (TRUE for supported, +# FALSE for not supported). +# +# EXAMPLES: +# +# check_hwcap("HWCAP2_SME" SME_EMULATOR_REQUIRED) +# +function(check_hwcap hwcap_spec output) + set(hwcap_test_src + [====[ + #include + #include + + int main(void) + { + long hwcaps = getauxval(AT_); + return (hwcaps & ) != 0; + } + ]====] + ) + + # Extract from $hwcap_spec whether this is AT_HWCAP or AT_HWCAP2 + string(FIND ${hwcap_spec} "_" wsloc) + string(SUBSTRING ${hwcap_spec} 0 ${wsloc} hwcap_vec) + + string(REPLACE "" ${hwcap_vec} hwcap_test_src "${hwcap_test_src}") + string(REPLACE "" ${hwcap_spec} hwcap_test_src "${hwcap_test_src}") + + set(hwcap_test_file ${CMAKE_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/hwcap_check.c) + file(WRITE ${hwcap_test_file} "${hwcap_test_src}") + + # Compile _and_ run + try_run( + test_run_result test_compile_result + ${CMAKE_BINARY_DIR} + ${hwcap_test_file} + ) + # Compilation will fail if hwcap_spec is not defined - this usually means + # that your Linux kernel is too old. 
+ if(${test_compile_result} AND (DEFINED test_run_result)) + message(${test_run_result}) + message(STATUS "Checking whether ${hwcap_spec} is supported by the host system: ${test_run_result}") + set(${output} ${test_run_result} PARENT_SCOPE) + else() + message(STATUS "Checking whether ${hwcap_spec} is supported by the host system: FALSE") + endif() +endfunction(check_hwcap) + +# For the given group of e2e tests (defined by the `mlir_e2e_tests` flag), +# checks whether an emulator is required. If yes, verifies that the +# corresponding CMake var pointing to an emulator (`emulator_exec`) has been +# set. +# +# check_emulator( +# mlir_e2e_tests +# hwcap_spec +# emulator_exec +# ) +# +# mlir_e2e_tests - MLIR CMake variables corresponding to the group of e2e tests +# to check +# hwcap_spec - HWCAP value to check. This should correspond to the hardware +# capabilities required by the tests to be checked. Possible +# values are defined in hwcap.h in the Linux kernel. +# emulator_exec - variable the defines the emulator (ought to be set if +# required, can be empty otherwise). +# +# EXAMPLES: +# +# check_emulator(MLIR_RUN_ARM_SVE_TESTS "HWCAP_SVE" ARM_EMULATOR_EXECUTABLE) +# +function(check_emulator mlir_e2e_tests hwcap_spec emulator_exec) + if (NOT ${mlir_e2e_tests}) + return() + endif() + + check_hwcap(${hwcap_spec} emulator_not_required) + if (${emulator_not_required}) + return() + endif() + + if (${emulator_exec} STREQUAL "") + message(FATAL_ERROR "${mlir_e2e_tests} requires an emulator, but ${emulator_exec} is not set") + endif() + +endfunction() diff --git a/mlir/docs/Dialects/ArmSME.md b/mlir/docs/Dialects/ArmSME.md index 7326150..ce0a76e 100644 --- a/mlir/docs/Dialects/ArmSME.md +++ b/mlir/docs/Dialects/ArmSME.md @@ -6,7 +6,7 @@ This dialect defines custom and LLVM IR intrinsic operations that are used to target Arm Scalable Matrix Extension. Through the available conversion and ArmSME passes you can, for example, lower a [linalg.matmul](https://mlir.llvm.org/docs/Dialects/Linalg/#linalgmatmul-linalgmatmulop) -opereation to Arm SME +operation to Arm SME [FMOPA](https://developer.arm.com/documentation/ddi0602/2023-03/SME-Instructions/FMOPA--widening---Half-precision-floating-point-sum-of-outer-products-and-accumulate-) (floating-point outer product) operations. See one of the in-tree end-to-end integration tests for reference: @@ -14,6 +14,14 @@ integration tests for reference: * [Linalg/CPU/ArmSME/matmul.mlir](https://github.com/llvm/llvm-project/blob/main/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir) * [Vector/CPU/ArmSME/test-outerproduct-f64.mlir](https://github.com/llvm/llvm-project/blob/main/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir) +In order to run ArmSME integration tests, include these flags in the CMake +invocation when configuring LLVM and MLIR: +```bash + -DMLIR_INCLUDE_INTEGRATION_TESTS=On + -DMLIR_RUN_ARM_SME_TESTS=On + -DARM_EMULATOR_EXECUTABLE= +``` + These tests are run "post-commit" by the [clang-aarch64-sve-vla](https://lab.llvm.org/buildbot/#/builders/197) LLVM BuildBot worker. 
diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt index baf07ea..5319a9c 100644 --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -1,3 +1,5 @@ +include(MLIRCheckHardwareFeatures) + add_subdirectory(CAPI) add_subdirectory(lib) @@ -39,6 +41,10 @@ if (MLIR_INCLUDE_INTEGRATION_TESTS) option(MLIR_RUN_ARM_SVE_TESTS "Run Arm SVE tests.") option(MLIR_RUN_ARM_SME_TESTS "Run Arm SME tests.") + # Check whether an emulator is required - if yes then make sure that it's + # been set. + check_emulator(MLIR_RUN_ARM_SVE_TESTS "HWCAP_SVE" ARM_EMULATOR_EXECUTABLE) + check_emulator(MLIR_RUN_ARM_SME_TESTS "HWCAP2_SME" ARM_EMULATOR_EXECUTABLE) # The native target may not be enabled when cross compiling, raise an error. if(NOT MLIR_ENABLE_EXECUTION_ENGINE) -- cgit v1.1 From 8f9903db8aed496a2cdc75a614e6d2c65c1acc07 Mon Sep 17 00:00:00 2001 From: Jacek Caban Date: Thu, 4 Apr 2024 14:41:50 +0200 Subject: [LLD][COFF][NFC] Use getMachineArchType helper. (#87495) It's similar to #87370, but for lld-link. --- lld/COFF/Chunks.cpp | 28 ++++++++++++---------------- lld/COFF/Chunks.h | 6 ++++++ lld/COFF/Driver.cpp | 1 - lld/COFF/SymbolTable.cpp | 1 - 4 files changed, 18 insertions(+), 18 deletions(-) diff --git a/lld/COFF/Chunks.cpp b/lld/COFF/Chunks.cpp index 39f4575..004d710 100644 --- a/lld/COFF/Chunks.cpp +++ b/lld/COFF/Chunks.cpp @@ -437,19 +437,17 @@ void SectionChunk::applyRelocation(uint8_t *off, // Compute the RVA of the relocation for relative relocations. uint64_t p = rva + rel.VirtualAddress; uint64_t imageBase = file->ctx.config.imageBase; - switch (getMachine()) { - case AMD64: + switch (getArch()) { + case Triple::x86_64: applyRelX64(off, rel.Type, os, s, p, imageBase); break; - case I386: + case Triple::x86: applyRelX86(off, rel.Type, os, s, p, imageBase); break; - case ARMNT: + case Triple::thumb: applyRelARM(off, rel.Type, os, s, p, imageBase); break; - case ARM64: - case ARM64EC: - case ARM64X: + case Triple::aarch64: applyRelARM64(off, rel.Type, os, s, p, imageBase); break; default: @@ -516,27 +514,25 @@ void SectionChunk::addAssociative(SectionChunk *child) { } static uint8_t getBaserelType(const coff_relocation &rel, - llvm::COFF::MachineTypes machine) { - switch (machine) { - case AMD64: + Triple::ArchType arch) { + switch (arch) { + case Triple::x86_64: if (rel.Type == IMAGE_REL_AMD64_ADDR64) return IMAGE_REL_BASED_DIR64; if (rel.Type == IMAGE_REL_AMD64_ADDR32) return IMAGE_REL_BASED_HIGHLOW; return IMAGE_REL_BASED_ABSOLUTE; - case I386: + case Triple::x86: if (rel.Type == IMAGE_REL_I386_DIR32) return IMAGE_REL_BASED_HIGHLOW; return IMAGE_REL_BASED_ABSOLUTE; - case ARMNT: + case Triple::thumb: if (rel.Type == IMAGE_REL_ARM_ADDR32) return IMAGE_REL_BASED_HIGHLOW; if (rel.Type == IMAGE_REL_ARM_MOV32T) return IMAGE_REL_BASED_ARM_MOV32T; return IMAGE_REL_BASED_ABSOLUTE; - case ARM64: - case ARM64EC: - case ARM64X: + case Triple::aarch64: if (rel.Type == IMAGE_REL_ARM64_ADDR64) return IMAGE_REL_BASED_DIR64; return IMAGE_REL_BASED_ABSOLUTE; @@ -551,7 +547,7 @@ static uint8_t getBaserelType(const coff_relocation &rel, // Only called when base relocation is enabled. 
void SectionChunk::getBaserels(std::vector *res) { for (const coff_relocation &rel : getRelocs()) { - uint8_t ty = getBaserelType(rel, getMachine()); + uint8_t ty = getBaserelType(rel, getArch()); if (ty == IMAGE_REL_BASED_ABSOLUTE) continue; Symbol *target = file->getSymbol(rel.SymbolTableIndex); diff --git a/lld/COFF/Chunks.h b/lld/COFF/Chunks.h index 7b6bdea..bb91903 100644 --- a/lld/COFF/Chunks.h +++ b/lld/COFF/Chunks.h @@ -18,6 +18,7 @@ #include "llvm/ADT/iterator_range.h" #include "llvm/MC/StringTableBuilder.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/WindowsMachineFlag.h" #include #include @@ -116,6 +117,7 @@ public: bool isHotPatchable() const; MachineTypes getMachine() const; + llvm::Triple::ArchType getArch() const; std::optional getArm64ECRangeType() const; protected: @@ -437,6 +439,10 @@ inline MachineTypes Chunk::getMachine() const { return static_cast(this)->getMachine(); } +inline llvm::Triple::ArchType Chunk::getArch() const { + return llvm::getMachineArchType(getMachine()); +} + inline std::optional Chunk::getArm64ECRangeType() const { // Data sections don't need codemap entries. if (!(getOutputCharacteristics() & llvm::COFF::IMAGE_SCN_MEM_EXECUTE)) diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index ea37f8d..b0365b5 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -31,7 +31,6 @@ #include "llvm/Object/ArchiveWriter.h" #include "llvm/Object/COFFImportFile.h" #include "llvm/Object/COFFModuleDefinition.h" -#include "llvm/Object/WindowsMachineFlag.h" #include "llvm/Option/Arg.h" #include "llvm/Option/ArgList.h" #include "llvm/Option/Option.h" diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 44aa506..3accf24 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -19,7 +19,6 @@ #include "llvm/DebugInfo/DIContext.h" #include "llvm/IR/LLVMContext.h" #include "llvm/LTO/LTO.h" -#include "llvm/Object/WindowsMachineFlag.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include -- cgit v1.1 From e0e615efac522365591119165a7691ce869de512 Mon Sep 17 00:00:00 2001 From: Christian Ulmann Date: Thu, 4 Apr 2024 15:07:18 +0200 Subject: Revert "[MLIR][LLVM][Mem2Reg] Relax type equality requirement for load and store (#87504)" (#87631) This reverts commit d6e458219897fad0e460e663833b2190af48c06d as it violates an assumption of Mem2Reg's block argument creation. Mem2Reg strongly assumes that all involved values have the same type as the alloca, which was relaxed by this PR. Therefore, branches got created that jumped to basic blocks with differently typed block arguments. --- mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 38 +---- mlir/test/Dialect/LLVMIR/mem2reg.mlir | 197 -------------------------- 2 files changed, 4 insertions(+), 231 deletions(-) diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp index f8fd3bd..06c1fdd 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp @@ -126,13 +126,6 @@ Value LLVM::StoreOp::getStored(const MemorySlot &slot, RewriterBase &rewriter) { return getValue(); } -/// Checks that two types are the same or can be cast into one another. 
-static bool areCastCompatible(const DataLayout &layout, Type lhs, Type rhs) { - return lhs == rhs || (!isa(lhs) && - !isa(rhs) && - layout.getTypeSize(lhs) == layout.getTypeSize(rhs)); -} - bool LLVM::LoadOp::canUsesBeRemoved( const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, SmallVectorImpl &newBlockingUses, @@ -145,29 +138,7 @@ bool LLVM::LoadOp::canUsesBeRemoved( // be removed (provided it loads the exact stored value and is not // volatile). return blockingUse == slot.ptr && getAddr() == slot.ptr && - areCastCompatible(dataLayout, getResult().getType(), slot.elemType) && - !getVolatile_(); -} - -/// Constructs operations that convert `inputValue` into a new value of type -/// `targetType`. Assumes that this conversion is possible. -static Value createConversionSequence(RewriterBase &rewriter, Location loc, - Value inputValue, Type targetType) { - if (inputValue.getType() == targetType) - return inputValue; - - if (!isa(targetType) && - !isa(inputValue.getType())) - return rewriter.createOrFold(loc, targetType, inputValue); - - if (!isa(targetType)) - return rewriter.createOrFold(loc, targetType, inputValue); - - if (!isa(inputValue.getType())) - return rewriter.createOrFold(loc, targetType, inputValue); - - return rewriter.createOrFold(loc, targetType, - inputValue); + getResult().getType() == slot.elemType && !getVolatile_(); } DeletionKind LLVM::LoadOp::removeBlockingUses( @@ -175,9 +146,7 @@ DeletionKind LLVM::LoadOp::removeBlockingUses( RewriterBase &rewriter, Value reachingDefinition) { // `canUsesBeRemoved` checked this blocking use must be the loaded slot // pointer. - Value newResult = createConversionSequence( - rewriter, getLoc(), reachingDefinition, getResult().getType()); - rewriter.replaceAllUsesWith(getResult(), newResult); + rewriter.replaceAllUsesWith(getResult(), reachingDefinition); return DeletionKind::Delete; } @@ -192,7 +161,8 @@ bool LLVM::StoreOp::canUsesBeRemoved( // fine, provided we are currently promoting its target value. Don't allow a // store OF the slot pointer, only INTO the slot pointer. 
return blockingUse == slot.ptr && getAddr() == slot.ptr && - getValue() != slot.ptr && !getVolatile_(); + getValue() != slot.ptr && getValue().getType() == slot.elemType && + !getVolatile_(); } DeletionKind LLVM::StoreOp::removeBlockingUses( diff --git a/mlir/test/Dialect/LLVMIR/mem2reg.mlir b/mlir/test/Dialect/LLVMIR/mem2reg.mlir index d6d5e1b..90e56c1 100644 --- a/mlir/test/Dialect/LLVMIR/mem2reg.mlir +++ b/mlir/test/Dialect/LLVMIR/mem2reg.mlir @@ -697,200 +697,3 @@ llvm.func @transitive_reaching_def() -> !llvm.ptr { %3 = llvm.load %1 {alignment = 8 : i64} : !llvm.ptr -> !llvm.ptr llvm.return %3 : !llvm.ptr } - -// ----- - -// CHECK-LABEL: @load_int_from_float -llvm.func @load_int_from_float() -> i32 { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK-NOT: llvm.alloca - %1 = llvm.alloca %0 x f32 {alignment = 4 : i64} : (i32) -> !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 - // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef - // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[UNDEF]] : f32 to i32 - // CHECK: llvm.return %[[BITCAST:.*]] - llvm.return %2 : i32 -} - -// ----- - -// CHECK-LABEL: @load_float_from_int -llvm.func @load_float_from_int() -> f32 { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK-NOT: llvm.alloca - %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> f32 - // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef - // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[UNDEF]] : i32 to f32 - // CHECK: llvm.return %[[BITCAST:.*]] - llvm.return %2 : f32 -} - -// ----- - -// CHECK-LABEL: @load_int_from_vector -llvm.func @load_int_from_vector() -> i32 { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK-NOT: llvm.alloca - %1 = llvm.alloca %0 x vector<2xi16> : (i32) -> !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 - // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef - // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[UNDEF]] : vector<2xi16> to i32 - // CHECK: llvm.return %[[BITCAST:.*]] - llvm.return %2 : i32 -} - -// ----- - -// LLVM arrays cannot be bitcasted, so the following cannot be promoted. 
- -// CHECK-LABEL: @load_int_from_array -llvm.func @load_int_from_array() -> i32 { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK: llvm.alloca - %1 = llvm.alloca %0 x !llvm.array<2 x i16> : (i32) -> !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 - // CHECK-NOT: llvm.bitcast - llvm.return %2 : i32 -} - -// ----- - -// CHECK-LABEL: @store_int_to_float -// CHECK-SAME: %[[ARG:.*]]: i32 -llvm.func @store_int_to_float(%arg: i32) -> i32 { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK-NOT: llvm.alloca - %1 = llvm.alloca %0 x f32 {alignment = 4 : i64} : (i32) -> !llvm.ptr - llvm.store %arg, %1 {alignment = 4 : i64} : i32, !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 - // CHECK: llvm.return %[[ARG]] - llvm.return %2 : i32 -} - -// ----- - -// CHECK-LABEL: @store_float_to_int -// CHECK-SAME: %[[ARG:.*]]: f32 -llvm.func @store_float_to_int(%arg: f32) -> i32 { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK-NOT: llvm.alloca - %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr - llvm.store %arg, %1 {alignment = 4 : i64} : f32, !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i32 - // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[ARG]] : f32 to i32 - // CHECK: llvm.return %[[BITCAST]] - llvm.return %2 : i32 -} - -// ----- - -// CHECK-LABEL: @store_int_to_vector -// CHECK-SAME: %[[ARG:.*]]: i32 -llvm.func @store_int_to_vector(%arg: i32) -> vector<4xi8> { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK-NOT: llvm.alloca - %1 = llvm.alloca %0 x vector<2xi16> {alignment = 4 : i64} : (i32) -> !llvm.ptr - llvm.store %arg, %1 {alignment = 4 : i64} : i32, !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> vector<4xi8> - // CHECK: %[[BITCAST:.*]] = llvm.bitcast %[[ARG]] : i32 to vector<4xi8> - // CHECK: llvm.return %[[BITCAST]] - llvm.return %2 : vector<4xi8> -} - -// ----- - -// CHECK-LABEL: @load_ptr_from_int -llvm.func @load_ptr_from_int() -> !llvm.ptr { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK-NOT: llvm.alloca - %1 = llvm.alloca %0 x i64 {alignment = 4 : i64} : (i32) -> !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> !llvm.ptr - // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef - // CHECK: %[[CAST:.*]] = llvm.inttoptr %[[UNDEF]] : i64 to !llvm.ptr - // CHECK: llvm.return %[[CAST:.*]] - llvm.return %2 : !llvm.ptr -} - -// ----- - -// CHECK-LABEL: @load_int_from_ptr -llvm.func @load_int_from_ptr() -> i64 { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK-NOT: llvm.alloca - %1 = llvm.alloca %0 x !llvm.ptr {alignment = 4 : i64} : (i32) -> !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64 - // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef - // CHECK: %[[CAST:.*]] = llvm.ptrtoint %[[UNDEF]] : !llvm.ptr to i64 - // CHECK: llvm.return %[[CAST:.*]] - llvm.return %2 : i64 -} - -// ----- - -// CHECK-LABEL: @load_ptr_addrspace_cast -llvm.func @load_ptr_addrspace_cast() -> !llvm.ptr<2> { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK-NOT: llvm.alloca - %1 = llvm.alloca %0 x !llvm.ptr<1> {alignment = 4 : i64} : (i32) -> !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> !llvm.ptr<2> - // CHECK: %[[UNDEF:.*]] = llvm.mlir.undef - // CHECK: %[[CAST:.*]] = llvm.addrspacecast %[[UNDEF]] : !llvm.ptr<1> to !llvm.ptr<2> - // CHECK: llvm.return %[[CAST:.*]] - llvm.return %2 : !llvm.ptr<2> -} - -// ----- - -// CHECK-LABEL: @load_smaller_int -llvm.func @load_smaller_int() -> i16 { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK: llvm.alloca - %1 = 
llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i16 - llvm.return %2 : i16 -} - -// ----- - -// CHECK-LABEL: @load_different_type_smaller -llvm.func @load_different_type_smaller() -> f32 { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK: llvm.alloca - %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> f32 - llvm.return %2 : f32 -} - -// ----- - -// This alloca is too small for the load, still, mem2reg should not touch it. - -// CHECK-LABEL: @impossible_load -llvm.func @impossible_load() -> f64 { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK: llvm.alloca - %1 = llvm.alloca %0 x i32 {alignment = 4 : i64} : (i32) -> !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> f64 - llvm.return %2 : f64 -} - -// ----- - -// Verifies that mem2reg does not introduce address space casts of pointers -// with different bitsize. - -module attributes { dlti.dl_spec = #dlti.dl_spec< - #dlti.dl_entry, dense<[32, 64, 64]> : vector<3xi64>>, - #dlti.dl_entry, dense<[64, 64, 64]> : vector<3xi64>> ->} { - - // CHECK-LABEL: @load_ptr_addrspace_cast_different_size - llvm.func @load_ptr_addrspace_cast_different_size() -> !llvm.ptr<2> { - %0 = llvm.mlir.constant(1 : i32) : i32 - // CHECK: llvm.alloca - %1 = llvm.alloca %0 x !llvm.ptr<1> {alignment = 4 : i64} : (i32) -> !llvm.ptr - %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> !llvm.ptr<2> - llvm.return %2 : !llvm.ptr<2> - } -} -- cgit v1.1 From 417a068b63c01d79511fe3732dd52377b05d06fc Mon Sep 17 00:00:00 2001 From: Mikael Holmen Date: Thu, 4 Apr 2024 14:57:10 +0200 Subject: [clang][CGBlocks] Remove unused variable "refType" [NFC] Without the change gcc warned like ../../clang/lib/CodeGen/CGBlocks.cpp:965:21: warning: unused variable 'refType' [-Wunused-variable] 965 | } else if (auto refType = type->getAs()) { | ^~~~~~~ --- clang/lib/CodeGen/CGBlocks.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index a01f2c7..47f063b 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -962,7 +962,7 @@ llvm::Value *CodeGenFunction::EmitBlockLiteral(const CGBlockInfo &blockInfo) { } // If it's a reference variable, copy the reference into the block field. - } else if (auto refType = type->getAs()) { + } else if (type->getAs()) { Builder.CreateStore(src.emitRawPointer(*this), blockField); // If type is const-qualified, copy the value into the block field. -- cgit v1.1 From a1f4ac7704255627ac33ad67a22be5ac030f6179 Mon Sep 17 00:00:00 2001 From: Phoebe Wang Date: Thu, 4 Apr 2024 21:12:25 +0800 Subject: [SEH] Ignore EH pad check for internal intrinsics (#79694) Intrinsics like @llvm.seh.scope.begin and @llvm.seh.scope.end which do not throw do not need funclets in catchpads or cleanuppads. 
Fixes #69428 Co-authored-by: Robert Cox --------- Co-authored-by: Robert Cox --- llvm/lib/IR/Verifier.cpp | 5 +++++ llvm/test/Verifier/pr69428.ll | 48 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 llvm/test/Verifier/pr69428.ll diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index 33f3584..ba0b723 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -4343,6 +4343,11 @@ void Verifier::visitEHPadPredecessors(Instruction &I) { if (auto *II = dyn_cast(TI)) { Check(II->getUnwindDest() == BB && II->getNormalDest() != BB, "EH pad must be jumped to via an unwind edge", ToPad, II); + auto *CalledFn = + dyn_cast(II->getCalledOperand()->stripPointerCasts()); + if (CalledFn && CalledFn->isIntrinsic() && II->doesNotThrow() && + !IntrinsicInst::mayLowerToFunctionCall(CalledFn->getIntrinsicID())) + continue; if (auto Bundle = II->getOperandBundle(LLVMContext::OB_funclet)) FromPad = Bundle->Inputs[0]; else diff --git a/llvm/test/Verifier/pr69428.ll b/llvm/test/Verifier/pr69428.ll new file mode 100644 index 0000000..be8733b --- /dev/null +++ b/llvm/test/Verifier/pr69428.ll @@ -0,0 +1,48 @@ +; RUN: llvm-as -disable-output %s + +%struct._List_node_emplace_op2 = type { i8 } + +@"?_List@@3HA" = global i32 0, align 4 + +define void @"?ExecutionEngineaddExecutableDependency@@YAXXZ"() personality ptr @__CxxFrameHandler3 { +entry: + %agg.tmp.ensured.i = alloca %struct._List_node_emplace_op2, align 1 + %0 = load i32, ptr @"?_List@@3HA", align 4 + %call.i = call noundef ptr @"??0?$_List_node_emplace_op2@H@@QEAA@H@Z"(ptr %agg.tmp.ensured.i, i32 %0) + invoke void @llvm.seh.scope.begin() + to label %invoke.cont.i unwind label %ehcleanup.i + +invoke.cont.i: ; preds = %entry + invoke void @llvm.seh.scope.end() + to label %invoke.cont2.i unwind label %ehcleanup.i + +invoke.cont2.i: ; preds = %invoke.cont.i + call void @"??1?$_List_node_emplace_op2@H@@QEAA@XZ"(ptr %agg.tmp.ensured.i) #6 + unreachable + +ehcleanup.i: ; preds = %invoke.cont.i, %entry + %1 = cleanuppad within none [] + invoke void @llvm.seh.scope.begin() + to label %invoke.cont.i.i unwind label %ehcleanup.i.i + +invoke.cont.i.i: ; preds = %ehcleanup.i + invoke void @llvm.seh.scope.end() + to label %"??1?$_List_node_emplace_op2@H@@QEAA@XZ.exit.i" unwind label %ehcleanup.i.i + +ehcleanup.i.i: ; preds = %invoke.cont.i.i, %ehcleanup.i + %2 = cleanuppad within %1 [] + call void @"??1_Alloc_construct_ptr@@QEAA@XZ"(ptr %agg.tmp.ensured.i) #6 [ "funclet"(token %2) ] + cleanupret from %2 unwind to caller + +"??1?$_List_node_emplace_op2@H@@QEAA@XZ.exit.i": ; preds = %invoke.cont.i.i + call void @"??1_Alloc_construct_ptr@@QEAA@XZ"(ptr %agg.tmp.ensured.i) #6 [ "funclet"(token %1) ] + cleanupret from %1 unwind to caller +} + +declare i32 @__CxxFrameHandler3(...) 
+declare void @llvm.seh.scope.begin() +declare void @llvm.seh.scope.end() + +declare void @"??1?$_List_node_emplace_op2@H@@QEAA@XZ"(ptr) +declare void @"??1_Alloc_construct_ptr@@QEAA@XZ"(ptr) +declare ptr @"??0?$_List_node_emplace_op2@H@@QEAA@H@Z"(ptr, i32) -- cgit v1.1 From ea88bb16485a34db58333637d8558ff57e6a1329 Mon Sep 17 00:00:00 2001 From: jeanPerier Date: Thu, 4 Apr 2024 15:31:54 +0200 Subject: [flang] Return 1 in ERROR STOP without user provided stop-code (#87501) See F'2023 section 11.4: "If the stop-code in an ERROR STOP statement is of type character or does not appear, it is recommended that a processor-dependent nonzero value be supplied as the process exit status" Fixes https://github.com/llvm/llvm-project/issues/66581. --- flang/lib/Lower/Runtime.cpp | 9 +++++---- flang/test/Lower/stop-statement.f90 | 4 ++-- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/flang/lib/Lower/Runtime.cpp b/flang/lib/Lower/Runtime.cpp index e769592..3474832 100644 --- a/flang/lib/Lower/Runtime.cpp +++ b/flang/lib/Lower/Runtime.cpp @@ -55,6 +55,8 @@ static void genUnreachable(fir::FirOpBuilder &builder, mlir::Location loc) { void Fortran::lower::genStopStatement( Fortran::lower::AbstractConverter &converter, const Fortran::parser::StopStmt &stmt) { + const bool isError = std::get(stmt.t) == + Fortran::parser::StopStmt::Kind::ErrorStop; fir::FirOpBuilder &builder = converter.getFirOpBuilder(); mlir::Location loc = converter.getCurrentLocation(); Fortran::lower::StatementContext stmtCtx; @@ -94,13 +96,12 @@ void Fortran::lower::genStopStatement( } else { callee = fir::runtime::getRuntimeFunc(loc, builder); calleeType = callee.getFunctionType(); - operands.push_back( - builder.createIntegerConstant(loc, calleeType.getInput(0), 0)); + // Default to values are advised in F'2023 11.4 p2. + operands.push_back(builder.createIntegerConstant( + loc, calleeType.getInput(0), isError ? 1 : 0)); } // Second operand indicates ERROR STOP - bool isError = std::get(stmt.t) == - Fortran::parser::StopStmt::Kind::ErrorStop; operands.push_back(builder.createIntegerConstant( loc, calleeType.getInput(operands.size()), isError)); diff --git a/flang/test/Lower/stop-statement.f90 b/flang/test/Lower/stop-statement.f90 index bc94a7e..cf0665c 100644 --- a/flang/test/Lower/stop-statement.f90 +++ b/flang/test/Lower/stop-statement.f90 @@ -21,10 +21,10 @@ end subroutine ! CHECK-LABEL: stop_error subroutine stop_error() error stop - ! CHECK-DAG: %[[c0:.*]] = arith.constant 0 : i32 + ! CHECK-DAG: %[[c_1:.*]] = arith.constant 1 : i32 ! CHECK-DAG: %[[true:.*]] = arith.constant true ! CHECK-DAG: %[[false:.*]] = arith.constant false - ! CHECK: fir.call @_Fortran{{.*}}StopStatement(%[[c0]], %[[true]], %[[false]]) + ! CHECK: fir.call @_Fortran{{.*}}StopStatement(%[[c_1]], %[[true]], %[[false]]) ! CHECK-NEXT: fir.unreachable end subroutine -- cgit v1.1 From 1bce411073a1deafef998d0fe9e3ae74c4cef1e5 Mon Sep 17 00:00:00 2001 From: YunQiang Su Date: Thu, 4 Apr 2024 21:51:25 +0800 Subject: MIPS/Clang: Set HasUnalignedAccess false if +strict-align (#87257) TargetInfo has HasUnalignedAccess support now. For MIPSr6, we should set it according strict-align. For pre-R6, we always set strict-align and HasUnalignedAccess to false. 
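
To see why the check is deferred, note that the target feature list gives no ordering guarantee between "+strict-align" and "+mips32r6"/"+mips64r6", so the flag is latched inside the loop and applied once all features have been seen. A standalone sketch of that logic (not the actual TargetInfo code; only the feature strings are taken from this change):

  #include <cassert>
  #include <string>
  #include <vector>

  // Model of the feature loop: "+strict-align" must win no matter where it
  // appears relative to "+mips32r6"/"+mips64r6" in the feature list.
  static bool hasUnalignedAccess(const std::vector<std::string> &Features) {
    bool HasUnalignedAccess = false; // pre-R6 default
    bool StrictAlign = false;        // latched, applied after the loop
    for (const std::string &F : Features) {
      if (F == "+mips32r6" || F == "+mips64r6")
        HasUnalignedAccess = true;
      else if (F == "+strict-align")
        StrictAlign = true;
    }
    if (StrictAlign)
      HasUnalignedAccess = false;
    return HasUnalignedAccess;
  }

  int main() {
    // Both orderings give the same answer.
    assert(!hasUnalignedAccess({"+mips32r6", "+strict-align"}));
    assert(!hasUnalignedAccess({"+strict-align", "+mips32r6"}));
    assert(hasUnalignedAccess({"+mips64r6"}));
    return 0;
  }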
--- clang/lib/Basic/Targets/Mips.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/clang/lib/Basic/Targets/Mips.h b/clang/lib/Basic/Targets/Mips.h index c9dcf43..0d6e4b4 100644 --- a/clang/lib/Basic/Targets/Mips.h +++ b/clang/lib/Basic/Targets/Mips.h @@ -318,6 +318,7 @@ public: FPMode = isFP64Default() ? FP64 : FPXX; NoOddSpreg = false; bool OddSpregGiven = false; + bool StrictAlign = false; for (const auto &Feature : Features) { if (Feature == "+single-float") @@ -330,6 +331,10 @@ public: IsMicromips = true; else if (Feature == "+mips32r6" || Feature == "+mips64r6") HasUnalignedAccess = true; + // We cannot be sure that the order of strict-align vs mips32r6. + // Thus we need an extra variable here. + else if (Feature == "+strict-align") + StrictAlign = true; else if (Feature == "+dsp") DspRev = std::max(DspRev, DSP1); else if (Feature == "+dspr2") @@ -368,6 +373,9 @@ public: if (FPMode == FPXX && !OddSpregGiven) NoOddSpreg = true; + if (StrictAlign) + HasUnalignedAccess = false; + setDataLayout(); return true; -- cgit v1.1 From 110e933b7ae9150710a48b586fd3da39439079c2 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 14:51:23 +0100 Subject: CGOpenMPRuntime.cpp - fix Wparentheses warning. NFC. --- clang/lib/CodeGen/CGOpenMPRuntime.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/clang/lib/CodeGen/CGOpenMPRuntime.cpp b/clang/lib/CodeGen/CGOpenMPRuntime.cpp index bc36331..8eb1058 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntime.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntime.cpp @@ -2648,9 +2648,9 @@ void CGOpenMPRuntime::emitDistributeStaticInit( void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind) { - assert(DKind == OMPD_distribute || DKind == OMPD_for || - DKind == OMPD_sections && - "Expected distribute, for, or sections directive kind"); + assert((DKind == OMPD_distribute || DKind == OMPD_for || + DKind == OMPD_sections) && + "Expected distribute, for, or sections directive kind"); if (!CGF.HaveInsertPoint()) return; // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid); -- cgit v1.1 From 24c256a6b7892bcf98eac531c99d9038cd826ce0 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 14:56:31 +0100 Subject: AMDGPULowerBufferFatPointers.cpp - fix Wparentheses warning. NFC. --- llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp index 9083150..20ca633 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp @@ -1777,8 +1777,8 @@ void SplitPtrStructs::processFunction(Function &F) { Originals.push_back(&I); for (Instruction *I : Originals) { auto [Rsrc, Off] = visit(I); - assert((Rsrc && Off) || - (!Rsrc && !Off) && "Can't have a resource but no offset"); + assert(((Rsrc && Off) || (!Rsrc && !Off)) && + "Can't have a resource but no offset"); if (Rsrc) RsrcParts[I] = Rsrc; if (Off) -- cgit v1.1 From 6fa2d03bbf86bc01140e3b007554617b029fd3f1 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 14:58:42 +0100 Subject: AMDGPULowerBufferFatPointers.cpp - fix Wunused-variable warning. NFC. 
--- llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp index 20ca633..1114a8c 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp @@ -1086,7 +1086,7 @@ void SplitPtrStructs::processConditionals() { if (MaybeRsrc) for (Value *V : Seen) FoundRsrcs[cast(V)] = NewRsrc; - } else if (auto *SI = dyn_cast(I)) { + } else if (isa(I)) { if (MaybeRsrc) { ConditionalTemps.push_back(cast(Rsrc)); Rsrc->replaceAllUsesWith(*MaybeRsrc); -- cgit v1.1 From a69673615bb9f14794056470a32f70f60a52213d Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 15:19:07 +0100 Subject: [X86] Haswell/Broadwell - fix (V)ROUND*ri sched behaviours to use 2*Port1 We were only using the Port23 memory ports and were missing the 2*Port1 uops entirely. Confirmed by Agner + uops.info/uica --- llvm/lib/Target/X86/X86SchedBroadwell.td | 6 ++--- llvm/lib/Target/X86/X86SchedHaswell.td | 9 +++----- .../tools/llvm-mca/X86/Broadwell/resources-avx1.s | 26 +++++++++++----------- .../tools/llvm-mca/X86/Broadwell/resources-sse41.s | 18 +++++++-------- .../tools/llvm-mca/X86/Haswell/resources-avx1.s | 26 +++++++++++----------- .../tools/llvm-mca/X86/Haswell/resources-sse41.s | 18 +++++++-------- 6 files changed, 49 insertions(+), 54 deletions(-) diff --git a/llvm/lib/Target/X86/X86SchedBroadwell.td b/llvm/lib/Target/X86/X86SchedBroadwell.td index b3ee7a8..63ac910 100644 --- a/llvm/lib/Target/X86/X86SchedBroadwell.td +++ b/llvm/lib/Target/X86/X86SchedBroadwell.td @@ -329,11 +329,9 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : BWWriteResPair; // Floating point fabs/fchs. -defm : X86WriteRes; // Floating point rounding. -defm : X86WriteRes; // Floating point rounding (YMM/ZMM). +defm : BWWriteResPair; // Floating point rounding. +defm : BWWriteResPair; // Floating point rounding (YMM/ZMM). defm : X86WriteResPairUnsupported; -defm : X86WriteRes; -defm : X86WriteRes; defm : BWWriteResPair; // Floating point and/or/xor logicals. defm : BWWriteResPair; // Floating point and/or/xor logicals (YMM/ZMM). 
defm : X86WriteResPairUnsupported; diff --git a/llvm/lib/Target/X86/X86SchedHaswell.td b/llvm/lib/Target/X86/X86SchedHaswell.td index 6c301a3..516dc62 100644 --- a/llvm/lib/Target/X86/X86SchedHaswell.td +++ b/llvm/lib/Target/X86/X86SchedHaswell.td @@ -329,12 +329,9 @@ defm : X86WriteRes; defm : X86WriteRes; defm : X86WriteRes; defm : HWWriteResPair; -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 -defm : X86WriteRes; -defm : X86WriteRes; -defm : X86WriteRes; // Unsupported = 1 +defm : HWWriteResPair; +defm : HWWriteResPair; +defm : HWWriteResPair; // Unsupported = 1 defm : HWWriteResPair; defm : HWWriteResPair; defm : HWWriteResPair; // Unsupported = 1 diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s index ca1faf6..1b196b4 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-avx1.s @@ -1632,17 +1632,17 @@ vzeroupper # CHECK-NEXT: 4 17 2.00 * vrcpps (%rax), %ymm2 # CHECK-NEXT: 1 5 1.00 vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 1.00 * vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundpd $1, %xmm0, %xmm2 # CHECK-NEXT: 3 11 2.00 * vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 6 2.00 vroundpd $1, %ymm0, %ymm2 # CHECK-NEXT: 3 12 2.00 * vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: 1 6 0.50 vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundps $1, %xmm0, %xmm2 # CHECK-NEXT: 3 11 2.00 * vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 6 2.00 vroundps $1, %ymm0, %ymm2 # CHECK-NEXT: 3 12 2.00 * vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: 1 6 0.50 vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundsd $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 3 11 2.00 * vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundss $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 3 11 2.00 * vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 5 1.00 vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: 2 10 1.00 * vrsqrtps (%rax), %xmm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 257.00 216.25 235.25 176.17 176.17 38.00 424.25 3.25 12.67 +# CHECK-NEXT: - 257.00 216.25 247.25 173.17 173.17 38.00 424.25 3.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2342,17 +2342,17 @@ vzeroupper # CHECK-NEXT: - - 2.33 0.33 0.50 0.50 - 0.33 - - vrcpps (%rax), %ymm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundpd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundpd $1, %ymm0, %ymm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - 2.00 - - - - - 
- vroundps $1, %ymm0, %ymm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrsqrtps (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s index dcc5353..4865121 100644 --- a/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Broadwell/resources-sse41.s @@ -243,13 +243,13 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 3 15 2.00 * pmulld (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 ptest %xmm0, %xmm1 # CHECK-NEXT: 3 7 1.00 * ptest (%rax), %xmm1 -# CHECK-NEXT: 1 6 0.50 roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundpd $1, %xmm0, %xmm2 # CHECK-NEXT: 3 11 2.00 * roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundps $1, %xmm0, %xmm2 # CHECK-NEXT: 3 11 2.00 * roundps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundsd $1, %xmm0, %xmm2 # CHECK-NEXT: 3 11 2.00 * roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundss $1, %xmm0, %xmm2 # CHECK-NEXT: 3 11 2.00 * roundss $1, (%rax), %xmm2 # CHECK: Resources: @@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 23.83 22.33 25.67 25.67 5.00 80.33 0.50 1.67 +# CHECK-NEXT: - - 23.83 30.33 23.67 23.67 5.00 80.33 0.50 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -358,11 +358,11 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - 2.00 - 0.50 0.50 - - - - pmulld (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - ptest %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - ptest (%rax), %xmm1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundpd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundsd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundss $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundss $1, (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s index cff60c9..05c4760 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-avx1.s @@ -1632,17 +1632,17 @@ vzeroupper # CHECK-NEXT: 4 18 2.00 * vrcpps (%rax), %ymm2 # CHECK-NEXT: 1 5 1.00 vrcpss 
%xmm0, %xmm1, %xmm2 # CHECK-NEXT: 2 10 1.00 * vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundpd $1, %xmm0, %xmm2 # CHECK-NEXT: 3 12 2.00 * vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 6 2.00 vroundpd $1, %ymm0, %ymm2 # CHECK-NEXT: 3 13 2.00 * vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: 1 6 0.50 vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundps $1, %xmm0, %xmm2 # CHECK-NEXT: 3 12 2.00 * vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: 2 6 2.00 vroundps $1, %ymm0, %ymm2 # CHECK-NEXT: 3 13 2.00 * vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: 1 6 0.50 vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundsd $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 3 12 2.00 * vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: 1 6 0.50 vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: 2 6 2.00 vroundss $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: 3 12 2.00 * vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: 1 5 1.00 vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: 2 11 1.00 * vrsqrtps (%rax), %xmm2 @@ -1736,7 +1736,7 @@ vzeroupper # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - 336.00 215.58 236.58 176.17 176.17 38.00 427.58 3.25 12.67 +# CHECK-NEXT: - 336.00 215.58 248.58 173.17 173.17 38.00 427.58 3.25 12.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -2342,17 +2342,17 @@ vzeroupper # CHECK-NEXT: - - 2.33 0.33 0.50 0.50 - 0.33 - - vrcpps (%rax), %ymm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrcpss %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrcpss (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundpd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundpd $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundpd $1, %ymm0, %ymm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundpd $1, (%rax), %ymm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundps $1, %ymm0, %ymm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundps $1, %ymm0, %ymm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundps $1, (%rax), %ymm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundsd $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundsd $1, (%rax), %xmm1, %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - vroundss $1, %xmm0, %xmm1, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - vroundss $1, %xmm0, %xmm1, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - vroundss $1, (%rax), %xmm1, %xmm2 # CHECK-NEXT: - - 1.00 - - - - - - - vrsqrtps %xmm0, %xmm2 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - - - - vrsqrtps (%rax), %xmm2 diff --git a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s index c2d0773..62dfa23 100644 --- a/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Haswell/resources-sse41.s @@ -243,13 +243,13 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: 3 16 2.00 * pmulld (%rax), %xmm2 # CHECK-NEXT: 2 2 1.00 ptest %xmm0, %xmm1 # 
CHECK-NEXT: 3 8 1.00 * ptest (%rax), %xmm1 -# CHECK-NEXT: 1 6 0.50 roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundpd $1, %xmm0, %xmm2 # CHECK-NEXT: 3 12 2.00 * roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundps $1, %xmm0, %xmm2 # CHECK-NEXT: 3 12 2.00 * roundps $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundsd $1, %xmm0, %xmm2 # CHECK-NEXT: 3 12 2.00 * roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: 1 6 0.50 roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: 2 6 2.00 roundss $1, %xmm0, %xmm2 # CHECK-NEXT: 3 12 2.00 * roundss $1, (%rax), %xmm2 # CHECK: Resources: @@ -266,7 +266,7 @@ roundss $1, (%rax), %xmm2 # CHECK: Resource pressure per iteration: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] -# CHECK-NEXT: - - 23.83 22.33 25.67 25.67 5.00 80.33 0.50 1.67 +# CHECK-NEXT: - - 23.83 30.33 23.67 23.67 5.00 80.33 0.50 1.67 # CHECK: Resource pressure by instruction: # CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] [8] [9] Instructions: @@ -358,11 +358,11 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: - - 2.00 - 0.50 0.50 - - - - pmulld (%rax), %xmm2 # CHECK-NEXT: - - 1.00 - - - - 1.00 - - ptest %xmm0, %xmm1 # CHECK-NEXT: - - 1.00 - 0.50 0.50 - 1.00 - - ptest (%rax), %xmm1 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundpd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundpd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundpd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundps $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundps $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundps $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundsd $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundsd $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundsd $1, (%rax), %xmm2 -# CHECK-NEXT: - - - - 0.50 0.50 - - - - roundss $1, %xmm0, %xmm2 +# CHECK-NEXT: - - - 2.00 - - - - - - roundss $1, %xmm0, %xmm2 # CHECK-NEXT: - - - 2.00 0.50 0.50 - - - - roundss $1, (%rax), %xmm2 -- cgit v1.1 From ecb34599bdadfb74ee22529ad150e7500dd22641 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 15:20:16 +0100 Subject: [X86] Add missing immediate qualifier to the (V)ROUND instructions (#87636) Makes it easier to algorithmically recreate the instruction name in various analysis scripts I'm working on --- llvm/lib/Target/X86/X86InstrInfo.cpp | 32 +++--- llvm/lib/Target/X86/X86InstrSSE.td | 116 ++++++++++----------- llvm/lib/Target/X86/X86SchedSapphireRapids.td | 14 +-- llvm/test/TableGen/x86-fold-tables.inc | 28 ++--- .../utils/TableGen/X86ManualCompressEVEXTables.def | 16 +-- 5 files changed, 103 insertions(+), 103 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp index f243343..a5b2e48 100644 --- a/llvm/lib/Target/X86/X86InstrInfo.cpp +++ b/llvm/lib/Target/X86/X86InstrInfo.cpp @@ -6276,10 +6276,10 @@ static bool hasPartialRegUpdate(unsigned Opcode, const X86Subtarget &Subtarget, case X86::RCPSSm: case X86::RCPSSr_Int: case X86::RCPSSm_Int: - case X86::ROUNDSDr: - case X86::ROUNDSDm: - case X86::ROUNDSSr: - case X86::ROUNDSSm: + case X86::ROUNDSDri: + case X86::ROUNDSDmi: + case X86::ROUNDSSri: + case X86::ROUNDSSmi: case X86::RSQRTSSr: case X86::RSQRTSSm: case X86::RSQRTSSr_Int: @@ -6778,14 +6778,14 @@ static bool hasUndefRegUpdate(unsigned Opcode, unsigned OpNum, case X86::VRCPSSr_Int: case X86::VRCPSSm: case X86::VRCPSSm_Int: - case X86::VROUNDSDr: - case X86::VROUNDSDm: 
- case X86::VROUNDSDr_Int: - case X86::VROUNDSDm_Int: - case X86::VROUNDSSr: - case X86::VROUNDSSm: - case X86::VROUNDSSr_Int: - case X86::VROUNDSSm_Int: + case X86::VROUNDSDri: + case X86::VROUNDSDmi: + case X86::VROUNDSDri_Int: + case X86::VROUNDSDmi_Int: + case X86::VROUNDSSri: + case X86::VROUNDSSmi: + case X86::VROUNDSSri_Int: + case X86::VROUNDSSmi_Int: case X86::VRSQRTSSr: case X86::VRSQRTSSr_Int: case X86::VRSQRTSSm: @@ -7516,8 +7516,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VRCPSSr_Int: case X86::RSQRTSSr_Int: case X86::VRSQRTSSr_Int: - case X86::ROUNDSSr_Int: - case X86::VROUNDSSr_Int: + case X86::ROUNDSSri_Int: + case X86::VROUNDSSri_Int: case X86::COMISSrr_Int: case X86::VCOMISSrr_Int: case X86::VCOMISSZrr_Int: @@ -7685,8 +7685,8 @@ static bool isNonFoldablePartialRegisterLoad(const MachineInstr &LoadMI, case X86::VCVTSD2USI64Zrr_Int: case X86::VCVTTSD2USIZrr_Int: case X86::VCVTTSD2USI64Zrr_Int: - case X86::ROUNDSDr_Int: - case X86::VROUNDSDr_Int: + case X86::ROUNDSDri_Int: + case X86::VROUNDSDri_Int: case X86::COMISDrr_Int: case X86::VCOMISDrr_Int: case X86::VCOMISDZrr_Int: diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td index 69d4536..2b391b6 100644 --- a/llvm/lib/Target/X86/X86InstrSSE.td +++ b/llvm/lib/Target/X86/X86InstrSSE.td @@ -5475,35 +5475,35 @@ multiclass sse41_fp_unop_p opc, string OpcodeStr, // Intrinsic operation, reg. // Vector intrinsic operation, reg let Uses = [MXCSR], mayRaiseFPException = 1 in { - def r : SS4AIi8, - Sched<[sched]>; + def ri : SS4AIi8, + Sched<[sched]>; // Vector intrinsic operation, mem - def m : SS4AIi8, - Sched<[sched.Folded]>; + def mi : SS4AIi8, + Sched<[sched.Folded]>; } } multiclass avx_fp_unop_rm opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { - def SSr : SS4AIi8, Sched<[sched]>; let mayLoad = 1 in - def SSm : SS4AIi8, Sched<[sched]>; let mayLoad = 1 in - def SDm : SS4AIi8 opcss, bits<8> opcsd, string OpcodeStr, X86FoldableSchedWrite sched> { let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle, hasSideEffects = 0, isCodeGenOnly = 1 in { - def SSr : SS4AIi8, Sched<[sched]>; + def SSri : SS4AIi8, Sched<[sched]>; let mayLoad = 1 in - def SSm : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; + def SSmi : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedSingle, hasSideEffects = 0 let ExeDomain = SSEPackedDouble, hasSideEffects = 0, isCodeGenOnly = 1 in { - def SDr : SS4AIi8, Sched<[sched]>; + def SDri : SS4AIi8, Sched<[sched]>; let mayLoad = 1 in - def SDm : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; + def SDmi : SS4AIi8, Sched<[sched.Folded, sched.ReadAfterFold]>; } // ExeDomain = SSEPackedDouble, hasSideEffects = 0 } } -multiclass sse41_fp_binop_s opcss, bits<8> opcsd, - string OpcodeStr, X86FoldableSchedWrite sched, - ValueType VT32, ValueType VT64, - SDNode OpNode, bit Is2Addr = 1> { +multiclass sse41_fp_unop_s_int opcss, bits<8> opcsd, + string OpcodeStr, X86FoldableSchedWrite sched, + ValueType VT32, ValueType VT64, + SDNode OpNode, bit Is2Addr = 1> { let Uses = [MXCSR], mayRaiseFPException = 1 in { let ExeDomain = SSEPackedSingle in { - def SSr_Int : SS4AIi8, Sched<[sched]>; - def SSm_Int : SS4AIi8, Sched<[sched]>; - def SDm_Int : SS4AIi8, - VEX, VVVV, VEX_LIG, WIG, SIMD_EXC; + defm VROUND : sse41_fp_unop_s_int<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl, + v4f32, v2f64, X86RndScales, 
0>, + VEX, VVVV, VEX_LIG, WIG, SIMD_EXC; defm VROUND : avx_fp_unop_rm<0x0A, 0x0B, "vround", SchedWriteFRnd.Scl>, VEX, VVVV, VEX_LIG, WIG, SIMD_EXC; } let Predicates = [UseAVX] in { def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2), - (VROUNDSSr (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>; + (VROUNDSSri (f32 (IMPLICIT_DEF)), FR32:$src1, timm:$src2)>; def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2), - (VROUNDSDr (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>; + (VROUNDSDri (f64 (IMPLICIT_DEF)), FR64:$src1, timm:$src2)>; } let Predicates = [UseAVX, OptForSize] in { def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2), - (VROUNDSSm (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; + (VROUNDSSmi (f32 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2), - (VROUNDSDm (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; + (VROUNDSDmi (f64 (IMPLICIT_DEF)), addr:$src1, timm:$src2)>; } let ExeDomain = SSEPackedSingle in @@ -5667,21 +5667,21 @@ defm ROUNDPD : sse41_fp_unop_p<0x09, "roundpd", f128mem, VR128, v2f64, defm ROUND : sse41_fp_unop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl>; let Constraints = "$src1 = $dst" in -defm ROUND : sse41_fp_binop_s<0x0A, 0x0B, "round", SchedWriteFRnd.Scl, - v4f32, v2f64, X86RndScales>; +defm ROUND : sse41_fp_unop_s_int<0x0A, 0x0B, "round", SchedWriteFRnd.Scl, + v4f32, v2f64, X86RndScales>; let Predicates = [UseSSE41] in { def : Pat<(X86any_VRndScale FR32:$src1, timm:$src2), - (ROUNDSSr FR32:$src1, timm:$src2)>; + (ROUNDSSri FR32:$src1, timm:$src2)>; def : Pat<(X86any_VRndScale FR64:$src1, timm:$src2), - (ROUNDSDr FR64:$src1, timm:$src2)>; + (ROUNDSDri FR64:$src1, timm:$src2)>; } let Predicates = [UseSSE41, OptForSize] in { def : Pat<(X86any_VRndScale (loadf32 addr:$src1), timm:$src2), - (ROUNDSSm addr:$src1, timm:$src2)>; + (ROUNDSSmi addr:$src1, timm:$src2)>; def : Pat<(X86any_VRndScale (loadf64 addr:$src1), timm:$src2), - (ROUNDSDm addr:$src1, timm:$src2)>; + (ROUNDSDmi addr:$src1, timm:$src2)>; } //===----------------------------------------------------------------------===// diff --git a/llvm/lib/Target/X86/X86SchedSapphireRapids.td b/llvm/lib/Target/X86/X86SchedSapphireRapids.td index 88bb9ad..ff3fe32 100644 --- a/llvm/lib/Target/X86/X86SchedSapphireRapids.td +++ b/llvm/lib/Target/X86/X86SchedSapphireRapids.td @@ -2290,8 +2290,8 @@ def SPRWriteResGroup218 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { let Latency = 15; let NumMicroOps = 3; } -def : InstRW<[SPRWriteResGroup218], (instregex "^(V?)ROUNDP(D|S)m$")>; -def : InstRW<[SPRWriteResGroup218, ReadAfterVecXLd], (instregex "^(V?)ROUNDS(D|S)m((_Int)?)$", +def : InstRW<[SPRWriteResGroup218], (instregex "^(V?)ROUNDP(D|S)mi$")>; +def : InstRW<[SPRWriteResGroup218, ReadAfterVecXLd], (instregex "^(V?)ROUNDS(D|S)mi((_Int)?)$", "^VRNDSCALEP(D|S)Z128rm(bi|ik)$", "^VRNDSCALEP(D|S)Z128rmbik(z?)$", "^VRNDSCALEP(D|S)Z128rmi((kz)?)$", @@ -2303,13 +2303,13 @@ def SPRWriteResGroup219 : SchedWriteRes<[SPRPort00_01]> { let Latency = 8; let NumMicroOps = 2; } -def : InstRW<[SPRWriteResGroup219], (instregex "^(V?)ROUND(PD|SS)r$", - "^(V?)ROUND(PS|SD)r$", - "^(V?)ROUNDS(D|S)r_Int$", +def : InstRW<[SPRWriteResGroup219], (instregex "^(V?)ROUND(PD|SS)ri$", + "^(V?)ROUND(PS|SD)ri$", + "^(V?)ROUNDS(D|S)ri_Int$", "^VRNDSCALEP(D|S)Z(128|256)rri((k|kz)?)$", "^VRNDSCALES(D|S)Zr$", "^VRNDSCALES(D|S)Zr(b?)_Int((k|kz)?)$", - "^VROUNDP(D|S)Yr$")>; + "^VROUNDP(D|S)Yri$")>; def SPRWriteResGroup220 : SchedWriteRes<[SPRPort00_06]> { let ReleaseAtCycles = [2]; @@ -3737,7 +3737,7 
@@ def SPRWriteResGroup390 : SchedWriteRes<[SPRPort00_01, SPRPort02_03_11]> { let NumMicroOps = 3; } def : InstRW<[SPRWriteResGroup390], (instregex "^VF(C?)MADDCPHZ(128|256)m(b?)$", - "^VROUNDP(D|S)Ym$")>; + "^VROUNDP(D|S)Ymi$")>; def : InstRW<[SPRWriteResGroup390, ReadAfterVecXLd], (instregex "^VF(C?)MADDCSHZm$", "^VF(C?)MULCPHZ128rm(b?)$", "^VF(C?)MULCSHZrm$", diff --git a/llvm/test/TableGen/x86-fold-tables.inc b/llvm/test/TableGen/x86-fold-tables.inc index 4ab5567..493350d 100644 --- a/llvm/test/TableGen/x86-fold-tables.inc +++ b/llvm/test/TableGen/x86-fold-tables.inc @@ -984,10 +984,10 @@ static const X86FoldTableEntry Table1[] = { {X86::RORX32ri_EVEX, X86::RORX32mi_EVEX, 0}, {X86::RORX64ri, X86::RORX64mi, 0}, {X86::RORX64ri_EVEX, X86::RORX64mi_EVEX, 0}, - {X86::ROUNDPDr, X86::ROUNDPDm, TB_ALIGN_16}, - {X86::ROUNDPSr, X86::ROUNDPSm, TB_ALIGN_16}, - {X86::ROUNDSDr, X86::ROUNDSDm, 0}, - {X86::ROUNDSSr, X86::ROUNDSSm, 0}, + {X86::ROUNDPDri, X86::ROUNDPDmi, TB_ALIGN_16}, + {X86::ROUNDPSri, X86::ROUNDPSmi, TB_ALIGN_16}, + {X86::ROUNDSDri, X86::ROUNDSDmi, 0}, + {X86::ROUNDSSri, X86::ROUNDSSmi, 0}, {X86::RSQRTPSr, X86::RSQRTPSm, TB_ALIGN_16}, {X86::RSQRTSSr, X86::RSQRTSSm, 0}, {X86::SAR16r1_ND, X86::SAR16m1_ND, 0}, @@ -1791,10 +1791,10 @@ static const X86FoldTableEntry Table1[] = { {X86::VRNDSCALEPSZ128rri, X86::VRNDSCALEPSZ128rmi, 0}, {X86::VRNDSCALEPSZ256rri, X86::VRNDSCALEPSZ256rmi, 0}, {X86::VRNDSCALEPSZrri, X86::VRNDSCALEPSZrmi, 0}, - {X86::VROUNDPDYr, X86::VROUNDPDYm, 0}, - {X86::VROUNDPDr, X86::VROUNDPDm, 0}, - {X86::VROUNDPSYr, X86::VROUNDPSYm, 0}, - {X86::VROUNDPSr, X86::VROUNDPSm, 0}, + {X86::VROUNDPDYri, X86::VROUNDPDYmi, 0}, + {X86::VROUNDPDri, X86::VROUNDPDmi, 0}, + {X86::VROUNDPSYri, X86::VROUNDPSYmi, 0}, + {X86::VROUNDPSri, X86::VROUNDPSmi, 0}, {X86::VRSQRT14PDZ128r, X86::VRSQRT14PDZ128m, 0}, {X86::VRSQRT14PDZ256r, X86::VRSQRT14PDZ256m, 0}, {X86::VRSQRT14PDZr, X86::VRSQRT14PDZm, 0}, @@ -2234,8 +2234,8 @@ static const X86FoldTableEntry Table2[] = { {X86::PUNPCKLWDrr, X86::PUNPCKLWDrm, TB_ALIGN_16}, {X86::PXORrr, X86::PXORrm, TB_ALIGN_16}, {X86::RCPSSr_Int, X86::RCPSSm_Int, TB_NO_REVERSE}, - {X86::ROUNDSDr_Int, X86::ROUNDSDm_Int, TB_NO_REVERSE}, - {X86::ROUNDSSr_Int, X86::ROUNDSSm_Int, TB_NO_REVERSE}, + {X86::ROUNDSDri_Int, X86::ROUNDSDmi_Int, TB_NO_REVERSE}, + {X86::ROUNDSSri_Int, X86::ROUNDSSmi_Int, TB_NO_REVERSE}, {X86::RSQRTSSr_Int, X86::RSQRTSSm_Int, TB_NO_REVERSE}, {X86::SBB16rr, X86::SBB16rm, 0}, {X86::SBB16rr_ND, X86::SBB16rm_ND, 0}, @@ -3778,10 +3778,10 @@ static const X86FoldTableEntry Table2[] = { {X86::VRNDSCALESHZr_Int, X86::VRNDSCALESHZm_Int, TB_NO_REVERSE}, {X86::VRNDSCALESSZr, X86::VRNDSCALESSZm, 0}, {X86::VRNDSCALESSZr_Int, X86::VRNDSCALESSZm_Int, TB_NO_REVERSE}, - {X86::VROUNDSDr, X86::VROUNDSDm, 0}, - {X86::VROUNDSDr_Int, X86::VROUNDSDm_Int, TB_NO_REVERSE}, - {X86::VROUNDSSr, X86::VROUNDSSm, 0}, - {X86::VROUNDSSr_Int, X86::VROUNDSSm_Int, TB_NO_REVERSE}, + {X86::VROUNDSDri, X86::VROUNDSDmi, 0}, + {X86::VROUNDSDri_Int, X86::VROUNDSDmi_Int, TB_NO_REVERSE}, + {X86::VROUNDSSri, X86::VROUNDSSmi, 0}, + {X86::VROUNDSSri_Int, X86::VROUNDSSmi_Int, TB_NO_REVERSE}, {X86::VRSQRT14PDZ128rkz, X86::VRSQRT14PDZ128mkz, 0}, {X86::VRSQRT14PDZ256rkz, X86::VRSQRT14PDZ256mkz, 0}, {X86::VRSQRT14PDZrkz, X86::VRSQRT14PDZmkz, 0}, diff --git a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def index 77cf65b..665a394 100644 --- a/llvm/utils/TableGen/X86ManualCompressEVEXTables.def +++ 
b/llvm/utils/TableGen/X86ManualCompressEVEXTables.def @@ -197,12 +197,12 @@ ENTRY(VPUNPCKLQDQZ128rm, VPUNPCKLQDQrm) ENTRY(VPUNPCKLQDQZ128rr, VPUNPCKLQDQrr) ENTRY(VPXORQZ128rm, VPXORrm) ENTRY(VPXORQZ128rr, VPXORrr) -ENTRY(VRNDSCALEPDZ128rmi, VROUNDPDm) -ENTRY(VRNDSCALEPDZ128rri, VROUNDPDr) -ENTRY(VRNDSCALESDZm, VROUNDSDm) -ENTRY(VRNDSCALESDZm_Int, VROUNDSDm_Int) -ENTRY(VRNDSCALESDZr, VROUNDSDr) -ENTRY(VRNDSCALESDZr_Int, VROUNDSDr_Int) +ENTRY(VRNDSCALEPDZ128rmi, VROUNDPDmi) +ENTRY(VRNDSCALEPDZ128rri, VROUNDPDri) +ENTRY(VRNDSCALESDZm, VROUNDSDmi) +ENTRY(VRNDSCALESDZm_Int, VROUNDSDmi_Int) +ENTRY(VRNDSCALESDZr, VROUNDSDri) +ENTRY(VRNDSCALESDZr_Int, VROUNDSDri_Int) ENTRY(VSHUFPDZ128rmi, VSHUFPDrmi) ENTRY(VSHUFPDZ128rri, VSHUFPDrri) ENTRY(VSQRTPDZ128m, VSQRTPDm) @@ -306,8 +306,8 @@ ENTRY(VPUNPCKLQDQZ256rm, VPUNPCKLQDQYrm) ENTRY(VPUNPCKLQDQZ256rr, VPUNPCKLQDQYrr) ENTRY(VPXORQZ256rm, VPXORYrm) ENTRY(VPXORQZ256rr, VPXORYrr) -ENTRY(VRNDSCALEPDZ256rmi, VROUNDPDYm) -ENTRY(VRNDSCALEPDZ256rri, VROUNDPDYr) +ENTRY(VRNDSCALEPDZ256rmi, VROUNDPDYmi) +ENTRY(VRNDSCALEPDZ256rri, VROUNDPDYri) ENTRY(VSHUFPDZ256rmi, VSHUFPDYrmi) ENTRY(VSHUFPDZ256rri, VSHUFPDYrri) ENTRY(VSQRTPDZ256m, VSQRTPDYm) -- cgit v1.1 From c1742525d0126a6124d15512b7283c4e37c7c186 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 15:42:29 +0100 Subject: [X86] evex-to-vex-compress.mir - update test checks missed in #87636 --- llvm/test/CodeGen/X86/evex-to-vex-compress.mir | 32 +++++++++++++------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir index 548cf24..13c9585 100644 --- a/llvm/test/CodeGen/X86/evex-to-vex-compress.mir +++ b/llvm/test/CodeGen/X86/evex-to-vex-compress.mir @@ -869,13 +869,13 @@ body: | $ymm0 = VSHUFPSZ256rmi $ymm0, $rdi, 1, $noreg, 0, $noreg, -24 ; CHECK: $ymm0 = VSHUFPSYrri $ymm0, $ymm1, -24 $ymm0 = VSHUFPSZ256rri $ymm0, $ymm1, -24 - ; CHECK: $ymm0 = VROUNDPDYm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPDYmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $ymm0 = VRNDSCALEPDZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $ymm0 = VROUNDPDYr $ymm0, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPDYri $ymm0, 15, implicit $mxcsr $ymm0 = VRNDSCALEPDZ256rri $ymm0, 15, implicit $mxcsr - ; CHECK: $ymm0 = VROUNDPSYm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPSYmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $ymm0 = VRNDSCALEPSZ256rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $ymm0 = VROUNDPSYr $ymm0, 15, implicit $mxcsr + ; CHECK: $ymm0 = VROUNDPSYri $ymm0, 15, implicit $mxcsr $ymm0 = VRNDSCALEPSZ256rri $ymm0, 15, implicit $mxcsr ; CHECK: $ymm0 = VPERM2F128rm $ymm0, $rip, 1, $noreg, 0, $noreg, 32 $ymm0 = VSHUFF32X4Z256rmi $ymm0, $rip, 1, $noreg, 0, $noreg, 228 @@ -1751,13 +1751,13 @@ body: | $xmm0 = VALIGNQZ128rmi $xmm0, $rip, 1, $noreg, 0, $noreg, 1 ; CHECK: $xmm0 = VPALIGNRrri $xmm0, $xmm1, 8 $xmm0 = VALIGNQZ128rri $xmm0, $xmm1, 1 - ; CHECK: $xmm0 = VROUNDPDm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPDmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALEPDZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDPDr $xmm0, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPDri $xmm0, 15, implicit $mxcsr $xmm0 = VRNDSCALEPDZ128rri $xmm0, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDPSm $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; 
CHECK: $xmm0 = VROUNDPSmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALEPSZ128rmi $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDPSr $xmm0, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDPSri $xmm0, 15, implicit $mxcsr $xmm0 = VRNDSCALEPSZ128rri $xmm0, 15, implicit $mxcsr RET64 @@ -2308,21 +2308,21 @@ body: | $xmm0 = VINSERTPSZrm $xmm0, $rdi, 1, $noreg, 0, $noreg, 1 ; CHECK: $xmm0 = VINSERTPSrr $xmm0, $xmm0, 1 $xmm0 = VINSERTPSZrr $xmm0, $xmm0, 1 - ; CHECK: $xmm0 = VROUNDSDm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDmi $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALESDZm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSDr $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDri $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESDZr $xmm0, $xmm1, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSSm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSmi $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZm $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSSr $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSri $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZr $xmm0, $xmm1, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSDm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDmi_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALESDZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSDr_Int $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSDri_Int $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESDZr_Int $xmm0, $xmm1, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSSm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSmi_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZm_Int $xmm0, $rip, 1, $noreg, 0, $noreg, 15, implicit $mxcsr - ; CHECK: $xmm0 = VROUNDSSr_Int $xmm0, $xmm1, 15, implicit $mxcsr + ; CHECK: $xmm0 = VROUNDSSri_Int $xmm0, $xmm1, 15, implicit $mxcsr $xmm0 = VRNDSCALESSZr_Int $xmm0, $xmm1, 15, implicit $mxcsr RET64 -- cgit v1.1 From 8ebf7b7d7aed45889415669e0c7353f9b528161a Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Thu, 4 Apr 2024 07:46:55 -0700 Subject: [InstallAPI][Test] Add test for invalid verification mode (#87602) --- clang/test/InstallAPI/driver-invalid-options.test | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/clang/test/InstallAPI/driver-invalid-options.test b/clang/test/InstallAPI/driver-invalid-options.test index 69f3b2d..0c630ea 100644 --- a/clang/test/InstallAPI/driver-invalid-options.test +++ b/clang/test/InstallAPI/driver-invalid-options.test @@ -7,3 +7,9 @@ // RUN: not clang-installapi -target x86_64-apple-ios-simulator %s -o tmp.tbd 2> %t // RUN: FileCheck --check-prefix INVALID_INSTALL_NAME -input-file %t %s // INVALID_INSTALL_NAME: error: no install name specified: add -install_name + +/// Check invalid verification mode. 
+// RUN: not clang-installapi -install_name Foo -target arm64-apple-ios13 \ +// RUN: --verify-mode=Invalid -o tmp.tbd 2> %t +// RUN: FileCheck --check-prefix INVALID_VERIFY_MODE -input-file %t %s +// INVALID_VERIFY_MODE: error: invalid value 'Invalid' in '--verify-mode=Invalid' -- cgit v1.1 From 5b59ae423a9e86beddafb868b9d549b2f18825ab Mon Sep 17 00:00:00 2001 From: Piotr Sobczak Date: Thu, 4 Apr 2024 16:47:25 +0200 Subject: [DAG] Preserve NUW when reassociating (#87621) Similarly to the generic case below, preserve the NUW flag when reassociating adds with constants. --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 11 +- llvm/test/CodeGen/AMDGPU/bf16.ll | 73 +- llvm/test/CodeGen/AMDGPU/function-returns.ll | 87 +- .../CodeGen/AMDGPU/gfx-callable-return-types.ll | 582 +- llvm/test/CodeGen/WebAssembly/multi-return.ll | 72 +- llvm/test/CodeGen/WebAssembly/simd-arith.ll | 13138 ++++++++----------- llvm/test/CodeGen/WebAssembly/simd.ll | 408 +- 7 files changed, 6077 insertions(+), 8294 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 0a47318..f20080c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -1164,19 +1164,20 @@ SDValue DAGCombiner::reassociateOpsCommutative(unsigned Opc, const SDLoc &DL, SDValue N01 = N0.getOperand(1); if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N01))) { + SDNodeFlags NewFlags; + if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() && + Flags.hasNoUnsignedWrap()) + NewFlags.setNoUnsignedWrap(true); + if (DAG.isConstantIntBuildVectorOrConstantInt(peekThroughBitcasts(N1))) { // Reassociate: (op (op x, c1), c2) -> (op x, (op c1, c2)) if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, {N01, N1})) - return DAG.getNode(Opc, DL, VT, N00, OpNode); + return DAG.getNode(Opc, DL, VT, N00, OpNode, NewFlags); return SDValue(); } if (TLI.isReassocProfitable(DAG, N0, N1)) { // Reassociate: (op (op x, c1), y) -> (op (op x, y), c1) // iff (op x, c1) has one use - SDNodeFlags NewFlags; - if (N0.getOpcode() == ISD::ADD && N0->getFlags().hasNoUnsignedWrap() && - Flags.hasNoUnsignedWrap()) - NewFlags.setNoUnsignedWrap(true); SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N00, N1, NewFlags); return DAG.getNode(Opc, DL, VT, OpNode, N01, NewFlags); } diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll index 9865883..bf4302c 100644 --- a/llvm/test/CodeGen/AMDGPU/bf16.ll +++ b/llvm/test/CodeGen/AMDGPU/bf16.ll @@ -5678,22 +5678,18 @@ define { <32 x i32>, bfloat } @test_overflow_stack(bfloat %a, <32 x i32> %b) { ; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:8 ; GFX11-NEXT: scratch_load_b32 v32, off, s32 offset:4 ; GFX11-NEXT: scratch_load_b32 v31, off, s32 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_clause 0x4 -; GFX11-NEXT: scratch_store_b128 off, v[18:21], s0 offset:64 -; GFX11-NEXT: scratch_store_b128 off, v[10:13], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[6:9], s0 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[2:5], s0 -; GFX11-NEXT: scratch_store_b16 off, v1, s0 offset:128 -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: s_add_i32 s3, s0, 0x50 -; GFX11-NEXT: s_add_i32 s0, s0, 48 +; GFX11-NEXT: s_clause 0x5 +; GFX11-NEXT: scratch_store_b128 v0, v[22:25], off offset:80 +; GFX11-NEXT: scratch_store_b128 v0, v[18:21], off offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[14:17], off offset:48 +; 
GFX11-NEXT: scratch_store_b128 v0, v[10:13], off offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[6:9], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[2:5], off ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b128 off, v[30:33], s1 -; GFX11-NEXT: scratch_store_b128 off, v[26:29], s2 -; GFX11-NEXT: scratch_store_b128 off, v[22:25], s3 -; GFX11-NEXT: scratch_store_b128 off, v[14:17], s0 +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: scratch_store_b128 v0, v[30:33], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[26:29], off offset:96 +; GFX11-NEXT: scratch_store_b16 v0, v1, off offset:128 ; GFX11-NEXT: s_setpc_b64 s[30:31] %ins.0 = insertvalue { <32 x i32>, bfloat } poison, <32 x i32> %b, 0 %ins.1 = insertvalue { <32 x i32>, bfloat } %ins.0 ,bfloat %a, 1 @@ -8827,19 +8823,6 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX11-NEXT: global_load_u16 v32, v[1:2], off offset:54 ; GFX11-NEXT: global_load_u16 v33, v[1:2], off offset:58 ; GFX11-NEXT: global_load_u16 v1, v[1:2], off offset:62 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0xf0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s3, s0, 0xd0 -; GFX11-NEXT: s_add_i32 s4, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s5, s0, 0xb0 -; GFX11-NEXT: s_add_i32 s6, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s7, s0, 0x90 -; GFX11-NEXT: s_add_i32 s8, s0, 0x70 -; GFX11-NEXT: s_add_i32 s9, s0, 0x60 -; GFX11-NEXT: s_add_i32 s10, s0, 0x50 -; GFX11-NEXT: s_add_i32 s11, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(31) ; GFX11-NEXT: v_lshlrev_b32_e32 v39, 16, v3 ; GFX11-NEXT: s_waitcnt vmcnt(30) @@ -8936,23 +8919,23 @@ define <32 x double> @global_extload_v32bf16_to_v32f64(ptr addrspace(1) %ptr) { ; GFX11-NEXT: v_cvt_f64_f32_e32 v[5:6], v5 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[3:4], v2 ; GFX11-NEXT: v_cvt_f64_f32_e32 v[1:2], v37 -; GFX11-NEXT: scratch_store_b128 off, v[96:99], s1 -; GFX11-NEXT: scratch_store_b128 off, v[84:87], s2 -; GFX11-NEXT: scratch_store_b128 off, v[80:83], s3 -; GFX11-NEXT: scratch_store_b128 off, v[68:71], s4 -; GFX11-NEXT: scratch_store_b128 off, v[64:67], s5 -; GFX11-NEXT: scratch_store_b128 off, v[52:55], s6 -; GFX11-NEXT: scratch_store_b128 off, v[48:51], s7 -; GFX11-NEXT: scratch_store_b128 off, v[33:36], s0 offset:128 -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s8 -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s9 -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s10 -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s0 offset:64 -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s11 -; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 +; GFX11-NEXT: s_clause 0xf +; GFX11-NEXT: scratch_store_b128 v0, v[96:99], off offset:240 +; GFX11-NEXT: scratch_store_b128 v0, v[84:87], off offset:224 +; GFX11-NEXT: scratch_store_b128 v0, v[80:83], off offset:208 +; GFX11-NEXT: scratch_store_b128 v0, v[68:71], off offset:192 +; GFX11-NEXT: scratch_store_b128 v0, v[64:67], off offset:176 +; GFX11-NEXT: scratch_store_b128 v0, v[52:55], off offset:160 +; GFX11-NEXT: scratch_store_b128 v0, v[48:51], off offset:144 +; GFX11-NEXT: scratch_store_b128 v0, v[33:36], off offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:96 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:80 +; GFX11-NEXT: 
scratch_store_b128 v0, v[17:20], off offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: s_setpc_b64 s[30:31] %load = load <32 x bfloat>, ptr addrspace(1) %ptr %fpext = fpext <32 x bfloat> %load to <32 x double> diff --git a/llvm/test/CodeGen/AMDGPU/function-returns.ll b/llvm/test/CodeGen/AMDGPU/function-returns.ll index acadee2..401cbce 100644 --- a/llvm/test/CodeGen/AMDGPU/function-returns.ll +++ b/llvm/test/CodeGen/AMDGPU/function-returns.ll @@ -1561,34 +1561,28 @@ define <33 x i32> @v33i32_func_void() #0 { ; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80 ; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64 ; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48 -; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:16 -; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 -; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:32 +; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32 +; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16 +; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 ; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: s_add_i32 s3, s0, 0x50 -; GFX11-NEXT: s_add_i32 s4, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s2 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s3 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s0 offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s4 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s0 offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s0 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0 offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 off, v33, s0 offset:128 +; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128 ; GFX11-NEXT: s_setpc_b64 s[30:31] %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load <33 x i32>, ptr addrspace(1) %ptr @@ -1850,34 +1844,28 @@ define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 { ; GFX11-NEXT: buffer_load_b128 v[9:12], off, s[0:3], 0 offset:80 ; GFX11-NEXT: buffer_load_b128 v[13:16], off, s[0:3], 0 offset:64 ; GFX11-NEXT: buffer_load_b128 v[17:20], off, s[0:3], 0 offset:48 -; GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:16 -; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 -; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:32 +; 
GFX11-NEXT: buffer_load_b128 v[21:24], off, s[0:3], 0 offset:32 +; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:16 +; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 ; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 offset:128 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: s_add_i32 s3, s0, 0x50 -; GFX11-NEXT: s_add_i32 s4, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s2 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:96 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s3 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:80 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s0 offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:64 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s4 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:48 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s0 offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:32 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s0 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:16 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0 offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 off, v33, s0 offset:128 +; GFX11-NEXT: scratch_store_b32 v0, v33, off offset:128 ; GFX11-NEXT: s_setpc_b64 s[30:31] %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load { <32 x i32>, i32 }, ptr addrspace(1) %ptr @@ -2143,33 +2131,24 @@ define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 { ; GFX11-NEXT: buffer_load_b128 v[25:28], off, s[0:3], 0 offset:144 ; GFX11-NEXT: buffer_load_b128 v[29:32], off, s[0:3], 0 offset:128 ; GFX11-NEXT: buffer_load_b32 v33, off, s[0:3], 0 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) -; GFX11-NEXT: s_add_i32 s1, s0, 0xf0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s3, s0, 0xd0 -; GFX11-NEXT: s_add_i32 s4, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s5, s0, 0xb0 -; GFX11-NEXT: s_add_i32 s6, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s7, s0, 0x90 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:240 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s2 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:224 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s3 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:208 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s4 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:192 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s5 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:176 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s6 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:160 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, 
v[25:28], s7 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:144 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s0 offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[29:32], off offset:128 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b32 off, v33, s0 +; GFX11-NEXT: scratch_store_b32 v0, v33, off ; GFX11-NEXT: s_setpc_b64 s[30:31] %ptr = load volatile ptr addrspace(1), ptr addrspace(4) undef %val = load { i32, <32 x i32> }, ptr addrspace(1) %ptr diff --git a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll index c1d6826..3b078c4 100644 --- a/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll +++ b/llvm/test/CodeGen/AMDGPU/gfx-callable-return-types.ll @@ -1989,256 +1989,138 @@ define amdgpu_gfx <512 x i32> @return_512xi32() #0 { ; GFX11-NEXT: s_mov_b32 s2, s0 ; GFX11-NEXT: v_dual_mov_b32 v4, s3 :: v_dual_mov_b32 v3, s2 ; GFX11-NEXT: v_dual_mov_b32 v2, s1 :: v_dual_mov_b32 v1, s0 -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 -; GFX11-NEXT: s_clause 0x7 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:1024 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:512 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:256 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:128 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:64 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:32 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 offset:16 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 -; GFX11-NEXT: s_add_i32 s1, s0, 0x7f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x7e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x7d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x7c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x7b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x7a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x790 -; GFX11-NEXT: s_add_i32 s2, s0, 0x780 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x770 -; GFX11-NEXT: s_add_i32 s2, s0, 0x760 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x750 -; GFX11-NEXT: s_add_i32 s2, s0, 0x740 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x730 -; GFX11-NEXT: s_add_i32 s2, s0, 0x720 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x710 -; GFX11-NEXT: s_add_i32 s2, s0, 0x700 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x6f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x6e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x6d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x6c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x6b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x6a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 
0x690 -; GFX11-NEXT: s_add_i32 s2, s0, 0x680 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x670 -; GFX11-NEXT: s_add_i32 s2, s0, 0x660 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x650 -; GFX11-NEXT: s_add_i32 s2, s0, 0x640 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x630 -; GFX11-NEXT: s_add_i32 s2, s0, 0x620 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x610 -; GFX11-NEXT: s_add_i32 s2, s0, 0x600 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x5f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x5e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x5d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x5c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x5b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x5a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x590 -; GFX11-NEXT: s_add_i32 s2, s0, 0x580 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x570 -; GFX11-NEXT: s_add_i32 s2, s0, 0x560 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x550 -; GFX11-NEXT: s_add_i32 s2, s0, 0x540 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x530 -; GFX11-NEXT: s_add_i32 s2, s0, 0x520 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x510 -; GFX11-NEXT: s_add_i32 s2, s0, 0x500 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x4f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x4e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x4d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x4c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x4b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x4a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x490 -; GFX11-NEXT: s_add_i32 s2, s0, 0x480 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x470 -; GFX11-NEXT: s_add_i32 s2, s0, 0x460 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x450 -; GFX11-NEXT: s_add_i32 s2, s0, 0x440 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x430 -; GFX11-NEXT: s_add_i32 s2, s0, 0x420 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 
s1, s0, 0x410 -; GFX11-NEXT: s_add_i32 s2, s0, 0x3f0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x3e0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x3d0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x3c0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x3b0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x3a0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x390 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x380 -; GFX11-NEXT: s_add_i32 s2, s0, 0x370 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x360 -; GFX11-NEXT: s_add_i32 s2, s0, 0x350 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x340 -; GFX11-NEXT: s_add_i32 s2, s0, 0x330 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x320 -; GFX11-NEXT: s_add_i32 s2, s0, 0x310 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x300 -; GFX11-NEXT: s_add_i32 s2, s0, 0x2f0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x2e0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x2d0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x2c0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x2b0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x2a0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x290 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x280 -; GFX11-NEXT: s_add_i32 s2, s0, 0x270 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x260 -; GFX11-NEXT: s_add_i32 s2, s0, 0x250 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x240 -; GFX11-NEXT: s_add_i32 s2, s0, 0x230 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x220 -; GFX11-NEXT: s_add_i32 s2, s0, 0x210 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x1f0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x1e0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x1d0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x1c0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x1b0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x1a0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x190 -; GFX11-NEXT: s_add_i32 s2, s0, 0x180 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: 
s_add_i32 s1, s0, 0x170 -; GFX11-NEXT: s_add_i32 s2, s0, 0x160 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x150 -; GFX11-NEXT: s_add_i32 s2, s0, 0x140 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x130 -; GFX11-NEXT: s_add_i32 s2, s0, 0x120 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x110 -; GFX11-NEXT: s_add_i32 s2, s0, 0xf0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xd0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s2, s0, 0xb0 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x90 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x70 -; GFX11-NEXT: s_add_i32 s2, s0, 0x60 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s2 -; GFX11-NEXT: s_add_i32 s1, s0, 0x50 -; GFX11-NEXT: s_add_i32 s0, s0, 48 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s1 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2032 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2016 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:2000 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1984 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1968 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1952 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1936 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1920 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1904 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1888 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1872 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1856 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1840 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1824 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1808 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1792 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1776 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1760 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1744 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1728 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1712 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1696 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1680 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1664 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1648 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1632 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1616 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1600 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1584 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1568 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1552 +; GFX11-NEXT: 
scratch_store_b128 v0, v[1:4], off offset:1536 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1520 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1504 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1488 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1472 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1456 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1440 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1424 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1408 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1392 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1376 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1360 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1344 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1328 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1312 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1296 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1280 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1264 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1248 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1232 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1216 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1200 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1184 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1168 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1152 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1136 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1120 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1104 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1088 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1072 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1056 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1040 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1024 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:1008 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:992 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:976 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:960 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:944 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:928 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:912 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:896 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:880 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:864 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:848 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:832 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:816 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:800 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:784 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:768 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:752 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:736 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:720 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:704 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:688 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:672 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:656 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:640 
+; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:624 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:608 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:592 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:576 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:560 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:544 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:528 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:512 +; GFX11-NEXT: s_clause 0x1f +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:496 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:480 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:464 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:448 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:432 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:416 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:400 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:384 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:368 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:352 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:336 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:320 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:304 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:288 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:272 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:256 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:240 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:224 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:208 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:192 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:176 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:160 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:144 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:96 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:80 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:64 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:48 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:32 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: ret <512 x i32> zeroinitializer @@ -2636,7 +2518,6 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 { ; GFX11-LABEL: return_72xi32: ; GFX11: ; %bb.0: ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: v_readfirstlane_b32 s0, v0 ; GFX11-NEXT: s_clause 0xc ; GFX11-NEXT: scratch_store_b32 off, v40, s32 offset:212 ; GFX11-NEXT: scratch_store_b32 off, v41, s32 offset:208 @@ -2651,93 +2532,82 @@ define amdgpu_gfx <72 x i32> @return_72xi32(<72 x i32> %val) #1 { ; GFX11-NEXT: scratch_store_b32 off, v61, s32 offset:172 ; GFX11-NEXT: scratch_store_b32 off, v62, s32 offset:168 ; GFX11-NEXT: scratch_store_b32 off, v63, s32 offset:164 -; GFX11-NEXT: s_clause 0x14 -; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:32 -; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:28 -; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:24 -; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:48 -; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:44 -; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:40 -; GFX11-NEXT: 
scratch_load_b32 v55, off, s32 offset:64 -; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:60 -; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:56 -; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:80 -; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:76 -; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:72 -; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:96 -; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:92 -; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:88 -; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:112 -; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:108 -; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:104 -; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:128 -; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:124 -; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:120 -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s0 offset:64 +; GFX11-NEXT: s_clause 0x11 +; GFX11-NEXT: scratch_load_b32 v36, off, s32 offset:16 +; GFX11-NEXT: scratch_load_b32 v35, off, s32 offset:12 +; GFX11-NEXT: scratch_load_b32 v34, off, s32 offset:8 +; GFX11-NEXT: scratch_load_b32 v51, off, s32 offset:32 +; GFX11-NEXT: scratch_load_b32 v50, off, s32 offset:28 +; GFX11-NEXT: scratch_load_b32 v49, off, s32 offset:24 +; GFX11-NEXT: scratch_load_b32 v55, off, s32 offset:48 +; GFX11-NEXT: scratch_load_b32 v54, off, s32 offset:44 +; GFX11-NEXT: scratch_load_b32 v53, off, s32 offset:40 +; GFX11-NEXT: scratch_load_b32 v40, off, s32 offset:64 +; GFX11-NEXT: scratch_load_b32 v39, off, s32 offset:60 +; GFX11-NEXT: scratch_load_b32 v38, off, s32 offset:56 +; GFX11-NEXT: scratch_load_b32 v44, off, s32 offset:80 +; GFX11-NEXT: scratch_load_b32 v43, off, s32 offset:76 +; GFX11-NEXT: scratch_load_b32 v42, off, s32 offset:72 +; GFX11-NEXT: scratch_load_b32 v59, off, s32 offset:96 +; GFX11-NEXT: scratch_load_b32 v58, off, s32 offset:92 +; GFX11-NEXT: scratch_load_b32 v57, off, s32 offset:88 +; GFX11-NEXT: scratch_store_b128 v0, v[21:24], off offset:80 +; GFX11-NEXT: s_clause 0x2 +; GFX11-NEXT: scratch_load_b32 v23, off, s32 offset:112 +; GFX11-NEXT: scratch_load_b32 v22, off, s32 offset:108 +; GFX11-NEXT: scratch_load_b32 v21, off, s32 offset:104 +; GFX11-NEXT: scratch_store_b128 v0, v[17:20], off offset:64 ; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_load_b32 v20, off, s32 offset:144 -; GFX11-NEXT: scratch_load_b32 v19, off, s32 offset:140 -; GFX11-NEXT: scratch_load_b32 v18, off, s32 offset:136 -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s0 offset:32 +; GFX11-NEXT: scratch_load_b32 v19, off, s32 offset:128 +; GFX11-NEXT: scratch_load_b32 v18, off, s32 offset:124 +; GFX11-NEXT: scratch_load_b32 v17, off, s32 offset:120 +; GFX11-NEXT: scratch_store_b128 v0, v[13:16], off offset:48 ; GFX11-NEXT: s_clause 0x2 -; GFX11-NEXT: scratch_load_b32 v12, off, s32 offset:160 -; GFX11-NEXT: scratch_load_b32 v11, off, s32 offset:156 -; GFX11-NEXT: scratch_load_b32 v10, off, s32 offset:152 -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:16 +; GFX11-NEXT: scratch_load_b32 v15, off, s32 offset:144 +; GFX11-NEXT: scratch_load_b32 v14, off, s32 offset:140 +; GFX11-NEXT: scratch_load_b32 v13, off, s32 offset:136 +; GFX11-NEXT: scratch_store_b128 v0, v[9:12], off offset:32 ; GFX11-NEXT: s_clause 0xd -; GFX11-NEXT: scratch_load_b32 v8, off, s32 offset:16 -; GFX11-NEXT: scratch_load_b32 v7, off, s32 offset:12 -; GFX11-NEXT: scratch_load_b32 v6, off, s32 offset:8 -; GFX11-NEXT: scratch_load_b32 v5, off, s32 offset:4 -; GFX11-NEXT: scratch_load_b32 v9, off, s32 offset:148 -; GFX11-NEXT: 
scratch_load_b32 v17, off, s32 offset:132 -; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:116 -; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:100 -; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:84 -; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:68 -; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:52 -; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:36 -; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:20 +; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:160 +; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:156 +; GFX11-NEXT: scratch_load_b32 v61, off, s32 offset:152 +; GFX11-NEXT: scratch_load_b32 v60, off, s32 offset:148 +; GFX11-NEXT: scratch_load_b32 v12, off, s32 offset:132 +; GFX11-NEXT: scratch_load_b32 v16, off, s32 offset:116 +; GFX11-NEXT: scratch_load_b32 v20, off, s32 offset:100 +; GFX11-NEXT: scratch_load_b32 v56, off, s32 offset:84 +; GFX11-NEXT: scratch_load_b32 v41, off, s32 offset:68 +; GFX11-NEXT: scratch_load_b32 v37, off, s32 offset:52 +; GFX11-NEXT: scratch_load_b32 v52, off, s32 offset:36 +; GFX11-NEXT: scratch_load_b32 v48, off, s32 offset:20 +; GFX11-NEXT: scratch_load_b32 v33, off, s32 offset:4 ; GFX11-NEXT: scratch_load_b32 v32, off, s32 -; GFX11-NEXT: s_add_i32 s1, s0, 0x110 -; GFX11-NEXT: scratch_store_b128 off, v[1:4], s0 -; GFX11-NEXT: s_add_i32 s2, s0, 0x100 -; GFX11-NEXT: s_add_i32 s3, s0, 0xf0 -; GFX11-NEXT: s_add_i32 s34, s0, 0xe0 -; GFX11-NEXT: s_add_i32 s35, s0, 0xd0 -; GFX11-NEXT: s_add_i32 s36, s0, 0xc0 -; GFX11-NEXT: s_add_i32 s37, s0, 0xb0 -; GFX11-NEXT: s_add_i32 s38, s0, 0xa0 -; GFX11-NEXT: s_add_i32 s39, s0, 0x90 -; GFX11-NEXT: s_add_i32 s40, s0, 0x70 -; GFX11-NEXT: s_add_i32 s41, s0, 0x60 -; GFX11-NEXT: s_add_i32 s42, s0, 0x50 -; GFX11-NEXT: s_add_i32 s43, s0, 48 ; GFX11-NEXT: s_waitcnt vmcnt(10) -; GFX11-NEXT: scratch_store_b128 off, v[5:8], s0 offset:128 +; GFX11-NEXT: scratch_store_b128 v0, v[60:63], off offset:272 ; GFX11-NEXT: s_waitcnt vmcnt(9) -; GFX11-NEXT: scratch_store_b128 off, v[9:12], s1 +; GFX11-NEXT: scratch_store_b128 v0, v[12:15], off offset:256 ; GFX11-NEXT: s_waitcnt vmcnt(8) -; GFX11-NEXT: scratch_store_b128 off, v[17:20], s2 +; GFX11-NEXT: scratch_store_b128 v0, v[16:19], off offset:240 ; GFX11-NEXT: s_waitcnt vmcnt(7) -; GFX11-NEXT: scratch_store_b128 off, v[60:63], s3 +; GFX11-NEXT: scratch_store_b128 v0, v[20:23], off offset:224 ; GFX11-NEXT: s_waitcnt vmcnt(6) -; GFX11-NEXT: scratch_store_b128 off, v[56:59], s34 +; GFX11-NEXT: scratch_store_b128 v0, v[56:59], off offset:208 ; GFX11-NEXT: s_waitcnt vmcnt(5) -; GFX11-NEXT: scratch_store_b128 off, v[41:44], s35 +; GFX11-NEXT: scratch_store_b128 v0, v[41:44], off offset:192 ; GFX11-NEXT: s_waitcnt vmcnt(4) -; GFX11-NEXT: scratch_store_b128 off, v[37:40], s36 +; GFX11-NEXT: scratch_store_b128 v0, v[37:40], off offset:176 ; GFX11-NEXT: s_waitcnt vmcnt(3) -; GFX11-NEXT: scratch_store_b128 off, v[52:55], s37 +; GFX11-NEXT: scratch_store_b128 v0, v[52:55], off offset:160 ; GFX11-NEXT: s_waitcnt vmcnt(2) -; GFX11-NEXT: scratch_store_b128 off, v[48:51], s38 +; GFX11-NEXT: scratch_store_b128 v0, v[48:51], off offset:144 ; GFX11-NEXT: s_waitcnt vmcnt(1) -; GFX11-NEXT: scratch_store_b128 off, v[33:36], s39 +; GFX11-NEXT: scratch_store_b128 v0, v[33:36], off offset:128 ; GFX11-NEXT: s_waitcnt vmcnt(0) -; GFX11-NEXT: scratch_store_b128 off, v[29:32], s40 -; GFX11-NEXT: scratch_store_b128 off, v[25:28], s41 -; GFX11-NEXT: scratch_store_b128 off, v[21:24], s42 -; GFX11-NEXT: scratch_store_b128 off, v[13:16], s43 +; GFX11-NEXT: s_clause 0x3 +; GFX11-NEXT: 
scratch_store_b128 v0, v[29:32], off offset:112 +; GFX11-NEXT: scratch_store_b128 v0, v[25:28], off offset:96 +; GFX11-NEXT: scratch_store_b128 v0, v[5:8], off offset:16 +; GFX11-NEXT: scratch_store_b128 v0, v[1:4], off ; GFX11-NEXT: s_clause 0xc ; GFX11-NEXT: scratch_load_b32 v63, off, s32 offset:164 ; GFX11-NEXT: scratch_load_b32 v62, off, s32 offset:168 @@ -3306,7 +3176,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-LABEL: call_72xi32: ; GFX11: ; %bb.0: ; %entry ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GFX11-NEXT: s_mov_b32 s46, s33 +; GFX11-NEXT: s_mov_b32 s34, s33 ; GFX11-NEXT: s_add_i32 s33, s32, 0x1ff ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) ; GFX11-NEXT: s_and_b32 s33, s33, 0xfffffe00 @@ -3353,11 +3223,11 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 ; GFX11-NEXT: s_add_i32 s0, s32, 32 ; GFX11-NEXT: s_add_i32 s1, s32, 16 +; GFX11-NEXT: s_add_i32 s2, s33, 0x200 +; GFX11-NEXT: v_writelane_b32 v60, s30, 0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 ; GFX11-NEXT: scratch_store_b128 off, v[0:3], s1 -; GFX11-NEXT: s_add_i32 s0, s33, 0x200 -; GFX11-NEXT: v_writelane_b32 v60, s30, 0 -; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v3, 0 +; GFX11-NEXT: v_dual_mov_b32 v0, s2 :: v_dual_mov_b32 v3, 0 ; GFX11-NEXT: v_dual_mov_b32 v1, 0 :: v_dual_mov_b32 v2, 0 ; GFX11-NEXT: v_dual_mov_b32 v5, 0 :: v_dual_mov_b32 v4, 0 ; GFX11-NEXT: v_dual_mov_b32 v7, 0 :: v_dual_mov_b32 v6, 0 @@ -3373,14 +3243,14 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: v_dual_mov_b32 v27, 0 :: v_dual_mov_b32 v26, 0 ; GFX11-NEXT: v_dual_mov_b32 v29, 0 :: v_dual_mov_b32 v28, 0 ; GFX11-NEXT: v_dual_mov_b32 v31, 0 :: v_dual_mov_b32 v30, 0 -; GFX11-NEXT: s_mov_b32 s45, return_72xi32@abs32@hi -; GFX11-NEXT: s_mov_b32 s44, return_72xi32@abs32@lo +; GFX11-NEXT: s_mov_b32 s1, return_72xi32@abs32@hi +; GFX11-NEXT: s_mov_b32 s0, return_72xi32@abs32@lo ; GFX11-NEXT: v_writelane_b32 v60, s31, 1 -; GFX11-NEXT: s_swappc_b64 s[30:31], s[44:45] +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0x1 ; GFX11-NEXT: scratch_load_b128 v[45:48], off, s33 offset:624 ; GFX11-NEXT: scratch_load_b128 v[33:36], off, s33 offset:640 -; GFX11-NEXT: s_add_i32 s0, s32, 0xa0 +; GFX11-NEXT: s_add_i32 s2, s32, 0xa0 ; GFX11-NEXT: s_waitcnt vmcnt(1) ; GFX11-NEXT: v_mov_b32_e32 v32, v48 ; GFX11-NEXT: s_clause 0x9 @@ -3431,38 +3301,38 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: v_dual_mov_b32 v2, v5 :: v_dual_mov_b32 v3, v6 ; GFX11-NEXT: v_dual_mov_b32 v5, v8 :: v_dual_mov_b32 v6, v9 ; GFX11-NEXT: v_mov_b32_e32 v9, v20 -; GFX11-NEXT: scratch_store_b32 off, v11, s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x90 +; GFX11-NEXT: scratch_store_b32 off, v11, s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x90 ; GFX11-NEXT: v_mov_b32_e32 v11, v22 -; GFX11-NEXT: scratch_store_b128 off, v[4:7], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x80 +; GFX11-NEXT: scratch_store_b128 off, v[4:7], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x80 ; GFX11-NEXT: v_mov_b32_e32 v5, v16 -; GFX11-NEXT: scratch_store_b128 off, v[0:3], s0 +; GFX11-NEXT: scratch_store_b128 off, v[0:3], s2 ; GFX11-NEXT: v_mov_b32_e32 v0, 24 -; GFX11-NEXT: s_add_i32 s0, s32, 0x70 +; GFX11-NEXT: s_add_i32 s2, s32, 0x70 ; GFX11-NEXT: v_mov_b32_e32 v6, v17 -; GFX11-NEXT: scratch_store_b128 off, v[12:15], s0 +; GFX11-NEXT: scratch_store_b128 off, v[12:15], s2 ; GFX11-NEXT: v_mov_b32_e32 v13, v24 -; GFX11-NEXT: s_add_i32 s0, s32, 0x6c +; GFX11-NEXT: s_add_i32 s2, s32, 0x6c ; GFX11-NEXT: v_mov_b32_e32 v7, v18 
-; GFX11-NEXT: scratch_store_b32 off, v0, s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x60 +; GFX11-NEXT: scratch_store_b32 off, v0, s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x60 ; GFX11-NEXT: v_dual_mov_b32 v8, v19 :: v_dual_mov_b32 v15, v26 -; GFX11-NEXT: scratch_store_b96 off, v[56:58], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 0x50 +; GFX11-NEXT: scratch_store_b96 off, v[56:58], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 0x50 ; GFX11-NEXT: v_dual_mov_b32 v12, v23 :: v_dual_mov_b32 v29, v45 -; GFX11-NEXT: scratch_store_b128 off, v[40:43], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 64 +; GFX11-NEXT: scratch_store_b128 off, v[40:43], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 64 ; GFX11-NEXT: v_mov_b32_e32 v14, v25 -; GFX11-NEXT: scratch_store_b128 off, v[52:55], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 48 +; GFX11-NEXT: scratch_store_b128 off, v[52:55], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 48 ; GFX11-NEXT: v_mov_b32_e32 v16, v27 -; GFX11-NEXT: scratch_store_b128 off, v[36:39], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 32 +; GFX11-NEXT: scratch_store_b128 off, v[36:39], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 32 ; GFX11-NEXT: v_mov_b32_e32 v30, v46 -; GFX11-NEXT: scratch_store_b128 off, v[48:51], s0 -; GFX11-NEXT: s_add_i32 s0, s32, 16 -; GFX11-NEXT: scratch_store_b128 off, v[32:35], s0 +; GFX11-NEXT: scratch_store_b128 off, v[48:51], s2 +; GFX11-NEXT: s_add_i32 s2, s32, 16 +; GFX11-NEXT: scratch_store_b128 off, v[32:35], s2 ; GFX11-NEXT: scratch_load_b128 v[1:4], off, s33 offset:1588 ; 16-byte Folded Reload ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: v_mov_b32_e32 v1, 42 @@ -3470,10 +3340,10 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_load_b128 v[17:20], off, s33 offset:1572 ; GFX11-NEXT: scratch_load_b128 v[21:24], off, s33 offset:1556 ; GFX11-NEXT: scratch_load_b128 v[25:28], off, s33 offset:1540 -; GFX11-NEXT: s_add_i32 s0, s33, 0x400 +; GFX11-NEXT: s_add_i32 s2, s33, 0x400 ; GFX11-NEXT: s_delay_alu instid0(SALU_CYCLE_1) -; GFX11-NEXT: v_mov_b32_e32 v0, s0 -; GFX11-NEXT: s_swappc_b64 s[30:31], s[44:45] +; GFX11-NEXT: v_mov_b32_e32 v0, s2 +; GFX11-NEXT: s_swappc_b64 s[30:31], s[0:1] ; GFX11-NEXT: s_clause 0xb ; GFX11-NEXT: scratch_load_b32 v59, off, s33 ; GFX11-NEXT: scratch_load_b32 v58, off, s33 offset:4 @@ -3493,7 +3363,7 @@ define amdgpu_gfx void @call_72xi32() #1 { ; GFX11-NEXT: scratch_load_b32 v60, off, s33 offset:1536 ; 4-byte Folded Reload ; GFX11-NEXT: s_mov_b32 exec_lo, s0 ; GFX11-NEXT: s_addk_i32 s32, 0xf600 -; GFX11-NEXT: s_mov_b32 s33, s46 +; GFX11-NEXT: s_mov_b32 s33, s34 ; GFX11-NEXT: s_waitcnt vmcnt(0) ; GFX11-NEXT: s_setpc_b64 s[30:31] entry: diff --git a/llvm/test/CodeGen/WebAssembly/multi-return.ll b/llvm/test/CodeGen/WebAssembly/multi-return.ll index 3429cd5..293a1b3 100644 --- a/llvm/test/CodeGen/WebAssembly/multi-return.ll +++ b/llvm/test/CodeGen/WebAssembly/multi-return.ll @@ -78,18 +78,16 @@ define i64 @test4() { define { i64, i128 } @test5() { ; CHECK-LABEL: test5: ; CHECK: call return_multi_multi -; CHECK: i32.const $push8=, 8 -; CHECK: i32.add $push9=, $[[SP:[0-9]+]], $pop8 -; CHECK: i32.const $push0=, 16 -; CHECK: i32.add $push1=, $pop9, $pop0 +; CHECK: i32.const $push0=, 24 +; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1) ; CHECK: i64.load $[[L2:[0-9]+]]=, 8($[[SP]]) ; CHECK: i64.load $push2=, 16($[[SP]]) ; CHECK: i64.store 8($0), $pop2 +; CHECK: i64.store 16($0), $[[L1]] ; CHECK: i64.store 0($0), $[[L2]] -; CHECK: i32.const $push12=, 16 -; CHECK: i32.add $push3=, $0, $pop12 -; CHECK: i64.store 0($pop3), $[[L1]] +; CHECK: i32.const 
$push5=, 80 +; CHECK: i32.add $push6=, $3, $pop5 %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi() %r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0 %r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1 @@ -101,20 +99,20 @@ define { i64, i128 } @test5() { define { i128, i128 } @test6() { ; CHECK-LABEL: test6: ; CHECK: call return_multi_multi -; CHECK: i32.const $push0=, 64 +; CHECK: i32.const $push0=, 24 ; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1) -; CHECK: i32.const $push2=, 24 +; CHECK: i32.const $push2=, 64 ; CHECK: i32.add $push3=, $[[SP]], $pop2 ; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3) ; CHECK: i64.load $[[L3:[0-9]+]]=, 16($[[SP]]) ; CHECK: i64.load $push4=, 56($[[SP]]) ; CHECK: i64.store 16($0), $pop4 +; CHECK: i64.store 24($0), $[[L2]] ; CHECK: i64.store 0($0), $[[L3]] -; CHECK: i64.store 8($0), $[[L2]] -; CHECK: i32.const $push5=, 24 -; CHECK: i32.add $push6=, $0, $pop5 -; CHECK: i64.store 0($pop6), $[[L1]] +; CHECK: i64.store 8($0), $[[L1]] +; CHECK: i32.const $push7=, 80 +; CHECK: i32.add $push8=, $4, $pop7 %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi() %r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1 %r3 = extractvalue { i64, i128, i192, i128, i64 } %t0, 3 @@ -129,19 +127,17 @@ define { i64, i192 } @test7() { ; CHECK: i32.const $push0=, 40 ; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1) +; CHECK: i64.load $[[L2:[0-9]+]]=, 8($[[SP]]) +; CHECK: i64.load $[[L3:[0-9]+]]=, 32($[[SP]]) ; CHECK: i32.const $push2=, 48 ; CHECK: i32.add $push3=, $[[SP]], $pop2 -; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3) -; CHECK: i64.load $[[L3:[0-9]+]]=, 8($[[SP]]) -; CHECK: i64.load $push4=, 32($[[SP]]) -; CHECK: i64.store 8($0), $pop4 -; CHECK: i64.store 0($0), $[[L3]] -; CHECK: i32.const $push5=, 24 -; CHECK: i32.add $push6=, $0, $pop5 -; CHECK: i64.store 0($pop6), $[[L2]] -; CHECK: i32.const $push7=, 16 -; CHECK: i32.add $push8=, $0, $pop7 -; CHECK: i64.store 0($pop8), $[[L1]] +; CHECK: i64.load $push4=, 0($pop3) +; CHECK: i64.store 24($0), $pop4 +; CHECK: i64.store 8($0), $[[L3]] +; CHECK: i64.store 16($0), $[[L1]] +; CHECK: i64.store 0($0), $[[L2]] +; CHECK: i32.const $push7=, 80 +; CHECK: i32.add $push8=, $4, $pop7 %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi() %r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0 %r2 = extractvalue { i64, i128, i192, i128, i64 } %t0, 2 @@ -153,18 +149,16 @@ define { i64, i192 } @test7() { define { i128, i192, i128, i64 } @test8() { ; CHECK-LABEL: test8: ; CHECK: call return_multi_multi -; CHECK: i32.const $push18=, 8 -; CHECK: i32.add $push19=, $[[SP:[0-9]+]], $pop18 -; CHECK: i32.const $push0=, 32 -; CHECK: i32.add $push1=, $pop19, $pop0 +; CHECK: i32.const $push0=, 64 +; CHECK: i32.add $push1=, $[[SP:[0-9]+]], $pop0 ; CHECK: i64.load $[[L1:[0-9]+]]=, 0($pop1) -; CHECK: i32.const $push2=, 48 +; CHECK: i32.const $push2=, 40 ; CHECK: i32.add $push3=, $[[SP]], $pop2 ; CHECK: i64.load $[[L2:[0-9]+]]=, 0($pop3) -; CHECK: i32.const $push4=, 24 +; CHECK: i32.const $push4=, 48 ; CHECK: i32.add $push5=, $[[SP]], $pop4 ; CHECK: i64.load $[[L3:[0-9]+]]=, 0($pop5) -; CHECK: i32.const $push6=, 64 +; CHECK: i32.const $push6=, 24 ; CHECK: i32.add $push7=, $[[SP]], $pop6 ; CHECK: i64.load $[[L4:[0-9]+]]=, 0($pop7) ; CHECK: i64.load $[[L5:[0-9]+]]=, 8($[[SP]]) @@ -172,19 +166,15 @@ define { i128, i192, i128, i64 } @test8() { ; CHECK: i64.load $[[L7:[0-9]+]]=, 32($[[SP]]) ; CHECK: i64.load $push8=, 16($[[SP]]) ; 
CHECK: i64.store 40($0), $pop8 +; CHECK: i64.store 48($0), $[[L4]] +; CHECK: i64.store 32($0), $[[L3]] ; CHECK: i64.store 16($0), $[[L7]] +; CHECK: i64.store 24($0), $[[L2]] ; CHECK: i64.store 0($0), $[[L6]] -; CHECK: i64.store 8($0), $[[L4]] +; CHECK: i64.store 8($0), $[[L1]] ; CHECK: i64.store 56($0), $[[L5]] -; CHECK: i32.const $push9=, 48 -; CHECK: i32.add $push10=, $0, $pop9 -; CHECK: i64.store 0($pop10), $[[L3]] -; CHECK: i32.const $push22=, 32 -; CHECK: i32.add $push11=, $0, $pop22 -; CHECK: i64.store 0($pop11), $[[L2]] -; CHECK: i32.const $push12=, 24 -; CHECK: i32.add $push13=, $0, $pop12 -; CHECK: i64.store 0($pop13), $[[L1]] +; CHECK: i32.const $push11=, 80 +; CHECK: i32.add $push12=, $8, $pop11 %t0 = call { i64, i128, i192, i128, i64 } @return_multi_multi() %r0 = extractvalue { i64, i128, i192, i128, i64 } %t0, 0 %r1 = extractvalue { i64, i128, i192, i128, i64 } %t0, 1 diff --git a/llvm/test/CodeGen/WebAssembly/simd-arith.ll b/llvm/test/CodeGen/WebAssembly/simd-arith.ll index 3a806b9..761a754 100644 --- a/llvm/test/CodeGen/WebAssembly/simd-arith.ll +++ b/llvm/test/CodeGen/WebAssembly/simd-arith.ll @@ -31,60 +31,38 @@ define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: add_v16i8: ; NO-SIMD128: .functype add_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.add $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.add $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.add $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.add $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.add $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.add $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.add $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.add $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.add $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.add $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.add $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.add $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.add $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.add $push29=, $7, $23 -; 
NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.add $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.add $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.add $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.add $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.add $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.add $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.add $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.add $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.add $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.add $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.add $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.add $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.add $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.add $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.add $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.add $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.add $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: add_v16i8: @@ -96,54 +74,32 @@ define <16 x i8> @add_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: 
i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.add $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.add $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.add $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.add $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.add $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.add $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = add <16 x i8> %x, %y ret <16 x i8> %a @@ -165,60 +121,38 @@ define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: sub_v16i8: ; NO-SIMD128: .functype sub_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.sub $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.sub $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.sub $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.sub $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.sub $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.sub $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.sub $push8=, 
$15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.sub $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.sub $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.sub $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.sub $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.sub $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.sub $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.sub $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.sub $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.sub $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.sub $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.sub $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.sub $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.sub $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.sub $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.sub $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.sub $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.sub $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.sub $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.sub $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.sub $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.sub $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.sub $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.sub $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.sub $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.sub $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sub_v16i8: @@ -230,54 +164,32 @@ define <16 x i8> @sub_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: 
i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.sub $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.sub $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.sub $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.sub $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.sub $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.sub $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.sub $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.sub $push14=, $15, $31 
+; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = sub <16 x i8> %x, %y ret <16 x i8> %a @@ -425,60 +337,38 @@ define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: mul_v16i8: ; NO-SIMD128: .functype mul_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.mul $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.mul $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.mul $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.mul $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.mul $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.mul $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.mul $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.mul $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.mul $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.mul $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.mul $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.mul $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.mul $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.mul $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.mul $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.mul $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.mul $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.mul $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.mul $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.mul $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.mul $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.mul 
$push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.mul $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.mul $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.mul $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.mul $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.mul $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.mul $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.mul $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.mul $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.mul $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.mul $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: mul_v16i8: @@ -490,54 +380,32 @@ define <16 x i8> @mul_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.mul $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.mul $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.mul $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.mul $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const 
$push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.mul $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.mul $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.mul $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.mul $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.mul $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = mul <16 x i8> %x, %y ret <16 x i8> %a @@ -559,108 +427,86 @@ define <16 x i8> @min_s_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: min_s_v16i8: ; NO-SIMD128: .functype min_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 15 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16 ; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32 ; NO-SIMD128-NEXT: i32.lt_s $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $16, $32, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.extend8_s $push7=, $15 -; NO-SIMD128-NEXT: i32.extend8_s $push6=, $31 -; NO-SIMD128-NEXT: i32.lt_s $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $15, $31, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push16=, 13 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend8_s $push13=, $14 -; NO-SIMD128-NEXT: i32.extend8_s $push12=, $30 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $15 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $31 +; NO-SIMD128-NEXT: i32.lt_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $15, $31, $pop6 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop7 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $14 +; NO-SIMD128-NEXT: i32.extend8_s $push8=, $30 +; NO-SIMD128-NEXT: i32.lt_s $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $14, $30, $pop10 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop11 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $13 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $29 ; NO-SIMD128-NEXT: i32.lt_s $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.select $push15=, $14, $30, $pop14 -; NO-SIMD128-NEXT: i32.store8 
0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push22=, 12 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.extend8_s $push19=, $13 -; NO-SIMD128-NEXT: i32.extend8_s $push18=, $29 -; NO-SIMD128-NEXT: i32.lt_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.select $push21=, $13, $29, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push28=, 11 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.extend8_s $push25=, $12 -; NO-SIMD128-NEXT: i32.extend8_s $push24=, $28 +; NO-SIMD128-NEXT: i32.select $push15=, $13, $29, $pop14 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop15 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $12 +; NO-SIMD128-NEXT: i32.extend8_s $push16=, $28 +; NO-SIMD128-NEXT: i32.lt_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.select $push19=, $12, $28, $pop18 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop19 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $11 +; NO-SIMD128-NEXT: i32.extend8_s $push20=, $27 +; NO-SIMD128-NEXT: i32.lt_s $push22=, $pop21, $pop20 +; NO-SIMD128-NEXT: i32.select $push23=, $11, $27, $pop22 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop23 +; NO-SIMD128-NEXT: i32.extend8_s $push25=, $10 +; NO-SIMD128-NEXT: i32.extend8_s $push24=, $26 ; NO-SIMD128-NEXT: i32.lt_s $push26=, $pop25, $pop24 -; NO-SIMD128-NEXT: i32.select $push27=, $12, $28, $pop26 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push34=, 10 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11 -; NO-SIMD128-NEXT: i32.extend8_s $push30=, $27 -; NO-SIMD128-NEXT: i32.lt_s $push32=, $pop31, $pop30 -; NO-SIMD128-NEXT: i32.select $push33=, $11, $27, $pop32 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push40=, 9 -; NO-SIMD128-NEXT: i32.add $push41=, $0, $pop40 -; NO-SIMD128-NEXT: i32.extend8_s $push37=, $10 -; NO-SIMD128-NEXT: i32.extend8_s $push36=, $26 +; NO-SIMD128-NEXT: i32.select $push27=, $10, $26, $pop26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop27 +; NO-SIMD128-NEXT: i32.extend8_s $push29=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push28=, $25 +; NO-SIMD128-NEXT: i32.lt_s $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.select $push31=, $9, $25, $pop30 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop31 +; NO-SIMD128-NEXT: i32.extend8_s $push33=, $8 +; NO-SIMD128-NEXT: i32.extend8_s $push32=, $24 +; NO-SIMD128-NEXT: i32.lt_s $push34=, $pop33, $pop32 +; NO-SIMD128-NEXT: i32.select $push35=, $8, $24, $pop34 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop35 +; NO-SIMD128-NEXT: i32.extend8_s $push37=, $7 +; NO-SIMD128-NEXT: i32.extend8_s $push36=, $23 ; NO-SIMD128-NEXT: i32.lt_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.select $push39=, $10, $26, $pop38 -; NO-SIMD128-NEXT: i32.store8 0($pop41), $pop39 -; NO-SIMD128-NEXT: i32.extend8_s $push43=, $9 -; NO-SIMD128-NEXT: i32.extend8_s $push42=, $25 -; NO-SIMD128-NEXT: i32.lt_s $push44=, $pop43, $pop42 -; NO-SIMD128-NEXT: i32.select $push45=, $9, $25, $pop44 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop45 -; NO-SIMD128-NEXT: i32.const $push50=, 7 -; NO-SIMD128-NEXT: i32.add $push51=, $0, $pop50 -; NO-SIMD128-NEXT: i32.extend8_s $push47=, $8 -; NO-SIMD128-NEXT: i32.extend8_s $push46=, $24 -; NO-SIMD128-NEXT: i32.lt_s $push48=, $pop47, $pop46 -; NO-SIMD128-NEXT: i32.select $push49=, $8, $24, $pop48 -; NO-SIMD128-NEXT: i32.store8 0($pop51), $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, 6 -; NO-SIMD128-NEXT: i32.add $push57=, $0, $pop56 -; NO-SIMD128-NEXT: i32.extend8_s $push53=, $7 
-; NO-SIMD128-NEXT: i32.extend8_s $push52=, $23 +; NO-SIMD128-NEXT: i32.select $push39=, $7, $23, $pop38 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push41=, $6 +; NO-SIMD128-NEXT: i32.extend8_s $push40=, $22 +; NO-SIMD128-NEXT: i32.lt_s $push42=, $pop41, $pop40 +; NO-SIMD128-NEXT: i32.select $push43=, $6, $22, $pop42 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop43 +; NO-SIMD128-NEXT: i32.extend8_s $push45=, $5 +; NO-SIMD128-NEXT: i32.extend8_s $push44=, $21 +; NO-SIMD128-NEXT: i32.lt_s $push46=, $pop45, $pop44 +; NO-SIMD128-NEXT: i32.select $push47=, $5, $21, $pop46 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop47 +; NO-SIMD128-NEXT: i32.extend8_s $push49=, $4 +; NO-SIMD128-NEXT: i32.extend8_s $push48=, $20 +; NO-SIMD128-NEXT: i32.lt_s $push50=, $pop49, $pop48 +; NO-SIMD128-NEXT: i32.select $push51=, $4, $20, $pop50 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop51 +; NO-SIMD128-NEXT: i32.extend8_s $push53=, $3 +; NO-SIMD128-NEXT: i32.extend8_s $push52=, $19 ; NO-SIMD128-NEXT: i32.lt_s $push54=, $pop53, $pop52 -; NO-SIMD128-NEXT: i32.select $push55=, $7, $23, $pop54 -; NO-SIMD128-NEXT: i32.store8 0($pop57), $pop55 -; NO-SIMD128-NEXT: i32.const $push62=, 5 -; NO-SIMD128-NEXT: i32.add $push63=, $0, $pop62 -; NO-SIMD128-NEXT: i32.extend8_s $push59=, $6 -; NO-SIMD128-NEXT: i32.extend8_s $push58=, $22 -; NO-SIMD128-NEXT: i32.lt_s $push60=, $pop59, $pop58 -; NO-SIMD128-NEXT: i32.select $push61=, $6, $22, $pop60 -; NO-SIMD128-NEXT: i32.store8 0($pop63), $pop61 -; NO-SIMD128-NEXT: i32.extend8_s $push65=, $5 -; NO-SIMD128-NEXT: i32.extend8_s $push64=, $21 -; NO-SIMD128-NEXT: i32.lt_s $push66=, $pop65, $pop64 -; NO-SIMD128-NEXT: i32.select $push67=, $5, $21, $pop66 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop67 -; NO-SIMD128-NEXT: i32.const $push72=, 3 -; NO-SIMD128-NEXT: i32.add $push73=, $0, $pop72 -; NO-SIMD128-NEXT: i32.extend8_s $push69=, $4 -; NO-SIMD128-NEXT: i32.extend8_s $push68=, $20 -; NO-SIMD128-NEXT: i32.lt_s $push70=, $pop69, $pop68 -; NO-SIMD128-NEXT: i32.select $push71=, $4, $20, $pop70 -; NO-SIMD128-NEXT: i32.store8 0($pop73), $pop71 -; NO-SIMD128-NEXT: i32.extend8_s $push75=, $3 -; NO-SIMD128-NEXT: i32.extend8_s $push74=, $19 -; NO-SIMD128-NEXT: i32.lt_s $push76=, $pop75, $pop74 -; NO-SIMD128-NEXT: i32.select $push77=, $3, $19, $pop76 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop77 -; NO-SIMD128-NEXT: i32.extend8_s $push79=, $2 -; NO-SIMD128-NEXT: i32.extend8_s $push78=, $18 -; NO-SIMD128-NEXT: i32.lt_s $push80=, $pop79, $pop78 -; NO-SIMD128-NEXT: i32.select $push81=, $2, $18, $pop80 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop81 -; NO-SIMD128-NEXT: i32.extend8_s $push83=, $1 -; NO-SIMD128-NEXT: i32.extend8_s $push82=, $17 -; NO-SIMD128-NEXT: i32.lt_s $push84=, $pop83, $pop82 -; NO-SIMD128-NEXT: i32.select $push85=, $1, $17, $pop84 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop85 +; NO-SIMD128-NEXT: i32.select $push55=, $3, $19, $pop54 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop55 +; NO-SIMD128-NEXT: i32.extend8_s $push57=, $2 +; NO-SIMD128-NEXT: i32.extend8_s $push56=, $18 +; NO-SIMD128-NEXT: i32.lt_s $push58=, $pop57, $pop56 +; NO-SIMD128-NEXT: i32.select $push59=, $2, $18, $pop58 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop59 +; NO-SIMD128-NEXT: i32.extend8_s $push61=, $1 +; NO-SIMD128-NEXT: i32.extend8_s $push60=, $17 +; NO-SIMD128-NEXT: i32.lt_s $push62=, $pop61, $pop60 +; NO-SIMD128-NEXT: i32.select $push63=, $1, $17, $pop62 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop63 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_s_v16i8: @@ -681,93 +527,71 @@ define 
<16 x i8> @min_s_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.lt_s $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $19, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $20 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $20, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $21 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $21, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $22 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push24=, $pop23, $pop22 -; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $22, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $7 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $21 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $21, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop19 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $22 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $22, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push26=, $pop25, $pop24 +; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $23, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $24 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $23, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $24 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push36=, $pop35, $pop34 -; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $24, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $9 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $25 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $24, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop31 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $9 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $25 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push34=, $pop33, $pop32 +; NO-SIMD128-FAST-NEXT: i32.select $push35=, $9, $25, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $10 +; 
NO-SIMD128-FAST-NEXT: i32.extend8_s $push36=, $26 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push38=, $pop37, $pop36 +; NO-SIMD128-FAST-NEXT: i32.select $push39=, $10, $26, $pop38 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $11 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $27 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push42=, $pop41, $pop40 -; NO-SIMD128-FAST-NEXT: i32.select $push43=, $9, $25, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $10 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $26 +; NO-SIMD128-FAST-NEXT: i32.select $push43=, $11, $27, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop43 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $28 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push46=, $pop45, $pop44 -; NO-SIMD128-FAST-NEXT: i32.select $push47=, $10, $26, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push51=, $11 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $27 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push52=, $pop51, $pop50 -; NO-SIMD128-FAST-NEXT: i32.select $push53=, $11, $27, $pop52 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $12 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $28 +; NO-SIMD128-FAST-NEXT: i32.select $push47=, $12, $28, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop47 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push48=, $29 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push50=, $pop49, $pop48 +; NO-SIMD128-FAST-NEXT: i32.select $push51=, $13, $29, $pop50 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop51 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push52=, $30 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push54=, $pop53, $pop52 +; NO-SIMD128-FAST-NEXT: i32.select $push55=, $14, $30, $pop54 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop55 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $31 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push58=, $pop57, $pop56 -; NO-SIMD128-FAST-NEXT: i32.select $push59=, $12, $28, $pop58 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push63=, $13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push62=, $29 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push64=, $pop63, $pop62 -; NO-SIMD128-FAST-NEXT: i32.select $push65=, $13, $29, $pop64 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop65 -; NO-SIMD128-FAST-NEXT: i32.const $push72=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push73=, $0, $pop72 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $14 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push68=, $30 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push70=, $pop69, $pop68 -; NO-SIMD128-FAST-NEXT: i32.select $push71=, $14, $30, $pop70 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop73), $pop71 -; NO-SIMD128-FAST-NEXT: i32.const $push78=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push79=, $0, $pop78 -; NO-SIMD128-FAST-NEXT: i32.extend8_s 
$push75=, $15 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push74=, $31 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push76=, $pop75, $pop74 -; NO-SIMD128-FAST-NEXT: i32.select $push77=, $15, $31, $pop76 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop79), $pop77 -; NO-SIMD128-FAST-NEXT: i32.const $push84=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push85=, $0, $pop84 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push81=, $16 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push80=, $32 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push82=, $pop81, $pop80 -; NO-SIMD128-FAST-NEXT: i32.select $push83=, $16, $32, $pop82 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop85), $pop83 +; NO-SIMD128-FAST-NEXT: i32.select $push59=, $15, $31, $pop58 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop59 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push61=, $16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push60=, $32 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push62=, $pop61, $pop60 +; NO-SIMD128-FAST-NEXT: i32.select $push63=, $16, $32, $pop62 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop63 ; NO-SIMD128-FAST-NEXT: return %c = icmp slt <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y @@ -790,140 +614,118 @@ define <16 x i8> @min_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: min_u_v16i8: ; NO-SIMD128: .functype min_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 15 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push0=, 255 ; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0 -; NO-SIMD128-NEXT: i32.const $push117=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop117 +; NO-SIMD128-NEXT: i32.const $push95=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop95 ; NO-SIMD128-NEXT: i32.lt_u $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.select $push4=, $16, $32, $pop3 -; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push116=, 255 -; NO-SIMD128-NEXT: i32.and $push8=, $15, $pop116 -; NO-SIMD128-NEXT: i32.const $push115=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $31, $pop115 -; NO-SIMD128-NEXT: i32.lt_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.select $push10=, $15, $31, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 13 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push114=, 255 -; NO-SIMD128-NEXT: i32.and $push14=, $14, $pop114 -; NO-SIMD128-NEXT: i32.const $push113=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $30, $pop113 -; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.select $push16=, $14, $30, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push23=, 12 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.const $push112=, 255 -; NO-SIMD128-NEXT: i32.and $push20=, $13, $pop112 -; NO-SIMD128-NEXT: i32.const $push111=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $29, $pop111 -; NO-SIMD128-NEXT: i32.lt_u $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.select $push22=, $13, $29, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push110=, 255 -; NO-SIMD128-NEXT: i32.and $push26=, $12, $pop110 -; NO-SIMD128-NEXT: i32.const 
$push109=, 255 -; NO-SIMD128-NEXT: i32.and $push25=, $28, $pop109 -; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25 -; NO-SIMD128-NEXT: i32.select $push28=, $12, $28, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push35=, 10 -; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-NEXT: i32.const $push108=, 255 -; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop108 -; NO-SIMD128-NEXT: i32.const $push107=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop107 -; NO-SIMD128-NEXT: i32.lt_u $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.select $push34=, $11, $27, $pop33 -; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 -; NO-SIMD128-NEXT: i32.const $push41=, 9 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const $push106=, 255 -; NO-SIMD128-NEXT: i32.and $push38=, $10, $pop106 -; NO-SIMD128-NEXT: i32.const $push105=, 255 -; NO-SIMD128-NEXT: i32.and $push37=, $26, $pop105 -; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.select $push40=, $10, $26, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push104=, 255 -; NO-SIMD128-NEXT: i32.and $push44=, $9, $pop104 -; NO-SIMD128-NEXT: i32.const $push103=, 255 -; NO-SIMD128-NEXT: i32.and $push43=, $25, $pop103 -; NO-SIMD128-NEXT: i32.lt_u $push45=, $pop44, $pop43 -; NO-SIMD128-NEXT: i32.select $push46=, $9, $25, $pop45 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 -; NO-SIMD128-NEXT: i32.const $push51=, 7 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.const $push102=, 255 -; NO-SIMD128-NEXT: i32.and $push48=, $8, $pop102 -; NO-SIMD128-NEXT: i32.const $push101=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $24, $pop101 -; NO-SIMD128-NEXT: i32.lt_u $push49=, $pop48, $pop47 -; NO-SIMD128-NEXT: i32.select $push50=, $8, $24, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.const $push57=, 6 -; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 -; NO-SIMD128-NEXT: i32.const $push100=, 255 -; NO-SIMD128-NEXT: i32.and $push54=, $7, $pop100 -; NO-SIMD128-NEXT: i32.const $push99=, 255 -; NO-SIMD128-NEXT: i32.and $push53=, $23, $pop99 -; NO-SIMD128-NEXT: i32.lt_u $push55=, $pop54, $pop53 -; NO-SIMD128-NEXT: i32.select $push56=, $7, $23, $pop55 -; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 -; NO-SIMD128-NEXT: i32.const $push63=, 5 -; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-NEXT: i32.const $push98=, 255 -; NO-SIMD128-NEXT: i32.and $push60=, $6, $pop98 -; NO-SIMD128-NEXT: i32.const $push97=, 255 -; NO-SIMD128-NEXT: i32.and $push59=, $22, $pop97 -; NO-SIMD128-NEXT: i32.lt_u $push61=, $pop60, $pop59 -; NO-SIMD128-NEXT: i32.select $push62=, $6, $22, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-NEXT: i32.const $push96=, 255 -; NO-SIMD128-NEXT: i32.and $push66=, $5, $pop96 -; NO-SIMD128-NEXT: i32.const $push95=, 255 -; NO-SIMD128-NEXT: i32.and $push65=, $21, $pop95 -; NO-SIMD128-NEXT: i32.lt_u $push67=, $pop66, $pop65 -; NO-SIMD128-NEXT: i32.select $push68=, $5, $21, $pop67 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 -; NO-SIMD128-NEXT: i32.const $push73=, 3 -; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push94=, 255 -; NO-SIMD128-NEXT: i32.and $push70=, $4, $pop94 +; NO-SIMD128-NEXT: i32.and $push6=, $15, $pop94 ; NO-SIMD128-NEXT: i32.const $push93=, 255 -; NO-SIMD128-NEXT: i32.and $push69=, $20, $pop93 -; NO-SIMD128-NEXT: i32.lt_u $push71=, $pop70, $pop69 -; 
NO-SIMD128-NEXT: i32.select $push72=, $4, $20, $pop71 -; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-NEXT: i32.and $push5=, $31, $pop93 +; NO-SIMD128-NEXT: i32.lt_u $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.select $push8=, $15, $31, $pop7 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop8 ; NO-SIMD128-NEXT: i32.const $push92=, 255 -; NO-SIMD128-NEXT: i32.and $push76=, $3, $pop92 +; NO-SIMD128-NEXT: i32.and $push10=, $14, $pop92 ; NO-SIMD128-NEXT: i32.const $push91=, 255 -; NO-SIMD128-NEXT: i32.and $push75=, $19, $pop91 -; NO-SIMD128-NEXT: i32.lt_u $push77=, $pop76, $pop75 -; NO-SIMD128-NEXT: i32.select $push78=, $3, $19, $pop77 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 +; NO-SIMD128-NEXT: i32.and $push9=, $30, $pop91 +; NO-SIMD128-NEXT: i32.lt_u $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.select $push12=, $14, $30, $pop11 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push90=, 255 -; NO-SIMD128-NEXT: i32.and $push80=, $2, $pop90 +; NO-SIMD128-NEXT: i32.and $push14=, $13, $pop90 ; NO-SIMD128-NEXT: i32.const $push89=, 255 -; NO-SIMD128-NEXT: i32.and $push79=, $18, $pop89 -; NO-SIMD128-NEXT: i32.lt_u $push81=, $pop80, $pop79 -; NO-SIMD128-NEXT: i32.select $push82=, $2, $18, $pop81 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 +; NO-SIMD128-NEXT: i32.and $push13=, $29, $pop89 +; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $13, $29, $pop15 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop16 ; NO-SIMD128-NEXT: i32.const $push88=, 255 -; NO-SIMD128-NEXT: i32.and $push84=, $1, $pop88 +; NO-SIMD128-NEXT: i32.and $push18=, $12, $pop88 ; NO-SIMD128-NEXT: i32.const $push87=, 255 -; NO-SIMD128-NEXT: i32.and $push83=, $17, $pop87 -; NO-SIMD128-NEXT: i32.lt_u $push85=, $pop84, $pop83 -; NO-SIMD128-NEXT: i32.select $push86=, $1, $17, $pop85 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: i32.and $push17=, $28, $pop87 +; NO-SIMD128-NEXT: i32.lt_u $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.select $push20=, $12, $28, $pop19 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push86=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $11, $pop86 +; NO-SIMD128-NEXT: i32.const $push85=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $27, $pop85 +; NO-SIMD128-NEXT: i32.lt_u $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.select $push24=, $11, $27, $pop23 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push84=, 255 +; NO-SIMD128-NEXT: i32.and $push26=, $10, $pop84 +; NO-SIMD128-NEXT: i32.const $push83=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $26, $pop83 +; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.select $push28=, $10, $26, $pop27 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push82=, 255 +; NO-SIMD128-NEXT: i32.and $push30=, $9, $pop82 +; NO-SIMD128-NEXT: i32.const $push81=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $25, $pop81 +; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.select $push32=, $9, $25, $pop31 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop32 +; NO-SIMD128-NEXT: i32.const $push80=, 255 +; NO-SIMD128-NEXT: i32.and $push34=, $8, $pop80 +; NO-SIMD128-NEXT: i32.const $push79=, 255 +; NO-SIMD128-NEXT: i32.and $push33=, $24, $pop79 +; NO-SIMD128-NEXT: i32.lt_u $push35=, $pop34, $pop33 +; NO-SIMD128-NEXT: i32.select $push36=, $8, $24, $pop35 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop36 +; NO-SIMD128-NEXT: i32.const $push78=, 255 +; NO-SIMD128-NEXT: i32.and 
$push38=, $7, $pop78 +; NO-SIMD128-NEXT: i32.const $push77=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $23, $pop77 +; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.select $push40=, $7, $23, $pop39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop40 +; NO-SIMD128-NEXT: i32.const $push76=, 255 +; NO-SIMD128-NEXT: i32.and $push42=, $6, $pop76 +; NO-SIMD128-NEXT: i32.const $push75=, 255 +; NO-SIMD128-NEXT: i32.and $push41=, $22, $pop75 +; NO-SIMD128-NEXT: i32.lt_u $push43=, $pop42, $pop41 +; NO-SIMD128-NEXT: i32.select $push44=, $6, $22, $pop43 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop44 +; NO-SIMD128-NEXT: i32.const $push74=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $5, $pop74 +; NO-SIMD128-NEXT: i32.const $push73=, 255 +; NO-SIMD128-NEXT: i32.and $push45=, $21, $pop73 +; NO-SIMD128-NEXT: i32.lt_u $push47=, $pop46, $pop45 +; NO-SIMD128-NEXT: i32.select $push48=, $5, $21, $pop47 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop48 +; NO-SIMD128-NEXT: i32.const $push72=, 255 +; NO-SIMD128-NEXT: i32.and $push50=, $4, $pop72 +; NO-SIMD128-NEXT: i32.const $push71=, 255 +; NO-SIMD128-NEXT: i32.and $push49=, $20, $pop71 +; NO-SIMD128-NEXT: i32.lt_u $push51=, $pop50, $pop49 +; NO-SIMD128-NEXT: i32.select $push52=, $4, $20, $pop51 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop52 +; NO-SIMD128-NEXT: i32.const $push70=, 255 +; NO-SIMD128-NEXT: i32.and $push54=, $3, $pop70 +; NO-SIMD128-NEXT: i32.const $push69=, 255 +; NO-SIMD128-NEXT: i32.and $push53=, $19, $pop69 +; NO-SIMD128-NEXT: i32.lt_u $push55=, $pop54, $pop53 +; NO-SIMD128-NEXT: i32.select $push56=, $3, $19, $pop55 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop56 +; NO-SIMD128-NEXT: i32.const $push68=, 255 +; NO-SIMD128-NEXT: i32.and $push58=, $2, $pop68 +; NO-SIMD128-NEXT: i32.const $push67=, 255 +; NO-SIMD128-NEXT: i32.and $push57=, $18, $pop67 +; NO-SIMD128-NEXT: i32.lt_u $push59=, $pop58, $pop57 +; NO-SIMD128-NEXT: i32.select $push60=, $2, $18, $pop59 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop60 +; NO-SIMD128-NEXT: i32.const $push66=, 255 +; NO-SIMD128-NEXT: i32.and $push62=, $1, $pop66 +; NO-SIMD128-NEXT: i32.const $push65=, 255 +; NO-SIMD128-NEXT: i32.and $push61=, $17, $pop65 +; NO-SIMD128-NEXT: i32.lt_u $push63=, $pop62, $pop61 +; NO-SIMD128-NEXT: i32.select $push64=, $1, $17, $pop63 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop64 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_u_v16i8: @@ -931,138 +733,116 @@ define <16 x i8> @min_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push117=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop117 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop95 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $17, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push116=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop116 -; NO-SIMD128-FAST-NEXT: i32.const $push115=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop115 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop94 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop93 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $18, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), 
$pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push114=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop114 -; NO-SIMD128-FAST-NEXT: i32.const $push113=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop113 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop91 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $19, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop112 -; NO-SIMD128-FAST-NEXT: i32.const $push111=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop111 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop90 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop89 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $20, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push110=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop110 -; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop109 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $21, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push108=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop108 -; NO-SIMD128-FAST-NEXT: i32.const $push107=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop107 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $22, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push106=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop106 -; NO-SIMD128-FAST-NEXT: i32.const $push105=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $23, $pop105 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $21, $pop87 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $21, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop86 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $22, $pop85 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $22, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop84 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop83 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push27=, $pop26, $pop25 +; 
NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $23, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop82 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $24, $pop81 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $23, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push104=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop104 -; NO-SIMD128-FAST-NEXT: i32.const $push103=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $24, $pop103 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $24, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push102=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push42=, $9, $pop102 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $25, $pop101 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $24, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop79 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push35=, $pop34, $pop33 +; NO-SIMD128-FAST-NEXT: i32.select $push36=, $9, $25, $pop35 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $10, $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $26, $pop77 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.select $push40=, $10, $26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop75 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push43=, $pop42, $pop41 -; NO-SIMD128-FAST-NEXT: i32.select $push44=, $9, $25, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push46=, $10, $pop100 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push45=, $26, $pop99 +; NO-SIMD128-FAST-NEXT: i32.select $push44=, $11, $27, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $12, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $28, $pop73 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push47=, $pop46, $pop45 -; NO-SIMD128-FAST-NEXT: i32.select $push48=, $10, $26, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push52=, $11, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $27, $pop97 
-; NO-SIMD128-FAST-NEXT: i32.lt_u $push53=, $pop52, $pop51 -; NO-SIMD128-FAST-NEXT: i32.select $push54=, $11, $27, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push58=, $12, $pop96 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push57=, $28, $pop95 +; NO-SIMD128-FAST-NEXT: i32.select $push48=, $12, $28, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push50=, $13, $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $29, $pop71 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push51=, $pop50, $pop49 +; NO-SIMD128-FAST-NEXT: i32.select $push52=, $13, $29, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push54=, $14, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $30, $pop69 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push55=, $pop54, $pop53 +; NO-SIMD128-FAST-NEXT: i32.select $push56=, $14, $30, $pop55 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push58=, $15, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $31, $pop67 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push59=, $pop58, $pop57 -; NO-SIMD128-FAST-NEXT: i32.select $push60=, $12, $28, $pop59 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push64=, $13, $pop94 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $29, $pop93 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push65=, $pop64, $pop63 -; NO-SIMD128-FAST-NEXT: i32.select $push66=, $13, $29, $pop65 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push70=, $14, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push69=, $30, $pop91 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push71=, $pop70, $pop69 -; NO-SIMD128-FAST-NEXT: i32.select $push72=, $14, $30, $pop71 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push76=, $15, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push75=, $31, $pop89 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push77=, $pop76, $pop75 -; NO-SIMD128-FAST-NEXT: i32.select $push78=, $15, $31, $pop77 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push82=, $16, $pop88 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push81=, $32, $pop87 -; NO-SIMD128-FAST-NEXT: i32.lt_u 
$push83=, $pop82, $pop81 -; NO-SIMD128-FAST-NEXT: i32.select $push84=, $16, $32, $pop83 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: i32.select $push60=, $15, $31, $pop59 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push62=, $16, $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push61=, $32, $pop65 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push63=, $pop62, $pop61 +; NO-SIMD128-FAST-NEXT: i32.select $push64=, $16, $32, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64 ; NO-SIMD128-FAST-NEXT: return %c = icmp ult <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y @@ -1085,108 +865,86 @@ define <16 x i8> @max_s_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: max_s_v16i8: ; NO-SIMD128: .functype max_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 15 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16 ; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32 ; NO-SIMD128-NEXT: i32.gt_s $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $16, $32, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.extend8_s $push7=, $15 -; NO-SIMD128-NEXT: i32.extend8_s $push6=, $31 -; NO-SIMD128-NEXT: i32.gt_s $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $15, $31, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push16=, 13 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend8_s $push13=, $14 -; NO-SIMD128-NEXT: i32.extend8_s $push12=, $30 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $15 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $31 +; NO-SIMD128-NEXT: i32.gt_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $15, $31, $pop6 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop7 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $14 +; NO-SIMD128-NEXT: i32.extend8_s $push8=, $30 +; NO-SIMD128-NEXT: i32.gt_s $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $14, $30, $pop10 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop11 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $13 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $29 ; NO-SIMD128-NEXT: i32.gt_s $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.select $push15=, $14, $30, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push22=, 12 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.extend8_s $push19=, $13 -; NO-SIMD128-NEXT: i32.extend8_s $push18=, $29 -; NO-SIMD128-NEXT: i32.gt_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.select $push21=, $13, $29, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push28=, 11 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.extend8_s $push25=, $12 -; NO-SIMD128-NEXT: i32.extend8_s $push24=, $28 +; NO-SIMD128-NEXT: i32.select $push15=, $13, $29, $pop14 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop15 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $12 +; NO-SIMD128-NEXT: i32.extend8_s $push16=, $28 +; NO-SIMD128-NEXT: i32.gt_s $push18=, $pop17, $pop16 
+; NO-SIMD128-NEXT: i32.select $push19=, $12, $28, $pop18 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop19 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $11 +; NO-SIMD128-NEXT: i32.extend8_s $push20=, $27 +; NO-SIMD128-NEXT: i32.gt_s $push22=, $pop21, $pop20 +; NO-SIMD128-NEXT: i32.select $push23=, $11, $27, $pop22 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop23 +; NO-SIMD128-NEXT: i32.extend8_s $push25=, $10 +; NO-SIMD128-NEXT: i32.extend8_s $push24=, $26 ; NO-SIMD128-NEXT: i32.gt_s $push26=, $pop25, $pop24 -; NO-SIMD128-NEXT: i32.select $push27=, $12, $28, $pop26 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push34=, 10 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11 -; NO-SIMD128-NEXT: i32.extend8_s $push30=, $27 -; NO-SIMD128-NEXT: i32.gt_s $push32=, $pop31, $pop30 -; NO-SIMD128-NEXT: i32.select $push33=, $11, $27, $pop32 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push40=, 9 -; NO-SIMD128-NEXT: i32.add $push41=, $0, $pop40 -; NO-SIMD128-NEXT: i32.extend8_s $push37=, $10 -; NO-SIMD128-NEXT: i32.extend8_s $push36=, $26 +; NO-SIMD128-NEXT: i32.select $push27=, $10, $26, $pop26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop27 +; NO-SIMD128-NEXT: i32.extend8_s $push29=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push28=, $25 +; NO-SIMD128-NEXT: i32.gt_s $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.select $push31=, $9, $25, $pop30 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop31 +; NO-SIMD128-NEXT: i32.extend8_s $push33=, $8 +; NO-SIMD128-NEXT: i32.extend8_s $push32=, $24 +; NO-SIMD128-NEXT: i32.gt_s $push34=, $pop33, $pop32 +; NO-SIMD128-NEXT: i32.select $push35=, $8, $24, $pop34 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop35 +; NO-SIMD128-NEXT: i32.extend8_s $push37=, $7 +; NO-SIMD128-NEXT: i32.extend8_s $push36=, $23 ; NO-SIMD128-NEXT: i32.gt_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.select $push39=, $10, $26, $pop38 -; NO-SIMD128-NEXT: i32.store8 0($pop41), $pop39 -; NO-SIMD128-NEXT: i32.extend8_s $push43=, $9 -; NO-SIMD128-NEXT: i32.extend8_s $push42=, $25 -; NO-SIMD128-NEXT: i32.gt_s $push44=, $pop43, $pop42 -; NO-SIMD128-NEXT: i32.select $push45=, $9, $25, $pop44 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop45 -; NO-SIMD128-NEXT: i32.const $push50=, 7 -; NO-SIMD128-NEXT: i32.add $push51=, $0, $pop50 -; NO-SIMD128-NEXT: i32.extend8_s $push47=, $8 -; NO-SIMD128-NEXT: i32.extend8_s $push46=, $24 -; NO-SIMD128-NEXT: i32.gt_s $push48=, $pop47, $pop46 -; NO-SIMD128-NEXT: i32.select $push49=, $8, $24, $pop48 -; NO-SIMD128-NEXT: i32.store8 0($pop51), $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, 6 -; NO-SIMD128-NEXT: i32.add $push57=, $0, $pop56 -; NO-SIMD128-NEXT: i32.extend8_s $push53=, $7 -; NO-SIMD128-NEXT: i32.extend8_s $push52=, $23 +; NO-SIMD128-NEXT: i32.select $push39=, $7, $23, $pop38 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push41=, $6 +; NO-SIMD128-NEXT: i32.extend8_s $push40=, $22 +; NO-SIMD128-NEXT: i32.gt_s $push42=, $pop41, $pop40 +; NO-SIMD128-NEXT: i32.select $push43=, $6, $22, $pop42 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop43 +; NO-SIMD128-NEXT: i32.extend8_s $push45=, $5 +; NO-SIMD128-NEXT: i32.extend8_s $push44=, $21 +; NO-SIMD128-NEXT: i32.gt_s $push46=, $pop45, $pop44 +; NO-SIMD128-NEXT: i32.select $push47=, $5, $21, $pop46 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop47 +; NO-SIMD128-NEXT: i32.extend8_s $push49=, $4 +; NO-SIMD128-NEXT: i32.extend8_s $push48=, $20 +; NO-SIMD128-NEXT: i32.gt_s $push50=, $pop49, $pop48 +; 
NO-SIMD128-NEXT: i32.select $push51=, $4, $20, $pop50 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop51 +; NO-SIMD128-NEXT: i32.extend8_s $push53=, $3 +; NO-SIMD128-NEXT: i32.extend8_s $push52=, $19 ; NO-SIMD128-NEXT: i32.gt_s $push54=, $pop53, $pop52 -; NO-SIMD128-NEXT: i32.select $push55=, $7, $23, $pop54 -; NO-SIMD128-NEXT: i32.store8 0($pop57), $pop55 -; NO-SIMD128-NEXT: i32.const $push62=, 5 -; NO-SIMD128-NEXT: i32.add $push63=, $0, $pop62 -; NO-SIMD128-NEXT: i32.extend8_s $push59=, $6 -; NO-SIMD128-NEXT: i32.extend8_s $push58=, $22 -; NO-SIMD128-NEXT: i32.gt_s $push60=, $pop59, $pop58 -; NO-SIMD128-NEXT: i32.select $push61=, $6, $22, $pop60 -; NO-SIMD128-NEXT: i32.store8 0($pop63), $pop61 -; NO-SIMD128-NEXT: i32.extend8_s $push65=, $5 -; NO-SIMD128-NEXT: i32.extend8_s $push64=, $21 -; NO-SIMD128-NEXT: i32.gt_s $push66=, $pop65, $pop64 -; NO-SIMD128-NEXT: i32.select $push67=, $5, $21, $pop66 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop67 -; NO-SIMD128-NEXT: i32.const $push72=, 3 -; NO-SIMD128-NEXT: i32.add $push73=, $0, $pop72 -; NO-SIMD128-NEXT: i32.extend8_s $push69=, $4 -; NO-SIMD128-NEXT: i32.extend8_s $push68=, $20 -; NO-SIMD128-NEXT: i32.gt_s $push70=, $pop69, $pop68 -; NO-SIMD128-NEXT: i32.select $push71=, $4, $20, $pop70 -; NO-SIMD128-NEXT: i32.store8 0($pop73), $pop71 -; NO-SIMD128-NEXT: i32.extend8_s $push75=, $3 -; NO-SIMD128-NEXT: i32.extend8_s $push74=, $19 -; NO-SIMD128-NEXT: i32.gt_s $push76=, $pop75, $pop74 -; NO-SIMD128-NEXT: i32.select $push77=, $3, $19, $pop76 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop77 -; NO-SIMD128-NEXT: i32.extend8_s $push79=, $2 -; NO-SIMD128-NEXT: i32.extend8_s $push78=, $18 -; NO-SIMD128-NEXT: i32.gt_s $push80=, $pop79, $pop78 -; NO-SIMD128-NEXT: i32.select $push81=, $2, $18, $pop80 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop81 -; NO-SIMD128-NEXT: i32.extend8_s $push83=, $1 -; NO-SIMD128-NEXT: i32.extend8_s $push82=, $17 -; NO-SIMD128-NEXT: i32.gt_s $push84=, $pop83, $pop82 -; NO-SIMD128-NEXT: i32.select $push85=, $1, $17, $pop84 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop85 +; NO-SIMD128-NEXT: i32.select $push55=, $3, $19, $pop54 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop55 +; NO-SIMD128-NEXT: i32.extend8_s $push57=, $2 +; NO-SIMD128-NEXT: i32.extend8_s $push56=, $18 +; NO-SIMD128-NEXT: i32.gt_s $push58=, $pop57, $pop56 +; NO-SIMD128-NEXT: i32.select $push59=, $2, $18, $pop58 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop59 +; NO-SIMD128-NEXT: i32.extend8_s $push61=, $1 +; NO-SIMD128-NEXT: i32.extend8_s $push60=, $17 +; NO-SIMD128-NEXT: i32.gt_s $push62=, $pop61, $pop60 +; NO-SIMD128-NEXT: i32.select $push63=, $1, $17, $pop62 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop63 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_s_v16i8: @@ -1207,93 +965,71 @@ define <16 x i8> @max_s_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.gt_s $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $19, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $20 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $20, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $21 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.select $push21=, 
$5, $21, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $22 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push24=, $pop23, $pop22 -; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $22, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $7 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $21 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $21, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop19 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $22 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $22, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push26=, $pop25, $pop24 +; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $23, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push28=, $24 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $23, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $24 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push36=, $pop35, $pop34 -; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $24, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop39), $pop37 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $9 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $25 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $24, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop31 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $9 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $25 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push34=, $pop33, $pop32 +; NO-SIMD128-FAST-NEXT: i32.select $push35=, $9, $25, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $10 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push36=, $26 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push38=, $pop37, $pop36 +; NO-SIMD128-FAST-NEXT: i32.select $push39=, $10, $26, $pop38 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $11 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push40=, $27 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push42=, $pop41, $pop40 -; NO-SIMD128-FAST-NEXT: i32.select $push43=, $9, $25, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push49=, $0, $pop48 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $10 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $26 +; NO-SIMD128-FAST-NEXT: i32.select $push43=, $11, $27, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), 
$pop43 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $28 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push46=, $pop45, $pop44 -; NO-SIMD128-FAST-NEXT: i32.select $push47=, $10, $26, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop49), $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push51=, $11 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push50=, $27 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push52=, $pop51, $pop50 -; NO-SIMD128-FAST-NEXT: i32.select $push53=, $11, $27, $pop52 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $12 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $28 +; NO-SIMD128-FAST-NEXT: i32.select $push47=, $12, $28, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop47 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push48=, $29 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push50=, $pop49, $pop48 +; NO-SIMD128-FAST-NEXT: i32.select $push51=, $13, $29, $pop50 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop51 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push52=, $30 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push54=, $pop53, $pop52 +; NO-SIMD128-FAST-NEXT: i32.select $push55=, $14, $30, $pop54 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop55 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push57=, $15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push56=, $31 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push58=, $pop57, $pop56 -; NO-SIMD128-FAST-NEXT: i32.select $push59=, $12, $28, $pop58 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push63=, $13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push62=, $29 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push64=, $pop63, $pop62 -; NO-SIMD128-FAST-NEXT: i32.select $push65=, $13, $29, $pop64 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop65 -; NO-SIMD128-FAST-NEXT: i32.const $push72=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push73=, $0, $pop72 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $14 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push68=, $30 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push70=, $pop69, $pop68 -; NO-SIMD128-FAST-NEXT: i32.select $push71=, $14, $30, $pop70 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop73), $pop71 -; NO-SIMD128-FAST-NEXT: i32.const $push78=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push79=, $0, $pop78 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push75=, $15 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push74=, $31 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push76=, $pop75, $pop74 -; NO-SIMD128-FAST-NEXT: i32.select $push77=, $15, $31, $pop76 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop79), $pop77 -; NO-SIMD128-FAST-NEXT: i32.const $push84=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push85=, $0, $pop84 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push81=, $16 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push80=, $32 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push82=, $pop81, $pop80 -; NO-SIMD128-FAST-NEXT: i32.select $push83=, $16, $32, $pop82 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop85), $pop83 +; NO-SIMD128-FAST-NEXT: i32.select $push59=, $15, $31, $pop58 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop59 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push61=, $16 +; 
NO-SIMD128-FAST-NEXT: i32.extend8_s $push60=, $32 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push62=, $pop61, $pop60 +; NO-SIMD128-FAST-NEXT: i32.select $push63=, $16, $32, $pop62 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop63 ; NO-SIMD128-FAST-NEXT: return %c = icmp sgt <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y @@ -1316,140 +1052,118 @@ define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: max_u_v16i8: ; NO-SIMD128: .functype max_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 15 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push0=, 255 ; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0 -; NO-SIMD128-NEXT: i32.const $push117=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop117 +; NO-SIMD128-NEXT: i32.const $push95=, 255 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop95 ; NO-SIMD128-NEXT: i32.gt_u $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.select $push4=, $16, $32, $pop3 -; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push116=, 255 -; NO-SIMD128-NEXT: i32.and $push8=, $15, $pop116 -; NO-SIMD128-NEXT: i32.const $push115=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $31, $pop115 -; NO-SIMD128-NEXT: i32.gt_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.select $push10=, $15, $31, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 13 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push114=, 255 -; NO-SIMD128-NEXT: i32.and $push14=, $14, $pop114 -; NO-SIMD128-NEXT: i32.const $push113=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $30, $pop113 -; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.select $push16=, $14, $30, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push23=, 12 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.const $push112=, 255 -; NO-SIMD128-NEXT: i32.and $push20=, $13, $pop112 -; NO-SIMD128-NEXT: i32.const $push111=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $29, $pop111 -; NO-SIMD128-NEXT: i32.gt_u $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.select $push22=, $13, $29, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push110=, 255 -; NO-SIMD128-NEXT: i32.and $push26=, $12, $pop110 -; NO-SIMD128-NEXT: i32.const $push109=, 255 -; NO-SIMD128-NEXT: i32.and $push25=, $28, $pop109 -; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25 -; NO-SIMD128-NEXT: i32.select $push28=, $12, $28, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push35=, 10 -; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-NEXT: i32.const $push108=, 255 -; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop108 -; NO-SIMD128-NEXT: i32.const $push107=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop107 -; NO-SIMD128-NEXT: i32.gt_u $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.select $push34=, $11, $27, $pop33 -; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 -; NO-SIMD128-NEXT: i32.const $push41=, 9 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const 
$push106=, 255 -; NO-SIMD128-NEXT: i32.and $push38=, $10, $pop106 -; NO-SIMD128-NEXT: i32.const $push105=, 255 -; NO-SIMD128-NEXT: i32.and $push37=, $26, $pop105 -; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.select $push40=, $10, $26, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push104=, 255 -; NO-SIMD128-NEXT: i32.and $push44=, $9, $pop104 -; NO-SIMD128-NEXT: i32.const $push103=, 255 -; NO-SIMD128-NEXT: i32.and $push43=, $25, $pop103 -; NO-SIMD128-NEXT: i32.gt_u $push45=, $pop44, $pop43 -; NO-SIMD128-NEXT: i32.select $push46=, $9, $25, $pop45 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 -; NO-SIMD128-NEXT: i32.const $push51=, 7 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.const $push102=, 255 -; NO-SIMD128-NEXT: i32.and $push48=, $8, $pop102 -; NO-SIMD128-NEXT: i32.const $push101=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $24, $pop101 -; NO-SIMD128-NEXT: i32.gt_u $push49=, $pop48, $pop47 -; NO-SIMD128-NEXT: i32.select $push50=, $8, $24, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.const $push57=, 6 -; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 -; NO-SIMD128-NEXT: i32.const $push100=, 255 -; NO-SIMD128-NEXT: i32.and $push54=, $7, $pop100 -; NO-SIMD128-NEXT: i32.const $push99=, 255 -; NO-SIMD128-NEXT: i32.and $push53=, $23, $pop99 -; NO-SIMD128-NEXT: i32.gt_u $push55=, $pop54, $pop53 -; NO-SIMD128-NEXT: i32.select $push56=, $7, $23, $pop55 -; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 -; NO-SIMD128-NEXT: i32.const $push63=, 5 -; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-NEXT: i32.const $push98=, 255 -; NO-SIMD128-NEXT: i32.and $push60=, $6, $pop98 -; NO-SIMD128-NEXT: i32.const $push97=, 255 -; NO-SIMD128-NEXT: i32.and $push59=, $22, $pop97 -; NO-SIMD128-NEXT: i32.gt_u $push61=, $pop60, $pop59 -; NO-SIMD128-NEXT: i32.select $push62=, $6, $22, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-NEXT: i32.const $push96=, 255 -; NO-SIMD128-NEXT: i32.and $push66=, $5, $pop96 -; NO-SIMD128-NEXT: i32.const $push95=, 255 -; NO-SIMD128-NEXT: i32.and $push65=, $21, $pop95 -; NO-SIMD128-NEXT: i32.gt_u $push67=, $pop66, $pop65 -; NO-SIMD128-NEXT: i32.select $push68=, $5, $21, $pop67 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 -; NO-SIMD128-NEXT: i32.const $push73=, 3 -; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push94=, 255 -; NO-SIMD128-NEXT: i32.and $push70=, $4, $pop94 +; NO-SIMD128-NEXT: i32.and $push6=, $15, $pop94 ; NO-SIMD128-NEXT: i32.const $push93=, 255 -; NO-SIMD128-NEXT: i32.and $push69=, $20, $pop93 -; NO-SIMD128-NEXT: i32.gt_u $push71=, $pop70, $pop69 -; NO-SIMD128-NEXT: i32.select $push72=, $4, $20, $pop71 -; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 +; NO-SIMD128-NEXT: i32.and $push5=, $31, $pop93 +; NO-SIMD128-NEXT: i32.gt_u $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.select $push8=, $15, $31, $pop7 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop8 ; NO-SIMD128-NEXT: i32.const $push92=, 255 -; NO-SIMD128-NEXT: i32.and $push76=, $3, $pop92 +; NO-SIMD128-NEXT: i32.and $push10=, $14, $pop92 ; NO-SIMD128-NEXT: i32.const $push91=, 255 -; NO-SIMD128-NEXT: i32.and $push75=, $19, $pop91 -; NO-SIMD128-NEXT: i32.gt_u $push77=, $pop76, $pop75 -; NO-SIMD128-NEXT: i32.select $push78=, $3, $19, $pop77 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 +; NO-SIMD128-NEXT: i32.and $push9=, $30, $pop91 +; NO-SIMD128-NEXT: i32.gt_u $push11=, $pop10, 
$pop9 +; NO-SIMD128-NEXT: i32.select $push12=, $14, $30, $pop11 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push90=, 255 -; NO-SIMD128-NEXT: i32.and $push80=, $2, $pop90 +; NO-SIMD128-NEXT: i32.and $push14=, $13, $pop90 ; NO-SIMD128-NEXT: i32.const $push89=, 255 -; NO-SIMD128-NEXT: i32.and $push79=, $18, $pop89 -; NO-SIMD128-NEXT: i32.gt_u $push81=, $pop80, $pop79 -; NO-SIMD128-NEXT: i32.select $push82=, $2, $18, $pop81 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 +; NO-SIMD128-NEXT: i32.and $push13=, $29, $pop89 +; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $13, $29, $pop15 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop16 ; NO-SIMD128-NEXT: i32.const $push88=, 255 -; NO-SIMD128-NEXT: i32.and $push84=, $1, $pop88 +; NO-SIMD128-NEXT: i32.and $push18=, $12, $pop88 ; NO-SIMD128-NEXT: i32.const $push87=, 255 -; NO-SIMD128-NEXT: i32.and $push83=, $17, $pop87 -; NO-SIMD128-NEXT: i32.gt_u $push85=, $pop84, $pop83 -; NO-SIMD128-NEXT: i32.select $push86=, $1, $17, $pop85 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: i32.and $push17=, $28, $pop87 +; NO-SIMD128-NEXT: i32.gt_u $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.select $push20=, $12, $28, $pop19 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push86=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $11, $pop86 +; NO-SIMD128-NEXT: i32.const $push85=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $27, $pop85 +; NO-SIMD128-NEXT: i32.gt_u $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.select $push24=, $11, $27, $pop23 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push84=, 255 +; NO-SIMD128-NEXT: i32.and $push26=, $10, $pop84 +; NO-SIMD128-NEXT: i32.const $push83=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $26, $pop83 +; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.select $push28=, $10, $26, $pop27 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push82=, 255 +; NO-SIMD128-NEXT: i32.and $push30=, $9, $pop82 +; NO-SIMD128-NEXT: i32.const $push81=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $25, $pop81 +; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.select $push32=, $9, $25, $pop31 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop32 +; NO-SIMD128-NEXT: i32.const $push80=, 255 +; NO-SIMD128-NEXT: i32.and $push34=, $8, $pop80 +; NO-SIMD128-NEXT: i32.const $push79=, 255 +; NO-SIMD128-NEXT: i32.and $push33=, $24, $pop79 +; NO-SIMD128-NEXT: i32.gt_u $push35=, $pop34, $pop33 +; NO-SIMD128-NEXT: i32.select $push36=, $8, $24, $pop35 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop36 +; NO-SIMD128-NEXT: i32.const $push78=, 255 +; NO-SIMD128-NEXT: i32.and $push38=, $7, $pop78 +; NO-SIMD128-NEXT: i32.const $push77=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $23, $pop77 +; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.select $push40=, $7, $23, $pop39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop40 +; NO-SIMD128-NEXT: i32.const $push76=, 255 +; NO-SIMD128-NEXT: i32.and $push42=, $6, $pop76 +; NO-SIMD128-NEXT: i32.const $push75=, 255 +; NO-SIMD128-NEXT: i32.and $push41=, $22, $pop75 +; NO-SIMD128-NEXT: i32.gt_u $push43=, $pop42, $pop41 +; NO-SIMD128-NEXT: i32.select $push44=, $6, $22, $pop43 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop44 +; NO-SIMD128-NEXT: i32.const $push74=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $5, $pop74 +; NO-SIMD128-NEXT: i32.const $push73=, 255 +; NO-SIMD128-NEXT: i32.and $push45=, $21, 
$pop73 +; NO-SIMD128-NEXT: i32.gt_u $push47=, $pop46, $pop45 +; NO-SIMD128-NEXT: i32.select $push48=, $5, $21, $pop47 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop48 +; NO-SIMD128-NEXT: i32.const $push72=, 255 +; NO-SIMD128-NEXT: i32.and $push50=, $4, $pop72 +; NO-SIMD128-NEXT: i32.const $push71=, 255 +; NO-SIMD128-NEXT: i32.and $push49=, $20, $pop71 +; NO-SIMD128-NEXT: i32.gt_u $push51=, $pop50, $pop49 +; NO-SIMD128-NEXT: i32.select $push52=, $4, $20, $pop51 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop52 +; NO-SIMD128-NEXT: i32.const $push70=, 255 +; NO-SIMD128-NEXT: i32.and $push54=, $3, $pop70 +; NO-SIMD128-NEXT: i32.const $push69=, 255 +; NO-SIMD128-NEXT: i32.and $push53=, $19, $pop69 +; NO-SIMD128-NEXT: i32.gt_u $push55=, $pop54, $pop53 +; NO-SIMD128-NEXT: i32.select $push56=, $3, $19, $pop55 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop56 +; NO-SIMD128-NEXT: i32.const $push68=, 255 +; NO-SIMD128-NEXT: i32.and $push58=, $2, $pop68 +; NO-SIMD128-NEXT: i32.const $push67=, 255 +; NO-SIMD128-NEXT: i32.and $push57=, $18, $pop67 +; NO-SIMD128-NEXT: i32.gt_u $push59=, $pop58, $pop57 +; NO-SIMD128-NEXT: i32.select $push60=, $2, $18, $pop59 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop60 +; NO-SIMD128-NEXT: i32.const $push66=, 255 +; NO-SIMD128-NEXT: i32.and $push62=, $1, $pop66 +; NO-SIMD128-NEXT: i32.const $push65=, 255 +; NO-SIMD128-NEXT: i32.and $push61=, $17, $pop65 +; NO-SIMD128-NEXT: i32.gt_u $push63=, $pop62, $pop61 +; NO-SIMD128-NEXT: i32.select $push64=, $1, $17, $pop63 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop64 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_u_v16i8: @@ -1457,138 +1171,116 @@ define <16 x i8> @max_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push117=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop117 +; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop95 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $17, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push116=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop116 -; NO-SIMD128-FAST-NEXT: i32.const $push115=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop115 +; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop94 +; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $18, $pop93 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $18, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push114=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop114 -; NO-SIMD128-FAST-NEXT: i32.const $push113=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop113 +; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop92 +; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $19, $pop91 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $19, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push112=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop112 
-; NO-SIMD128-FAST-NEXT: i32.const $push111=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop111 +; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop90 +; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $20, $pop89 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $20, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push110=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop110 -; NO-SIMD128-FAST-NEXT: i32.const $push109=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop109 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $21, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push108=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop108 -; NO-SIMD128-FAST-NEXT: i32.const $push107=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop107 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $22, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push106=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop106 -; NO-SIMD128-FAST-NEXT: i32.const $push105=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $23, $pop105 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop88 +; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $21, $pop87 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $21, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop86 +; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $22, $pop85 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $22, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop84 +; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop83 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $23, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop82 +; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $24, $pop81 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $23, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push104=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop104 -; NO-SIMD128-FAST-NEXT: i32.const $push103=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $24, $pop103 -; 
NO-SIMD128-FAST-NEXT: i32.gt_u $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $24, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push102=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push42=, $9, $pop102 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $25, $pop101 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $24, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop80 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop79 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push35=, $pop34, $pop33 +; NO-SIMD128-FAST-NEXT: i32.select $push36=, $9, $25, $pop35 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $10, $pop78 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $26, $pop77 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.select $push40=, $10, $26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop76 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop75 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push43=, $pop42, $pop41 -; NO-SIMD128-FAST-NEXT: i32.select $push44=, $9, $25, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push46=, $10, $pop100 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push45=, $26, $pop99 +; NO-SIMD128-FAST-NEXT: i32.select $push44=, $11, $27, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $12, $pop74 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $28, $pop73 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push47=, $pop46, $pop45 -; NO-SIMD128-FAST-NEXT: i32.select $push48=, $10, $26, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push52=, $11, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $27, $pop97 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push53=, $pop52, $pop51 -; NO-SIMD128-FAST-NEXT: i32.select $push54=, $11, $27, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push58=, $12, $pop96 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push57=, $28, $pop95 +; NO-SIMD128-FAST-NEXT: i32.select $push48=, $12, $28, $pop47 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push50=, $13, $pop72 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $29, 
$pop71 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push51=, $pop50, $pop49 +; NO-SIMD128-FAST-NEXT: i32.select $push52=, $13, $29, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push54=, $14, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $30, $pop69 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push55=, $pop54, $pop53 +; NO-SIMD128-FAST-NEXT: i32.select $push56=, $14, $30, $pop55 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push58=, $15, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $31, $pop67 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push59=, $pop58, $pop57 -; NO-SIMD128-FAST-NEXT: i32.select $push60=, $12, $28, $pop59 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push64=, $13, $pop94 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $29, $pop93 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push65=, $pop64, $pop63 -; NO-SIMD128-FAST-NEXT: i32.select $push66=, $13, $29, $pop65 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push70=, $14, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push69=, $30, $pop91 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push71=, $pop70, $pop69 -; NO-SIMD128-FAST-NEXT: i32.select $push72=, $14, $30, $pop71 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push76=, $15, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push75=, $31, $pop89 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push77=, $pop76, $pop75 -; NO-SIMD128-FAST-NEXT: i32.select $push78=, $15, $31, $pop77 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push82=, $16, $pop88 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push81=, $32, $pop87 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push83=, $pop82, $pop81 -; NO-SIMD128-FAST-NEXT: i32.select $push84=, $16, $32, $pop83 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: i32.select $push60=, $15, $31, $pop59 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push62=, $16, $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push61=, $32, $pop65 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push63=, $pop62, $pop61 +; NO-SIMD128-FAST-NEXT: i32.select $push64=, $16, $32, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64 ; NO-SIMD128-FAST-NEXT: return %c = icmp ugt <16 x i8> %x, %y %a = select <16 x i1> %c, <16 x i8> %x, <16 x i8> %y @@ -1611,156 +1303,134 @@ define <16 x i8> 
@avgr_u_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: avgr_u_v16i8: ; NO-SIMD128: .functype avgr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.add $push2=, $16, $32 -; NO-SIMD128-NEXT: i32.const $push3=, 1 -; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 254 -; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 -; NO-SIMD128-NEXT: i32.const $push133=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop133 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $pop7 -; NO-SIMD128-NEXT: i32.const $push8=, 14 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push10=, $15, $31 -; NO-SIMD128-NEXT: i32.const $push132=, 1 -; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop132 -; NO-SIMD128-NEXT: i32.const $push131=, 254 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop131 -; NO-SIMD128-NEXT: i32.const $push130=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop130 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop13 -; NO-SIMD128-NEXT: i32.const $push14=, 13 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push16=, $14, $30 -; NO-SIMD128-NEXT: i32.const $push129=, 1 -; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop129 -; NO-SIMD128-NEXT: i32.const $push128=, 254 -; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop128 -; NO-SIMD128-NEXT: i32.const $push127=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop127 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $pop19 -; NO-SIMD128-NEXT: i32.const $push20=, 12 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.add $push22=, $13, $29 -; NO-SIMD128-NEXT: i32.const $push126=, 1 -; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop126 -; NO-SIMD128-NEXT: i32.const $push125=, 254 -; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop125 -; NO-SIMD128-NEXT: i32.const $push124=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop124 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $pop25 -; NO-SIMD128-NEXT: i32.const $push26=, 11 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.add $push28=, $12, $28 -; NO-SIMD128-NEXT: i32.const $push123=, 1 -; NO-SIMD128-NEXT: i32.add $push29=, $pop28, $pop123 -; NO-SIMD128-NEXT: i32.const $push122=, 254 -; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $pop122 -; NO-SIMD128-NEXT: i32.const $push121=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push31=, $pop30, $pop121 -; NO-SIMD128-NEXT: i32.store8 0($pop27), $pop31 -; NO-SIMD128-NEXT: i32.const $push32=, 10 -; NO-SIMD128-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-NEXT: i32.add $push34=, $11, $27 -; NO-SIMD128-NEXT: i32.const $push120=, 1 -; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop120 -; NO-SIMD128-NEXT: i32.const $push119=, 254 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop119 -; NO-SIMD128-NEXT: i32.const $push118=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop118 -; NO-SIMD128-NEXT: i32.store8 0($pop33), $pop37 -; NO-SIMD128-NEXT: i32.const $push38=, 9 -; NO-SIMD128-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-NEXT: i32.add $push40=, $10, $26 -; NO-SIMD128-NEXT: i32.const $push117=, 1 -; NO-SIMD128-NEXT: i32.add $push41=, $pop40, $pop117 -; NO-SIMD128-NEXT: i32.const $push116=, 254 -; NO-SIMD128-NEXT: i32.and $push42=, $pop41, $pop116 -; NO-SIMD128-NEXT: i32.const 
$push115=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop115 -; NO-SIMD128-NEXT: i32.store8 0($pop39), $pop43 -; NO-SIMD128-NEXT: i32.add $push44=, $9, $25 -; NO-SIMD128-NEXT: i32.const $push114=, 1 -; NO-SIMD128-NEXT: i32.add $push45=, $pop44, $pop114 -; NO-SIMD128-NEXT: i32.const $push113=, 254 -; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $pop113 -; NO-SIMD128-NEXT: i32.const $push112=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push47=, $pop46, $pop112 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop47 -; NO-SIMD128-NEXT: i32.const $push48=, 7 -; NO-SIMD128-NEXT: i32.add $push49=, $0, $pop48 -; NO-SIMD128-NEXT: i32.add $push50=, $8, $24 +; NO-SIMD128-NEXT: i32.add $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.const $push1=, 1 +; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-NEXT: i32.const $push3=, 254 +; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3 ; NO-SIMD128-NEXT: i32.const $push111=, 1 -; NO-SIMD128-NEXT: i32.add $push51=, $pop50, $pop111 -; NO-SIMD128-NEXT: i32.const $push110=, 254 -; NO-SIMD128-NEXT: i32.and $push52=, $pop51, $pop110 -; NO-SIMD128-NEXT: i32.const $push109=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop109 -; NO-SIMD128-NEXT: i32.store8 0($pop49), $pop53 -; NO-SIMD128-NEXT: i32.const $push54=, 6 -; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-NEXT: i32.add $push56=, $7, $23 +; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop111 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $15, $31 +; NO-SIMD128-NEXT: i32.const $push110=, 1 +; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop110 +; NO-SIMD128-NEXT: i32.const $push109=, 254 +; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop109 ; NO-SIMD128-NEXT: i32.const $push108=, 1 -; NO-SIMD128-NEXT: i32.add $push57=, $pop56, $pop108 -; NO-SIMD128-NEXT: i32.const $push107=, 254 -; NO-SIMD128-NEXT: i32.and $push58=, $pop57, $pop107 -; NO-SIMD128-NEXT: i32.const $push106=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push59=, $pop58, $pop106 -; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop59 -; NO-SIMD128-NEXT: i32.const $push60=, 5 -; NO-SIMD128-NEXT: i32.add $push61=, $0, $pop60 -; NO-SIMD128-NEXT: i32.add $push62=, $6, $22 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop108 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop9 +; NO-SIMD128-NEXT: i32.add $push10=, $14, $30 +; NO-SIMD128-NEXT: i32.const $push107=, 1 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop107 +; NO-SIMD128-NEXT: i32.const $push106=, 254 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop106 ; NO-SIMD128-NEXT: i32.const $push105=, 1 -; NO-SIMD128-NEXT: i32.add $push63=, $pop62, $pop105 -; NO-SIMD128-NEXT: i32.const $push104=, 254 -; NO-SIMD128-NEXT: i32.and $push64=, $pop63, $pop104 -; NO-SIMD128-NEXT: i32.const $push103=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push65=, $pop64, $pop103 -; NO-SIMD128-NEXT: i32.store8 0($pop61), $pop65 -; NO-SIMD128-NEXT: i32.add $push66=, $5, $21 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop105 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-NEXT: i32.add $push14=, $13, $29 +; NO-SIMD128-NEXT: i32.const $push104=, 1 +; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop104 +; NO-SIMD128-NEXT: i32.const $push103=, 254 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop103 ; NO-SIMD128-NEXT: i32.const $push102=, 1 -; NO-SIMD128-NEXT: i32.add $push67=, $pop66, $pop102 -; NO-SIMD128-NEXT: i32.const $push101=, 254 -; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $pop101 -; NO-SIMD128-NEXT: i32.const $push100=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push69=, $pop68, $pop100 -; 
i32.const $push40=, 0 -; NO-SIMD128-NEXT: i32.sub $push33=, $pop40, $6 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push37=, 3 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push39=, 0 -; NO-SIMD128-NEXT: i32.sub $push36=, $pop39, $4 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $16 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push31=, 0 +; NO-SIMD128-NEXT: i32.sub $push2=, $pop31, $15 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push30=, 0 +; NO-SIMD128-NEXT: i32.sub $push3=, $pop30, $14 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push29=, 0 +; NO-SIMD128-NEXT: i32.sub $push4=, $pop29, $13 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push28=, 0 +; NO-SIMD128-NEXT: i32.sub $push5=, $pop28, $12 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push27=, 0 +; NO-SIMD128-NEXT: i32.sub $push6=, $pop27, $11 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push26=, 0 +; NO-SIMD128-NEXT: i32.sub $push7=, $pop26, $10 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push25=, 0 +; NO-SIMD128-NEXT: i32.sub $push8=, $pop25, $9 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push24=, 0 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop24, $8 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push23=, 0 +; NO-SIMD128-NEXT: i32.sub $push10=, $pop23, $7 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push22=, 0 +; NO-SIMD128-NEXT: i32.sub $push11=, $pop22, $6 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop11 +; NO-SIMD128-NEXT: i32.const $push21=, 0 +; NO-SIMD128-NEXT: i32.sub $push12=, $pop21, $5 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push20=, 0 +; NO-SIMD128-NEXT: i32.sub $push13=, $pop20, $4 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop13 +; NO-SIMD128-NEXT: i32.const $push19=, 0 +; NO-SIMD128-NEXT: i32.sub $push14=, $pop19, $3 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, 0 +; NO-SIMD128-NEXT: i32.sub $push15=, $pop18, $2 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push17=, 0 +; NO-SIMD128-NEXT: i32.sub $push16=, $pop17, $1 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: neg_v16i8: @@ -2653,73 +2191,51 @@ define <16 x i8> @neg_v16i8(<16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 ; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop53, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop31, $2 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop52, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop30, $3 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop51, $4 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 0 -; 
NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop50, $5 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop49, $6 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop48, $7 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop47, $8 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop46, $9 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop45, $10 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push23=, $pop44, $11 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push26=, $pop43, $12 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push29=, $pop42, $13 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push32=, $pop41, $14 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push35=, $pop40, $15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push38=, $pop39, $16 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop29, $4 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $pop28, $5 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop27, $6 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop26, $7 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop25, $8 +; NO-SIMD128-FAST-NEXT: i32.store8 
7($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop24, $9 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop23, $10 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push11=, $pop22, $11 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop21, $12 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop20, $13 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push14=, $pop19, $14 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $pop18, $15 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop17, $16 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %a = sub <16 x i8> , @@ -2744,124 +2260,80 @@ define <16 x i8> @shl_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push40=, $17, $pop0 -; NO-SIMD128-NEXT: local.tee $push39=, $17=, $pop40 -; NO-SIMD128-NEXT: i32.shl $push1=, $9, $pop39 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 -; NO-SIMD128-NEXT: i32.shl $push2=, $5, $17 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 -; NO-SIMD128-NEXT: i32.shl $push3=, $3, $17 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-NEXT: i32.shl $push4=, $2, $17 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-NEXT: i32.shl $push5=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push7=, 15 -; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-NEXT: i32.shl $push6=, $16, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.shl $push9=, $15, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.shl $push12=, $14, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 12 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.shl $push15=, $13, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 11 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.shl $push18=, $12, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 10 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.shl $push21=, $11, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push25=, 9 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.shl $push24=, $10, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push28=, 7 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.shl $push27=, $8, $17 
-; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.shl $push30=, $7, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-NEXT: i32.const $push34=, 5 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.shl $push33=, $6, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push37=, 3 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.shl $push36=, $4, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.and $push18=, $17, $pop0 +; NO-SIMD128-NEXT: local.tee $push17=, $17=, $pop18 +; NO-SIMD128-NEXT: i32.shl $push1=, $16, $pop17 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $15, $17 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop2 +; NO-SIMD128-NEXT: i32.shl $push3=, $14, $17 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop3 +; NO-SIMD128-NEXT: i32.shl $push4=, $13, $17 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop4 +; NO-SIMD128-NEXT: i32.shl $push5=, $12, $17 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop5 +; NO-SIMD128-NEXT: i32.shl $push6=, $11, $17 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop6 +; NO-SIMD128-NEXT: i32.shl $push7=, $10, $17 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop7 +; NO-SIMD128-NEXT: i32.shl $push8=, $9, $17 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-NEXT: i32.shl $push9=, $8, $17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop9 +; NO-SIMD128-NEXT: i32.shl $push10=, $7, $17 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop10 +; NO-SIMD128-NEXT: i32.shl $push11=, $6, $17 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop11 +; NO-SIMD128-NEXT: i32.shl $push12=, $5, $17 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-NEXT: i32.shl $push13=, $4, $17 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop13 +; NO-SIMD128-NEXT: i32.shl $push14=, $3, $17 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop14 +; NO-SIMD128-NEXT: i32.shl $push15=, $2, $17 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop15 +; NO-SIMD128-NEXT: i32.shl $push16=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_v16i8: ; NO-SIMD128-FAST: .functype shl_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push40=, $17, $pop0 -; NO-SIMD128-FAST-NEXT: local.tee $push39=, $17=, $pop40 -; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop39 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push17=, $17=, $pop18 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop17 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $17 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 ; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $17 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const 
$push11=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 -; NO-SIMD128-FAST-NEXT: i32.shl $push17=, $9, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $10, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.shl $push23=, $11, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $12, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.shl $push29=, $13, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.shl $push32=, $14, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.shl $push35=, $15, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.shl $push38=, $16, $17 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $9, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $10, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $11, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $12, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $13, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $14, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $15, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $16, $17 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, @@ -2890,75 +2362,53 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) { ; NO-SIMD128: .functype shl_const_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, 
i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 5 -; NO-SIMD128-NEXT: i32.shl $push1=, $9, $pop0 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push53=, 5 -; NO-SIMD128-NEXT: i32.shl $push2=, $5, $pop53 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push52=, 5 -; NO-SIMD128-NEXT: i32.shl $push3=, $3, $pop52 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push51=, 5 -; NO-SIMD128-NEXT: i32.shl $push4=, $2, $pop51 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push50=, 5 -; NO-SIMD128-NEXT: i32.shl $push5=, $1, $pop50 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push7=, 15 -; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.shl $push6=, $16, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.const $push48=, 5 -; NO-SIMD128-NEXT: i32.shl $push9=, $15, $pop48 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push47=, 5 -; NO-SIMD128-NEXT: i32.shl $push12=, $14, $pop47 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 12 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push46=, 5 -; NO-SIMD128-NEXT: i32.shl $push15=, $13, $pop46 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 11 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push45=, 5 -; NO-SIMD128-NEXT: i32.shl $push18=, $12, $pop45 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 10 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.const $push44=, 5 -; NO-SIMD128-NEXT: i32.shl $push21=, $11, $pop44 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push25=, 9 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push43=, 5 -; NO-SIMD128-NEXT: i32.shl $push24=, $10, $pop43 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push28=, 7 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.const $push42=, 5 -; NO-SIMD128-NEXT: i32.shl $push27=, $8, $pop42 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.const $push41=, 5 -; NO-SIMD128-NEXT: i32.shl $push30=, $7, $pop41 -; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-NEXT: i32.const $push40=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop40 -; NO-SIMD128-NEXT: i32.const $push39=, 5 -; NO-SIMD128-NEXT: i32.shl $push33=, $6, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop33 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.const $push38=, 5 -; NO-SIMD128-NEXT: i32.shl $push35=, $4, $pop38 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.shl $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push31=, 5 +; NO-SIMD128-NEXT: i32.shl $push2=, $15, $pop31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push30=, 5 +; 
NO-SIMD128-NEXT: i32.shl $push3=, $14, $pop30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push29=, 5 +; NO-SIMD128-NEXT: i32.shl $push4=, $13, $pop29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push28=, 5 +; NO-SIMD128-NEXT: i32.shl $push5=, $12, $pop28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push27=, 5 +; NO-SIMD128-NEXT: i32.shl $push6=, $11, $pop27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push26=, 5 +; NO-SIMD128-NEXT: i32.shl $push7=, $10, $pop26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push25=, 5 +; NO-SIMD128-NEXT: i32.shl $push8=, $9, $pop25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push24=, 5 +; NO-SIMD128-NEXT: i32.shl $push9=, $8, $pop24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push23=, 5 +; NO-SIMD128-NEXT: i32.shl $push10=, $7, $pop23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push22=, 5 +; NO-SIMD128-NEXT: i32.shl $push11=, $6, $pop22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop11 +; NO-SIMD128-NEXT: i32.const $push21=, 5 +; NO-SIMD128-NEXT: i32.shl $push12=, $5, $pop21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push20=, 5 +; NO-SIMD128-NEXT: i32.shl $push13=, $4, $pop20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop13 +; NO-SIMD128-NEXT: i32.const $push19=, 5 +; NO-SIMD128-NEXT: i32.shl $push14=, $3, $pop19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, 5 +; NO-SIMD128-NEXT: i32.shl $push15=, $2, $pop18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push17=, 5 +; NO-SIMD128-NEXT: i32.shl $push16=, $1, $pop17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_const_v16i8: @@ -2967,73 +2417,51 @@ define <16 x i8> @shl_const_v16i8(<16 x i8> %v) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5 ; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop31 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop30 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $pop50 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $6, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $7, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; 
NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $8, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $9, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push19=, $10, $pop44 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push22=, $11, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push25=, $12, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push28=, $13, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push31=, $14, $pop40 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $15, $pop39 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push37=, $16, $pop38 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push9=, $9, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $10, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push11=, $11, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $12, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), 
$pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $14, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push15=, $15, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $16, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %a = shl <16 x i8> %v, @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128: .functype shl_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop0 -; NO-SIMD128-NEXT: i32.shl $push2=, $9, $pop1 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push69=, 255 -; NO-SIMD128-NEXT: i32.and $push3=, $21, $pop69 -; NO-SIMD128-NEXT: i32.shl $push4=, $5, $pop3 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push68=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $19, $pop68 -; NO-SIMD128-NEXT: i32.shl $push6=, $3, $pop5 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push67=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $18, $pop67 -; NO-SIMD128-NEXT: i32.shl $push8=, $2, $pop7 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push66=, 255 -; NO-SIMD128-NEXT: i32.and $push9=, $17, $pop66 -; NO-SIMD128-NEXT: i32.shl $push10=, $1, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push13=, 15 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push65=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $32, $pop65 -; NO-SIMD128-NEXT: i32.shl $push12=, $16, $pop11 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push17=, 14 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push64=, 255 -; NO-SIMD128-NEXT: i32.and $push15=, $31, $pop64 -; NO-SIMD128-NEXT: i32.shl $push16=, $15, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push21=, 13 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.const $push63=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $30, $pop63 -; NO-SIMD128-NEXT: i32.shl $push20=, $14, $pop19 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push25=, 12 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push62=, 255 -; NO-SIMD128-NEXT: i32.and $push23=, $29, $pop62 -; NO-SIMD128-NEXT: i32.shl $push24=, $13, $pop23 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push61=, 255 -; NO-SIMD128-NEXT: i32.and $push27=, $28, $pop61 -; NO-SIMD128-NEXT: i32.shl $push28=, $12, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push33=, 10 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.const $push60=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $27, $pop60 -; NO-SIMD128-NEXT: i32.shl $push32=, $11, $pop31 -; 
NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push37=, 9 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push59=, 255 -; NO-SIMD128-NEXT: i32.and $push35=, $26, $pop59 -; NO-SIMD128-NEXT: i32.shl $push36=, $10, $pop35 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const $push58=, 255 -; NO-SIMD128-NEXT: i32.and $push39=, $24, $pop58 -; NO-SIMD128-NEXT: i32.shl $push40=, $8, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push45=, 6 -; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-NEXT: i32.const $push57=, 255 -; NO-SIMD128-NEXT: i32.and $push43=, $23, $pop57 -; NO-SIMD128-NEXT: i32.shl $push44=, $7, $pop43 -; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $22, $pop56 -; NO-SIMD128-NEXT: i32.shl $push48=, $6, $pop47 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push53=, 3 -; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-NEXT: i32.const $push55=, 255 -; NO-SIMD128-NEXT: i32.and $push51=, $20, $pop55 -; NO-SIMD128-NEXT: i32.shl $push52=, $4, $pop51 -; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop0 +; NO-SIMD128-NEXT: i32.shl $push2=, $16, $pop1 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push47=, 255 +; NO-SIMD128-NEXT: i32.and $push3=, $31, $pop47 +; NO-SIMD128-NEXT: i32.shl $push4=, $15, $pop3 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push46=, 255 +; NO-SIMD128-NEXT: i32.and $push5=, $30, $pop46 +; NO-SIMD128-NEXT: i32.shl $push6=, $14, $pop5 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push45=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $29, $pop45 +; NO-SIMD128-NEXT: i32.shl $push8=, $13, $pop7 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push44=, 255 +; NO-SIMD128-NEXT: i32.and $push9=, $28, $pop44 +; NO-SIMD128-NEXT: i32.shl $push10=, $12, $pop9 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push43=, 255 +; NO-SIMD128-NEXT: i32.and $push11=, $27, $pop43 +; NO-SIMD128-NEXT: i32.shl $push12=, $11, $pop11 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push42=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $26, $pop42 +; NO-SIMD128-NEXT: i32.shl $push14=, $10, $pop13 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push41=, 255 +; NO-SIMD128-NEXT: i32.and $push15=, $25, $pop41 +; NO-SIMD128-NEXT: i32.shl $push16=, $9, $pop15 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $24, $pop40 +; NO-SIMD128-NEXT: i32.shl $push18=, $8, $pop17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push39=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $23, $pop39 +; NO-SIMD128-NEXT: i32.shl $push20=, $7, $pop19 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $22, $pop38 +; NO-SIMD128-NEXT: i32.shl $push22=, $6, $pop21 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push37=, 255 +; NO-SIMD128-NEXT: i32.and 
$push23=, $21, $pop37 +; NO-SIMD128-NEXT: i32.shl $push24=, $5, $pop23 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $20, $pop36 +; NO-SIMD128-NEXT: i32.shl $push26=, $4, $pop25 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop26 +; NO-SIMD128-NEXT: i32.const $push35=, 255 +; NO-SIMD128-NEXT: i32.and $push27=, $19, $pop35 +; NO-SIMD128-NEXT: i32.shl $push28=, $3, $pop27 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $18, $pop34 +; NO-SIMD128-NEXT: i32.shl $push30=, $2, $pop29 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop30 +; NO-SIMD128-NEXT: i32.const $push33=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $17, $pop33 +; NO-SIMD128-NEXT: i32.shl $push32=, $1, $pop31 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_vec_v16i8: @@ -3342,88 +2748,66 @@ define <16 x i8> @shl_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop0 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push3=, $18, $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $18, $pop47 ; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $19, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $19, $pop46 ; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $20, $pop67 -; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $21, $pop66 -; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $5, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $22, $pop65 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $6, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop64 -; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $7, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $24, $pop63 -; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $8, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop62 -; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $9, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; 
NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $26, $pop61 -; NO-SIMD128-FAST-NEXT: i32.shl $push30=, $10, $pop29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $27, $pop60 -; NO-SIMD128-FAST-NEXT: i32.shl $push34=, $11, $pop33 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $28, $pop59 -; NO-SIMD128-FAST-NEXT: i32.shl $push38=, $12, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $29, $pop58 -; NO-SIMD128-FAST-NEXT: i32.shl $push42=, $13, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push45=, $30, $pop57 -; NO-SIMD128-FAST-NEXT: i32.shl $push46=, $14, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push49=, $31, $pop56 -; NO-SIMD128-FAST-NEXT: i32.shl $push50=, $15, $pop49 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $32, $pop55 -; NO-SIMD128-FAST-NEXT: i32.shl $push54=, $16, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $20, $pop45 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $21, $pop44 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $5, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $22, $pop43 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $23, $pop42 +; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $7, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $24, $pop41 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $25, $pop40 +; NO-SIMD128-FAST-NEXT: i32.shl $push18=, $9, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $10, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20 +; 
NO-SIMD128-FAST-NEXT: i32.const $push38=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $27, $pop38 +; NO-SIMD128-FAST-NEXT: i32.shl $push22=, $11, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $28, $pop37 +; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $12, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $29, $pop36 +; NO-SIMD128-FAST-NEXT: i32.shl $push26=, $13, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $30, $pop35 +; NO-SIMD128-FAST-NEXT: i32.shl $push28=, $14, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $31, $pop34 +; NO-SIMD128-FAST-NEXT: i32.shl $push30=, $15, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $32, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shl $push32=, $16, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %a = shl <16 x i8> %v, %x ret <16 x i8> %a @@ -3445,79 +2829,57 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-LABEL: shr_s_v16i8: ; NO-SIMD128: .functype shr_s_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend8_s $push1=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16 ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push56=, $17, $pop0 -; NO-SIMD128-NEXT: local.tee $push55=, $17=, $pop56 -; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop55 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend8_s $push3=, $5 +; NO-SIMD128-NEXT: i32.and $push34=, $17, $pop0 +; NO-SIMD128-NEXT: local.tee $push33=, $17=, $pop34 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop33 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.extend8_s $push3=, $15 ; NO-SIMD128-NEXT: i32.shr_s $push4=, $pop3, $17 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 -; NO-SIMD128-NEXT: i32.extend8_s $push5=, $3 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop4 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $14 ; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $17 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-NEXT: i32.extend8_s $push7=, $2 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop6 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $13 ; NO-SIMD128-NEXT: i32.shr_s $push8=, $pop7, $17 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-NEXT: i32.extend8_s $push9=, $1 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop8 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $12 ; NO-SIMD128-NEXT: i32.shr_s $push10=, $pop9, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push13=, 15 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.extend8_s $push11=, $16 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop10 +; NO-SIMD128-NEXT: i32.extend8_s $push11=, $11 ; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push17=, 14 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.extend8_s $push15=, $15 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop12 +; 
NO-SIMD128-NEXT: i32.extend8_s $push13=, $10 +; NO-SIMD128-NEXT: i32.shr_s $push14=, $pop13, $17 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop14 +; NO-SIMD128-NEXT: i32.extend8_s $push15=, $9 ; NO-SIMD128-NEXT: i32.shr_s $push16=, $pop15, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push21=, 13 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.extend8_s $push19=, $14 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $8 +; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop18 +; NO-SIMD128-NEXT: i32.extend8_s $push19=, $7 ; NO-SIMD128-NEXT: i32.shr_s $push20=, $pop19, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push25=, 12 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.extend8_s $push23=, $13 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $6 +; NO-SIMD128-NEXT: i32.shr_s $push22=, $pop21, $17 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop22 +; NO-SIMD128-NEXT: i32.extend8_s $push23=, $5 ; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.extend8_s $push27=, $12 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop24 +; NO-SIMD128-NEXT: i32.extend8_s $push25=, $4 +; NO-SIMD128-NEXT: i32.shr_s $push26=, $pop25, $17 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop26 +; NO-SIMD128-NEXT: i32.extend8_s $push27=, $3 ; NO-SIMD128-NEXT: i32.shr_s $push28=, $pop27, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push33=, 10 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.extend8_s $push31=, $11 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop28 +; NO-SIMD128-NEXT: i32.extend8_s $push29=, $2 +; NO-SIMD128-NEXT: i32.shr_s $push30=, $pop29, $17 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop30 +; NO-SIMD128-NEXT: i32.extend8_s $push31=, $1 ; NO-SIMD128-NEXT: i32.shr_s $push32=, $pop31, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push37=, 9 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.extend8_s $push35=, $10 -; NO-SIMD128-NEXT: i32.shr_s $push36=, $pop35, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.extend8_s $push39=, $8 -; NO-SIMD128-NEXT: i32.shr_s $push40=, $pop39, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push45=, 6 -; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-NEXT: i32.extend8_s $push43=, $7 -; NO-SIMD128-NEXT: i32.shr_s $push44=, $pop43, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.extend8_s $push47=, $6 -; NO-SIMD128-NEXT: i32.shr_s $push48=, $pop47, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push53=, 3 -; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-NEXT: i32.extend8_s $push51=, $4 -; NO-SIMD128-NEXT: i32.shr_s $push52=, $pop51, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_v16i8: @@ -3525,9 +2887,9 @@ define <16 x i8> 
@shr_s_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push1=, $1 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push56=, $17, $pop0 -; NO-SIMD128-FAST-NEXT: local.tee $push55=, $1=, $pop56 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop55 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $17, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push33=, $1=, $pop34 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop33 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push3=, $2 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $1 @@ -3535,67 +2897,45 @@ define <16 x i8> @shr_s_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push5=, $3 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push7=, $4 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push8=, $pop7, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $5 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $pop9, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $5 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $6 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $6 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $8 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $pop15, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $7 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $9 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $10 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $8 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $11 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $12 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $9 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $13 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push26=, $pop25, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; 
NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $10 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push27=, $14 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push28=, $pop27, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $15 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push33=, $11 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push34=, $pop33, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push37=, $12 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push38=, $pop37, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $13 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push42=, $pop41, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push45=, $14 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push46=, $pop45, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop49, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push53=, $16 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push54=, $pop53, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push31=, $16 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, @@ -3811,108 +3151,86 @@ define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128-LABEL: shr_s_vec_v16i8: ; NO-SIMD128: .functype shr_s_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend8_s $push2=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push2=, $16 ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop0 ; NO-SIMD128-NEXT: i32.shr_s $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop3 -; NO-SIMD128-NEXT: i32.extend8_s $push5=, $5 -; NO-SIMD128-NEXT: i32.const $push85=, 255 -; NO-SIMD128-NEXT: i32.and $push4=, $21, $pop85 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 +; NO-SIMD128-NEXT: i32.extend8_s $push5=, $15 +; NO-SIMD128-NEXT: i32.const $push63=, 255 +; NO-SIMD128-NEXT: i32.and $push4=, $31, $pop63 ; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-NEXT: i32.extend8_s $push8=, $3 -; NO-SIMD128-NEXT: i32.const $push84=, 255 
-; NO-SIMD128-NEXT: i32.and $push7=, $19, $pop84 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop6 +; NO-SIMD128-NEXT: i32.extend8_s $push8=, $14 +; NO-SIMD128-NEXT: i32.const $push62=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $30, $pop62 ; NO-SIMD128-NEXT: i32.shr_s $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-NEXT: i32.extend8_s $push11=, $2 -; NO-SIMD128-NEXT: i32.const $push83=, 255 -; NO-SIMD128-NEXT: i32.and $push10=, $18, $pop83 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop9 +; NO-SIMD128-NEXT: i32.extend8_s $push11=, $13 +; NO-SIMD128-NEXT: i32.const $push61=, 255 +; NO-SIMD128-NEXT: i32.and $push10=, $29, $pop61 ; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop12 -; NO-SIMD128-NEXT: i32.extend8_s $push14=, $1 -; NO-SIMD128-NEXT: i32.const $push82=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $17, $pop82 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-NEXT: i32.extend8_s $push14=, $12 +; NO-SIMD128-NEXT: i32.const $push60=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $28, $pop60 ; NO-SIMD128-NEXT: i32.shr_s $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 15 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.extend8_s $push17=, $16 -; NO-SIMD128-NEXT: i32.const $push81=, 255 -; NO-SIMD128-NEXT: i32.and $push16=, $32, $pop81 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop15 +; NO-SIMD128-NEXT: i32.extend8_s $push17=, $11 +; NO-SIMD128-NEXT: i32.const $push59=, 255 +; NO-SIMD128-NEXT: i32.and $push16=, $27, $pop59 ; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push24=, 14 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.extend8_s $push22=, $15 -; NO-SIMD128-NEXT: i32.const $push80=, 255 -; NO-SIMD128-NEXT: i32.and $push21=, $31, $pop80 -; NO-SIMD128-NEXT: i32.shr_s $push23=, $pop22, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push29=, 13 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.extend8_s $push27=, $14 -; NO-SIMD128-NEXT: i32.const $push79=, 255 -; NO-SIMD128-NEXT: i32.and $push26=, $30, $pop79 -; NO-SIMD128-NEXT: i32.shr_s $push28=, $pop27, $pop26 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push34=, 12 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.extend8_s $push32=, $13 -; NO-SIMD128-NEXT: i32.const $push78=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $29, $pop78 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop18 +; NO-SIMD128-NEXT: i32.extend8_s $push20=, $10 +; NO-SIMD128-NEXT: i32.const $push58=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $26, $pop58 +; NO-SIMD128-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop21 +; NO-SIMD128-NEXT: i32.extend8_s $push23=, $9 +; NO-SIMD128-NEXT: i32.const $push57=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $25, $pop57 +; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop24 +; NO-SIMD128-NEXT: i32.extend8_s $push26=, $8 +; NO-SIMD128-NEXT: i32.const $push56=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $24, $pop56 +; NO-SIMD128-NEXT: i32.shr_s $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop27 +; NO-SIMD128-NEXT: i32.extend8_s $push29=, $7 +; NO-SIMD128-NEXT: i32.const $push55=, 255 +; NO-SIMD128-NEXT: i32.and $push28=, $23, $pop55 +; NO-SIMD128-NEXT: 
i32.shr_s $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop30 +; NO-SIMD128-NEXT: i32.extend8_s $push32=, $6 +; NO-SIMD128-NEXT: i32.const $push54=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $22, $pop54 ; NO-SIMD128-NEXT: i32.shr_s $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push39=, 11 -; NO-SIMD128-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-NEXT: i32.extend8_s $push37=, $12 -; NO-SIMD128-NEXT: i32.const $push77=, 255 -; NO-SIMD128-NEXT: i32.and $push36=, $28, $pop77 -; NO-SIMD128-NEXT: i32.shr_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-NEXT: i32.const $push44=, 10 -; NO-SIMD128-NEXT: i32.add $push45=, $0, $pop44 -; NO-SIMD128-NEXT: i32.extend8_s $push42=, $11 -; NO-SIMD128-NEXT: i32.const $push76=, 255 -; NO-SIMD128-NEXT: i32.and $push41=, $27, $pop76 -; NO-SIMD128-NEXT: i32.shr_s $push43=, $pop42, $pop41 -; NO-SIMD128-NEXT: i32.store8 0($pop45), $pop43 -; NO-SIMD128-NEXT: i32.const $push49=, 9 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.extend8_s $push47=, $10 -; NO-SIMD128-NEXT: i32.const $push75=, 255 -; NO-SIMD128-NEXT: i32.and $push46=, $26, $pop75 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop33 +; NO-SIMD128-NEXT: i32.extend8_s $push35=, $5 +; NO-SIMD128-NEXT: i32.const $push53=, 255 +; NO-SIMD128-NEXT: i32.and $push34=, $21, $pop53 +; NO-SIMD128-NEXT: i32.shr_s $push36=, $pop35, $pop34 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop36 +; NO-SIMD128-NEXT: i32.extend8_s $push38=, $4 +; NO-SIMD128-NEXT: i32.const $push52=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $20, $pop52 +; NO-SIMD128-NEXT: i32.shr_s $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop39 +; NO-SIMD128-NEXT: i32.extend8_s $push41=, $3 +; NO-SIMD128-NEXT: i32.const $push51=, 255 +; NO-SIMD128-NEXT: i32.and $push40=, $19, $pop51 +; NO-SIMD128-NEXT: i32.shr_s $push42=, $pop41, $pop40 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop42 +; NO-SIMD128-NEXT: i32.extend8_s $push44=, $2 +; NO-SIMD128-NEXT: i32.const $push50=, 255 +; NO-SIMD128-NEXT: i32.and $push43=, $18, $pop50 +; NO-SIMD128-NEXT: i32.shr_s $push45=, $pop44, $pop43 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop45 +; NO-SIMD128-NEXT: i32.extend8_s $push47=, $1 +; NO-SIMD128-NEXT: i32.const $push49=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $17, $pop49 ; NO-SIMD128-NEXT: i32.shr_s $push48=, $pop47, $pop46 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push54=, 7 -; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 -; NO-SIMD128-NEXT: i32.extend8_s $push52=, $8 -; NO-SIMD128-NEXT: i32.const $push74=, 255 -; NO-SIMD128-NEXT: i32.and $push51=, $24, $pop74 -; NO-SIMD128-NEXT: i32.shr_s $push53=, $pop52, $pop51 -; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-NEXT: i32.const $push59=, 6 -; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 -; NO-SIMD128-NEXT: i32.extend8_s $push57=, $7 -; NO-SIMD128-NEXT: i32.const $push73=, 255 -; NO-SIMD128-NEXT: i32.and $push56=, $23, $pop73 -; NO-SIMD128-NEXT: i32.shr_s $push58=, $pop57, $pop56 -; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-NEXT: i32.const $push64=, 5 -; NO-SIMD128-NEXT: i32.add $push65=, $0, $pop64 -; NO-SIMD128-NEXT: i32.extend8_s $push62=, $6 -; NO-SIMD128-NEXT: i32.const $push72=, 255 -; NO-SIMD128-NEXT: i32.and $push61=, $22, $pop72 -; NO-SIMD128-NEXT: i32.shr_s $push63=, $pop62, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop65), $pop63 -; NO-SIMD128-NEXT: i32.const $push69=, 3 -; 
NO-SIMD128-NEXT: i32.add $push70=, $0, $pop69 -; NO-SIMD128-NEXT: i32.extend8_s $push67=, $4 -; NO-SIMD128-NEXT: i32.const $push71=, 255 -; NO-SIMD128-NEXT: i32.and $push66=, $20, $pop71 -; NO-SIMD128-NEXT: i32.shr_s $push68=, $pop67, $pop66 -; NO-SIMD128-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop48 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_vec_v16i8: @@ -3924,102 +3242,80 @@ define <16 x i8> @shr_s_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push5=, $2 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop85 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop63 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push8=, $3 -; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop84 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop62 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $4 -; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $20, $pop83 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $pop12 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop14 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $5 -; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop82 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $6 -; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $22, $pop81 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push26=, $7 -; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $23, $pop80 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $4 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop61 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $21, $pop60 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push17=, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $22, $pop59 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop58 +; 
NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push23=, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $24, $pop57 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push26=, $9 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop56 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push28=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push31=, $8 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $24, $pop79 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop29), $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push34=, $9 -; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop78 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push35=, $pop34, $pop33 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push39=, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push38=, $26, $pop77 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push40=, $pop39, $pop38 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $11 -; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push43=, $27, $pop76 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $10 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $26, $pop55 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push32=, $11 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $27, $pop54 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push33=, $pop32, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop33 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push35=, $12 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $28, $pop53 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push36=, $pop35, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push38=, $13 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $29, $pop52 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push41=, $14 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $30, $pop51 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push42=, $pop41, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop42 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push44=, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $31, $pop50 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push45=, $pop44, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 
0($pop42), $pop45 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push47=, $0, $pop46 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push49=, $12 -; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push48=, $28, $pop75 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push50=, $pop49, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop47), $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push54=, $13 -; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $29, $pop74 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push55=, $pop54, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push57=, $0, $pop56 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push59=, $14 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push58=, $30, $pop73 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push60=, $pop59, $pop58 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop57), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push64=, $15 -; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $31, $pop72 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push65=, $pop64, $pop63 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop65 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push67=, $0, $pop66 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push69=, $16 -; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push68=, $32, $pop71 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push70=, $pop69, $pop68 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop67), $pop70 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push47=, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $32, $pop49 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push48=, $pop47, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop48 ; NO-SIMD128-FAST-NEXT: return %a = ashr <16 x i8> %v, %x ret <16 x i8> %a @@ -4042,94 +3338,72 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128: .functype shr_u_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $9, $pop0 -; NO-SIMD128-NEXT: i32.const $push72=, 255 -; NO-SIMD128-NEXT: i32.and $push71=, $17, $pop72 -; NO-SIMD128-NEXT: local.tee $push70=, $17=, $pop71 -; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop70 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push69=, 255 -; NO-SIMD128-NEXT: i32.and $push3=, $5, $pop69 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.const $push50=, 255 +; NO-SIMD128-NEXT: i32.and $push49=, $17, $pop50 +; NO-SIMD128-NEXT: local.tee $push48=, $17=, $pop49 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop48 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push47=, 255 +; NO-SIMD128-NEXT: i32.and $push3=, $15, $pop47 ; NO-SIMD128-NEXT: i32.shr_u $push4=, $pop3, $17 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push68=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop68 +; NO-SIMD128-NEXT: i32.store8 14($0), 
$pop4 +; NO-SIMD128-NEXT: i32.const $push46=, 255 +; NO-SIMD128-NEXT: i32.and $push5=, $14, $pop46 ; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $17 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push67=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $2, $pop67 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push45=, 255 +; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop45 ; NO-SIMD128-NEXT: i32.shr_u $push8=, $pop7, $17 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push66=, 255 -; NO-SIMD128-NEXT: i32.and $push9=, $1, $pop66 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push44=, 255 +; NO-SIMD128-NEXT: i32.and $push9=, $12, $pop44 ; NO-SIMD128-NEXT: i32.shr_u $push10=, $pop9, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push13=, 15 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push65=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $16, $pop65 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push43=, 255 +; NO-SIMD128-NEXT: i32.and $push11=, $11, $pop43 ; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push17=, 14 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push64=, 255 -; NO-SIMD128-NEXT: i32.and $push15=, $15, $pop64 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push42=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $10, $pop42 +; NO-SIMD128-NEXT: i32.shr_u $push14=, $pop13, $17 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push41=, 255 +; NO-SIMD128-NEXT: i32.and $push15=, $9, $pop41 ; NO-SIMD128-NEXT: i32.shr_u $push16=, $pop15, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push21=, 13 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.const $push63=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $14, $pop63 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $8, $pop40 +; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push39=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop39 ; NO-SIMD128-NEXT: i32.shr_u $push20=, $pop19, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push25=, 12 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push62=, 255 -; NO-SIMD128-NEXT: i32.and $push23=, $13, $pop62 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, 255 +; NO-SIMD128-NEXT: i32.and $push21=, $6, $pop38 +; NO-SIMD128-NEXT: i32.shr_u $push22=, $pop21, $17 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push37=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $5, $pop37 ; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push61=, 255 -; NO-SIMD128-NEXT: i32.and $push27=, $12, $pop61 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $4, $pop36 +; NO-SIMD128-NEXT: i32.shr_u $push26=, $pop25, $17 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop26 +; NO-SIMD128-NEXT: 
i32.const $push35=, 255 +; NO-SIMD128-NEXT: i32.and $push27=, $3, $pop35 ; NO-SIMD128-NEXT: i32.shr_u $push28=, $pop27, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push33=, 10 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.const $push60=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $11, $pop60 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $2, $pop34 +; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $17 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop30 +; NO-SIMD128-NEXT: i32.const $push33=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $1, $pop33 ; NO-SIMD128-NEXT: i32.shr_u $push32=, $pop31, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push37=, 9 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push59=, 255 -; NO-SIMD128-NEXT: i32.and $push35=, $10, $pop59 -; NO-SIMD128-NEXT: i32.shr_u $push36=, $pop35, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.const $push58=, 255 -; NO-SIMD128-NEXT: i32.and $push39=, $8, $pop58 -; NO-SIMD128-NEXT: i32.shr_u $push40=, $pop39, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push45=, 6 -; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-NEXT: i32.const $push57=, 255 -; NO-SIMD128-NEXT: i32.and $push43=, $7, $pop57 -; NO-SIMD128-NEXT: i32.shr_u $push44=, $pop43, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $6, $pop56 -; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push53=, 3 -; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-NEXT: i32.const $push55=, 255 -; NO-SIMD128-NEXT: i32.and $push51=, $4, $pop55 -; NO-SIMD128-NEXT: i32.shr_u $push52=, $pop51, $17 -; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_v16i8: @@ -4137,93 +3411,71 @@ define <16 x i8> @shr_u_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push71=, $17, $pop72 -; NO-SIMD128-FAST-NEXT: local.tee $push70=, $1=, $pop71 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $17, $pop50 +; NO-SIMD128-FAST-NEXT: local.tee $push48=, $1=, $pop49 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop48 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop47 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push4=, $pop3, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop46 ; NO-SIMD128-FAST-NEXT: 
i32.shr_u $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop67 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop45 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push8=, $pop7, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $5, $pop66 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $5, $pop44 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push10=, $pop9, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $6, $pop43 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push15=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $6, $pop65 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $7, $pop42 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push14=, $pop13, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push17=, $7, $pop64 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $pop41 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push16=, $pop15, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $9, $pop40 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $8, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $10, $pop39 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $11, $pop38 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push22=, $pop21, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $9, $pop62 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $12, $pop37 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push24=, $pop23, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $13, $pop36 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push26=, $pop25, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; 
NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push27=, $10, $pop61 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $14, $pop35 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push28=, $pop27, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push31=, $11, $pop60 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop34 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $1 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $16, $pop33 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push32=, $pop31, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push37=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $12, $pop59 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push39=, $13, $pop58 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push40=, $pop39, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push43=, $14, $pop57 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push44=, $pop43, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push47=, $15, $pop56 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $16, $pop55 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push52=, $pop51, $1 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <16 x i8> undef, i8 %x, i32 0 %s = shufflevector <16 x i8> %t, <16 x i8> undef, @@ -4440,123 +3692,101 @@ define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128: .functype shr_u_vec_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push2=, $9, $pop0 -; NO-SIMD128-NEXT: i32.const $push101=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $25, $pop101 -; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push100=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $5, $pop100 -; NO-SIMD128-NEXT: i32.const $push99=, 255 
-; NO-SIMD128-NEXT: i32.and $push4=, $21, $pop99 -; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push98=, 255 -; NO-SIMD128-NEXT: i32.and $push8=, $3, $pop98 -; NO-SIMD128-NEXT: i32.const $push97=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $19, $pop97 -; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push96=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $2, $pop96 -; NO-SIMD128-NEXT: i32.const $push95=, 255 -; NO-SIMD128-NEXT: i32.and $push10=, $18, $pop95 -; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop12 -; NO-SIMD128-NEXT: i32.const $push94=, 255 -; NO-SIMD128-NEXT: i32.and $push14=, $1, $pop94 -; NO-SIMD128-NEXT: i32.const $push93=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $17, $pop93 -; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 15 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push92=, 255 -; NO-SIMD128-NEXT: i32.and $push17=, $16, $pop92 -; NO-SIMD128-NEXT: i32.const $push91=, 255 -; NO-SIMD128-NEXT: i32.and $push16=, $32, $pop91 -; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push24=, 14 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.const $push90=, 255 -; NO-SIMD128-NEXT: i32.and $push22=, $15, $pop90 -; NO-SIMD128-NEXT: i32.const $push89=, 255 -; NO-SIMD128-NEXT: i32.and $push21=, $31, $pop89 -; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop21 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push29=, 13 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push88=, 255 -; NO-SIMD128-NEXT: i32.and $push27=, $14, $pop88 -; NO-SIMD128-NEXT: i32.const $push87=, 255 -; NO-SIMD128-NEXT: i32.and $push26=, $30, $pop87 -; NO-SIMD128-NEXT: i32.shr_u $push28=, $pop27, $pop26 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push34=, 12 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.const $push86=, 255 -; NO-SIMD128-NEXT: i32.and $push32=, $13, $pop86 -; NO-SIMD128-NEXT: i32.const $push85=, 255 -; NO-SIMD128-NEXT: i32.and $push31=, $29, $pop85 -; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push39=, 11 -; NO-SIMD128-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-NEXT: i32.const $push84=, 255 -; NO-SIMD128-NEXT: i32.and $push37=, $12, $pop84 -; NO-SIMD128-NEXT: i32.const $push83=, 255 -; NO-SIMD128-NEXT: i32.and $push36=, $28, $pop83 -; NO-SIMD128-NEXT: i32.shr_u $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-NEXT: i32.const $push44=, 10 -; NO-SIMD128-NEXT: i32.add $push45=, $0, $pop44 -; NO-SIMD128-NEXT: i32.const $push82=, 255 -; NO-SIMD128-NEXT: i32.and $push42=, $11, $pop82 -; NO-SIMD128-NEXT: i32.const $push81=, 255 -; NO-SIMD128-NEXT: i32.and $push41=, $27, $pop81 -; NO-SIMD128-NEXT: i32.shr_u $push43=, $pop42, $pop41 -; NO-SIMD128-NEXT: i32.store8 0($pop45), $pop43 -; NO-SIMD128-NEXT: i32.const $push49=, 9 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.const $push80=, 255 -; NO-SIMD128-NEXT: i32.and $push47=, $10, $pop80 +; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0 ; 
NO-SIMD128-NEXT: i32.const $push79=, 255 -; NO-SIMD128-NEXT: i32.and $push46=, $26, $pop79 -; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $pop46 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push54=, 7 -; NO-SIMD128-NEXT: i32.add $push55=, $0, $pop54 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop79 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push78=, 255 -; NO-SIMD128-NEXT: i32.and $push52=, $8, $pop78 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop78 ; NO-SIMD128-NEXT: i32.const $push77=, 255 -; NO-SIMD128-NEXT: i32.and $push51=, $24, $pop77 -; NO-SIMD128-NEXT: i32.shr_u $push53=, $pop52, $pop51 -; NO-SIMD128-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-NEXT: i32.const $push59=, 6 -; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 +; NO-SIMD128-NEXT: i32.and $push4=, $31, $pop77 +; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push76=, 255 -; NO-SIMD128-NEXT: i32.and $push57=, $7, $pop76 +; NO-SIMD128-NEXT: i32.and $push8=, $14, $pop76 ; NO-SIMD128-NEXT: i32.const $push75=, 255 -; NO-SIMD128-NEXT: i32.and $push56=, $23, $pop75 -; NO-SIMD128-NEXT: i32.shr_u $push58=, $pop57, $pop56 -; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-NEXT: i32.const $push64=, 5 -; NO-SIMD128-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-NEXT: i32.and $push7=, $30, $pop75 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop9 ; NO-SIMD128-NEXT: i32.const $push74=, 255 -; NO-SIMD128-NEXT: i32.and $push62=, $6, $pop74 +; NO-SIMD128-NEXT: i32.and $push11=, $13, $pop74 ; NO-SIMD128-NEXT: i32.const $push73=, 255 -; NO-SIMD128-NEXT: i32.and $push61=, $22, $pop73 -; NO-SIMD128-NEXT: i32.shr_u $push63=, $pop62, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop65), $pop63 -; NO-SIMD128-NEXT: i32.const $push69=, 3 -; NO-SIMD128-NEXT: i32.add $push70=, $0, $pop69 +; NO-SIMD128-NEXT: i32.and $push10=, $29, $pop73 +; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push72=, 255 -; NO-SIMD128-NEXT: i32.and $push67=, $4, $pop72 +; NO-SIMD128-NEXT: i32.and $push14=, $12, $pop72 ; NO-SIMD128-NEXT: i32.const $push71=, 255 -; NO-SIMD128-NEXT: i32.and $push66=, $20, $pop71 -; NO-SIMD128-NEXT: i32.shr_u $push68=, $pop67, $pop66 -; NO-SIMD128-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-NEXT: i32.and $push13=, $28, $pop71 +; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push70=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $11, $pop70 +; NO-SIMD128-NEXT: i32.const $push69=, 255 +; NO-SIMD128-NEXT: i32.and $push16=, $27, $pop69 +; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push68=, 255 +; NO-SIMD128-NEXT: i32.and $push20=, $10, $pop68 +; NO-SIMD128-NEXT: i32.const $push67=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $26, $pop67 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push66=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $9, $pop66 +; NO-SIMD128-NEXT: i32.const $push65=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $25, $pop65 +; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop24 +; NO-SIMD128-NEXT: i32.const 
$push64=, 255 +; NO-SIMD128-NEXT: i32.and $push26=, $8, $pop64 +; NO-SIMD128-NEXT: i32.const $push63=, 255 +; NO-SIMD128-NEXT: i32.and $push25=, $24, $pop63 +; NO-SIMD128-NEXT: i32.shr_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop27 +; NO-SIMD128-NEXT: i32.const $push62=, 255 +; NO-SIMD128-NEXT: i32.and $push29=, $7, $pop62 +; NO-SIMD128-NEXT: i32.const $push61=, 255 +; NO-SIMD128-NEXT: i32.and $push28=, $23, $pop61 +; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop30 +; NO-SIMD128-NEXT: i32.const $push60=, 255 +; NO-SIMD128-NEXT: i32.and $push32=, $6, $pop60 +; NO-SIMD128-NEXT: i32.const $push59=, 255 +; NO-SIMD128-NEXT: i32.and $push31=, $22, $pop59 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop31 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop33 +; NO-SIMD128-NEXT: i32.const $push58=, 255 +; NO-SIMD128-NEXT: i32.and $push35=, $5, $pop58 +; NO-SIMD128-NEXT: i32.const $push57=, 255 +; NO-SIMD128-NEXT: i32.and $push34=, $21, $pop57 +; NO-SIMD128-NEXT: i32.shr_u $push36=, $pop35, $pop34 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop36 +; NO-SIMD128-NEXT: i32.const $push56=, 255 +; NO-SIMD128-NEXT: i32.and $push38=, $4, $pop56 +; NO-SIMD128-NEXT: i32.const $push55=, 255 +; NO-SIMD128-NEXT: i32.and $push37=, $20, $pop55 +; NO-SIMD128-NEXT: i32.shr_u $push39=, $pop38, $pop37 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop39 +; NO-SIMD128-NEXT: i32.const $push54=, 255 +; NO-SIMD128-NEXT: i32.and $push41=, $3, $pop54 +; NO-SIMD128-NEXT: i32.const $push53=, 255 +; NO-SIMD128-NEXT: i32.and $push40=, $19, $pop53 +; NO-SIMD128-NEXT: i32.shr_u $push42=, $pop41, $pop40 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop42 +; NO-SIMD128-NEXT: i32.const $push52=, 255 +; NO-SIMD128-NEXT: i32.and $push44=, $2, $pop52 +; NO-SIMD128-NEXT: i32.const $push51=, 255 +; NO-SIMD128-NEXT: i32.and $push43=, $18, $pop51 +; NO-SIMD128-NEXT: i32.shr_u $push45=, $pop44, $pop43 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop45 +; NO-SIMD128-NEXT: i32.const $push50=, 255 +; NO-SIMD128-NEXT: i32.and $push47=, $1, $pop50 +; NO-SIMD128-NEXT: i32.const $push49=, 255 +; NO-SIMD128-NEXT: i32.and $push46=, $17, $pop49 +; NO-SIMD128-NEXT: i32.shr_u $push48=, $pop47, $pop46 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop48 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_vec_v16i8: @@ -4564,122 +3794,100 @@ define <16 x i8> @shr_u_vec_v16i8(<16 x i8> %v, <16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop79 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop100 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop99 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop98 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop97 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 3 -; 
NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop96 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop95 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop94 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop93 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop92 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $22, $pop91 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop90 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $23, $pop89 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop88 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $24, $pop87 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push86=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $9, $pop86 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $25, $pop85 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop33 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push84=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $10, $pop84 -; NO-SIMD128-FAST-NEXT: i32.const $push83=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $26, $pop83 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push38=, $pop37, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push45=, $0, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push82=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push42=, $11, $pop82 -; NO-SIMD128-FAST-NEXT: i32.const $push81=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $27, $pop81 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push43=, $pop42, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop45), $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push80=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push47=, $12, $pop80 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push46=, $28, $pop79 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; 
NO-SIMD128-FAST-NEXT: i32.const $push54=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push55=, $0, $pop54 ; NO-SIMD128-FAST-NEXT: i32.const $push78=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push52=, $13, $pop78 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop78 ; NO-SIMD128-FAST-NEXT: i32.const $push77=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $29, $pop77 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push53=, $pop52, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop55), $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push60=, $0, $pop59 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop77 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push76=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push57=, $14, $pop76 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop76 ; NO-SIMD128-FAST-NEXT: i32.const $push75=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push56=, $30, $pop75 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push58=, $pop57, $pop56 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push65=, $0, $pop64 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop75 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.const $push74=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push62=, $15, $pop74 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop74 ; NO-SIMD128-FAST-NEXT: i32.const $push73=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push61=, $31, $pop73 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push63=, $pop62, $pop61 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop65), $pop63 -; NO-SIMD128-FAST-NEXT: i32.const $push69=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push70=, $0, $pop69 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop73 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop12 ; NO-SIMD128-FAST-NEXT: i32.const $push72=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push67=, $16, $pop72 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $5, $pop72 ; NO-SIMD128-FAST-NEXT: i32.const $push71=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push66=, $32, $pop71 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push68=, $pop67, $pop66 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop70), $pop68 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $21, $pop71 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $6, $pop70 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $22, $pop69 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop67 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $8, $pop66 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $24, $pop65 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push64=, 255 +; 
NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop64 +; NO-SIMD128-FAST-NEXT: i32.const $push63=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $25, $pop63 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.const $push62=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $10, $pop62 +; NO-SIMD128-FAST-NEXT: i32.const $push61=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $26, $pop61 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push60=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $11, $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push59=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $27, $pop59 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop33 +; NO-SIMD128-FAST-NEXT: i32.const $push58=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $12, $pop58 +; NO-SIMD128-FAST-NEXT: i32.const $push57=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $28, $pop57 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push36=, $pop35, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push56=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push38=, $13, $pop56 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $29, $pop55 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $14, $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $30, $pop53 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push42=, $pop41, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push44=, $15, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push51=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $31, $pop51 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push45=, $pop44, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $16, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push46=, $32, $pop49 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push48=, $pop47, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop48 ; NO-SIMD128-FAST-NEXT: return %a = lshr <16 x i8> %v, %x ret <16 x i8> %a @@ -4701,60 +3909,38 @@ define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: and_v16i8: ; NO-SIMD128: .functype and_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.and $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.and $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.and $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.and $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.and $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.and $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; 
NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.and $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.and $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.and $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.and $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.and $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.and $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.and $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.and $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.and $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.and $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.and $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.and $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.and $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.and $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.and $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.and $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.and $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.and $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.and $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.and $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.and $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.and $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.and $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.and $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.and $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: and_v16i8: @@ -4766,54 +3952,32 @@ define <16 x i8> @and_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, 
$19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.and $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.and $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.and 
$push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = and <16 x i8> %x, %y ret <16 x i8> %a @@ -4835,60 +3999,38 @@ define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: or_v16i8: ; NO-SIMD128: .functype or_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.or $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.or $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.or $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.or $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.or $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.or $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.or $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.or $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.or $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.or $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.or $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.or $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.or $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.or $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.or $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.or $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.or $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.or $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.or $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.or $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: 
i32.or $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.or $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.or $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.or $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.or $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.or $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.or $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.or $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.or $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.or $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.or $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.or $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: or_v16i8: @@ -4900,54 +4042,32 @@ define <16 x i8> @or_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.or $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.or $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.or $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.or $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.or $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.or $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.or $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.or $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.or $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.or $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.or $push34=, $15, $31 -; 
NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.or $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.or $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.or $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.or $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.or $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.or $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.or $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.or $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.or $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.or $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.or $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.or $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.or $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = or <16 x i8> %x, %y ret <16 x i8> %a @@ -4969,60 +4089,38 @@ define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: xor_v16i8: ; NO-SIMD128: .functype xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.xor $push0=, $9, $25 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop0 -; NO-SIMD128-NEXT: i32.xor $push1=, $5, $21 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop1 -; NO-SIMD128-NEXT: i32.xor $push2=, $3, $19 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-NEXT: i32.xor $push3=, $2, $18 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop3 -; NO-SIMD128-NEXT: i32.xor $push4=, $1, $17 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 15 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.xor $push5=, $16, $32 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 14 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.xor $push8=, $15, $31 -; NO-SIMD128-NEXT: i32.store8 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 13 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.xor $push11=, $14, $30 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.xor $push14=, $13, $29 -; NO-SIMD128-NEXT: i32.store8 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push18=, 11 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.xor $push17=, $12, $28 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 10 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.xor $push20=, $11, $27 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; 
NO-SIMD128-NEXT: i32.const $push24=, 9 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.xor $push23=, $10, $26 -; NO-SIMD128-NEXT: i32.store8 0($pop25), $pop23 -; NO-SIMD128-NEXT: i32.const $push27=, 7 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.xor $push26=, $8, $24 -; NO-SIMD128-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.xor $push29=, $7, $23 -; NO-SIMD128-NEXT: i32.store8 0($pop31), $pop29 -; NO-SIMD128-NEXT: i32.const $push33=, 5 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.xor $push32=, $6, $22 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push36=, 3 -; NO-SIMD128-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-NEXT: i32.xor $push35=, $4, $20 -; NO-SIMD128-NEXT: i32.store8 0($pop37), $pop35 +; NO-SIMD128-NEXT: i32.xor $push0=, $16, $32 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop0 +; NO-SIMD128-NEXT: i32.xor $push1=, $15, $31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop1 +; NO-SIMD128-NEXT: i32.xor $push2=, $14, $30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $13, $29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop3 +; NO-SIMD128-NEXT: i32.xor $push4=, $12, $28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push5=, $11, $27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $10, $26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop6 +; NO-SIMD128-NEXT: i32.xor $push7=, $9, $25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop7 +; NO-SIMD128-NEXT: i32.xor $push8=, $8, $24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push9=, $7, $23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop9 +; NO-SIMD128-NEXT: i32.xor $push10=, $6, $22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop10 +; NO-SIMD128-NEXT: i32.xor $push11=, $5, $21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop11 +; NO-SIMD128-NEXT: i32.xor $push12=, $4, $20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop12 +; NO-SIMD128-NEXT: i32.xor $push13=, $3, $19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop13 +; NO-SIMD128-NEXT: i32.xor $push14=, $2, $18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop14 +; NO-SIMD128-NEXT: i32.xor $push15=, $1, $17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: xor_v16i8: @@ -5034,54 +4132,32 @@ define <16 x i8> @xor_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $19 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $8, $24 -; NO-SIMD128-FAST-NEXT: 
i32.store8 0($pop14), $pop15 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop21), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop24), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop27), $pop28 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop30), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop33), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop37 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $4, $20 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %a = xor <16 x i8> %x, %y ret <16 x i8> %a @@ -5104,75 +4180,53 @@ define <16 x i8> @not_v16i8(<16 x i8> %x) { ; NO-SIMD128: .functype not_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $9, $pop0 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push53=, -1 -; NO-SIMD128-NEXT: i32.xor $push2=, $5, $pop53 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push52=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $3, $pop52 -; 
NO-SIMD128-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push51=, -1 -; NO-SIMD128-NEXT: i32.xor $push4=, $2, $pop51 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push50=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $1, $pop50 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push7=, 15 -; NO-SIMD128-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-NEXT: i32.const $push49=, -1 -; NO-SIMD128-NEXT: i32.xor $push6=, $16, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop8), $pop6 -; NO-SIMD128-NEXT: i32.const $push10=, 14 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.const $push48=, -1 -; NO-SIMD128-NEXT: i32.xor $push9=, $15, $pop48 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push47=, -1 -; NO-SIMD128-NEXT: i32.xor $push12=, $14, $pop47 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 12 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push46=, -1 -; NO-SIMD128-NEXT: i32.xor $push15=, $13, $pop46 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push19=, 11 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push45=, -1 -; NO-SIMD128-NEXT: i32.xor $push18=, $12, $pop45 -; NO-SIMD128-NEXT: i32.store8 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 10 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.const $push44=, -1 -; NO-SIMD128-NEXT: i32.xor $push21=, $11, $pop44 -; NO-SIMD128-NEXT: i32.store8 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.const $push25=, 9 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push43=, -1 -; NO-SIMD128-NEXT: i32.xor $push24=, $10, $pop43 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push28=, 7 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.const $push42=, -1 -; NO-SIMD128-NEXT: i32.xor $push27=, $8, $pop42 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.const $push41=, -1 -; NO-SIMD128-NEXT: i32.xor $push30=, $7, $pop41 -; NO-SIMD128-NEXT: i32.store8 0($pop32), $pop30 -; NO-SIMD128-NEXT: i32.const $push34=, 5 -; NO-SIMD128-NEXT: i32.add $push35=, $0, $pop34 -; NO-SIMD128-NEXT: i32.const $push40=, -1 -; NO-SIMD128-NEXT: i32.xor $push33=, $6, $pop40 -; NO-SIMD128-NEXT: i32.store8 0($pop35), $pop33 -; NO-SIMD128-NEXT: i32.const $push37=, 3 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push39=, -1 -; NO-SIMD128-NEXT: i32.xor $push36=, $4, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 +; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push31=, -1 +; NO-SIMD128-NEXT: i32.xor $push2=, $15, $pop31 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push30=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $14, $pop30 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push29=, -1 +; NO-SIMD128-NEXT: i32.xor $push4=, $13, $pop29 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push28=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $12, $pop28 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push27=, -1 +; 
NO-SIMD128-NEXT: i32.xor $push6=, $11, $pop27 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push26=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $10, $pop26 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push25=, -1 +; NO-SIMD128-NEXT: i32.xor $push8=, $9, $pop25 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push24=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $8, $pop24 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push23=, -1 +; NO-SIMD128-NEXT: i32.xor $push10=, $7, $pop23 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push22=, -1 +; NO-SIMD128-NEXT: i32.xor $push11=, $6, $pop22 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop11 +; NO-SIMD128-NEXT: i32.const $push21=, -1 +; NO-SIMD128-NEXT: i32.xor $push12=, $5, $pop21 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push20=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $4, $pop20 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop13 +; NO-SIMD128-NEXT: i32.const $push19=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $3, $pop19 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push15=, $2, $pop18 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push16=, $1, $pop17 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: not_v16i8: @@ -5181,73 +4235,51 @@ define <16 x i8> @not_v16i8(<16 x i8> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop31 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop52 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop30 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $5, $pop50 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $6, $pop49 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $7, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $8, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop15), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $9, $pop46 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop17 
-; NO-SIMD128-FAST-NEXT: i32.const $push18=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $10, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop19), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $11, $pop44 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $12, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop25), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $13, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $14, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop31), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $15, $pop40 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $16, $pop39 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop37), $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $4, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $5, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $7, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $8, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push24=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $9, $pop24 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $10, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $11, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $12, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $14, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: 
i32.xor $push15=, $15, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $16, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %a = xor <16 x i8> %x, @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128: .functype andnot_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $25, $pop0 -; NO-SIMD128-NEXT: i32.and $push2=, $9, $pop1 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push69=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $21, $pop69 -; NO-SIMD128-NEXT: i32.and $push4=, $5, $pop3 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push68=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $19, $pop68 -; NO-SIMD128-NEXT: i32.and $push6=, $3, $pop5 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push67=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $18, $pop67 -; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop7 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push66=, -1 -; NO-SIMD128-NEXT: i32.xor $push9=, $17, $pop66 -; NO-SIMD128-NEXT: i32.and $push10=, $1, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push13=, 15 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.const $push65=, -1 -; NO-SIMD128-NEXT: i32.xor $push11=, $32, $pop65 -; NO-SIMD128-NEXT: i32.and $push12=, $16, $pop11 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push17=, 14 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push64=, -1 -; NO-SIMD128-NEXT: i32.xor $push15=, $31, $pop64 -; NO-SIMD128-NEXT: i32.and $push16=, $15, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push21=, 13 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.const $push63=, -1 -; NO-SIMD128-NEXT: i32.xor $push19=, $30, $pop63 -; NO-SIMD128-NEXT: i32.and $push20=, $14, $pop19 -; NO-SIMD128-NEXT: i32.store8 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push25=, 12 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.const $push62=, -1 -; NO-SIMD128-NEXT: i32.xor $push23=, $29, $pop62 -; NO-SIMD128-NEXT: i32.and $push24=, $13, $pop23 -; NO-SIMD128-NEXT: i32.store8 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.const $push61=, -1 -; NO-SIMD128-NEXT: i32.xor $push27=, $28, $pop61 -; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push33=, 10 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.const $push60=, -1 -; NO-SIMD128-NEXT: i32.xor $push31=, $27, $pop60 -; NO-SIMD128-NEXT: i32.and $push32=, $11, $pop31 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.const $push37=, 9 -; NO-SIMD128-NEXT: i32.add $push38=, $0, $pop37 -; NO-SIMD128-NEXT: i32.const $push59=, -1 -; NO-SIMD128-NEXT: i32.xor $push35=, $26, $pop59 -; NO-SIMD128-NEXT: i32.and $push36=, $10, $pop35 -; NO-SIMD128-NEXT: i32.store8 0($pop38), $pop36 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, 
$pop41 -; NO-SIMD128-NEXT: i32.const $push58=, -1 -; NO-SIMD128-NEXT: i32.xor $push39=, $24, $pop58 -; NO-SIMD128-NEXT: i32.and $push40=, $8, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push45=, 6 -; NO-SIMD128-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-NEXT: i32.const $push57=, -1 -; NO-SIMD128-NEXT: i32.xor $push43=, $23, $pop57 -; NO-SIMD128-NEXT: i32.and $push44=, $7, $pop43 -; NO-SIMD128-NEXT: i32.store8 0($pop46), $pop44 -; NO-SIMD128-NEXT: i32.const $push49=, 5 -; NO-SIMD128-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-NEXT: i32.const $push56=, -1 -; NO-SIMD128-NEXT: i32.xor $push47=, $22, $pop56 -; NO-SIMD128-NEXT: i32.and $push48=, $6, $pop47 -; NO-SIMD128-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-NEXT: i32.const $push53=, 3 -; NO-SIMD128-NEXT: i32.add $push54=, $0, $pop53 -; NO-SIMD128-NEXT: i32.const $push55=, -1 -; NO-SIMD128-NEXT: i32.xor $push51=, $20, $pop55 -; NO-SIMD128-NEXT: i32.and $push52=, $4, $pop51 -; NO-SIMD128-NEXT: i32.store8 0($pop54), $pop52 +; NO-SIMD128-NEXT: i32.xor $push1=, $32, $pop0 +; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop1 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push47=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $31, $pop47 +; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop3 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push46=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $30, $pop46 +; NO-SIMD128-NEXT: i32.and $push6=, $14, $pop5 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push45=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $29, $pop45 +; NO-SIMD128-NEXT: i32.and $push8=, $13, $pop7 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push44=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $28, $pop44 +; NO-SIMD128-NEXT: i32.and $push10=, $12, $pop9 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push43=, -1 +; NO-SIMD128-NEXT: i32.xor $push11=, $27, $pop43 +; NO-SIMD128-NEXT: i32.and $push12=, $11, $pop11 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push42=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $26, $pop42 +; NO-SIMD128-NEXT: i32.and $push14=, $10, $pop13 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push41=, -1 +; NO-SIMD128-NEXT: i32.xor $push15=, $25, $pop41 +; NO-SIMD128-NEXT: i32.and $push16=, $9, $pop15 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, -1 +; NO-SIMD128-NEXT: i32.xor $push17=, $24, $pop40 +; NO-SIMD128-NEXT: i32.and $push18=, $8, $pop17 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push39=, -1 +; NO-SIMD128-NEXT: i32.xor $push19=, $23, $pop39 +; NO-SIMD128-NEXT: i32.and $push20=, $7, $pop19 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, -1 +; NO-SIMD128-NEXT: i32.xor $push21=, $22, $pop38 +; NO-SIMD128-NEXT: i32.and $push22=, $6, $pop21 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop22 +; NO-SIMD128-NEXT: i32.const $push37=, -1 +; NO-SIMD128-NEXT: i32.xor $push23=, $21, $pop37 +; NO-SIMD128-NEXT: i32.and $push24=, $5, $pop23 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, -1 +; NO-SIMD128-NEXT: i32.xor $push25=, $20, $pop36 +; NO-SIMD128-NEXT: i32.and $push26=, $4, $pop25 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop26 +; NO-SIMD128-NEXT: i32.const $push35=, -1 +; NO-SIMD128-NEXT: i32.xor $push27=, $19, $pop35 +; NO-SIMD128-NEXT: i32.and 
$push28=, $3, $pop27 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, -1 +; NO-SIMD128-NEXT: i32.xor $push29=, $18, $pop34 +; NO-SIMD128-NEXT: i32.and $push30=, $2, $pop29 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop30 +; NO-SIMD128-NEXT: i32.const $push33=, -1 +; NO-SIMD128-NEXT: i32.xor $push31=, $17, $pop33 +; NO-SIMD128-NEXT: i32.and $push32=, $1, $pop31 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: andnot_v16i8: @@ -5368,88 +4378,66 @@ define <16 x i8> @andnot_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $17, $pop0 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $18, $pop69 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $18, $pop47 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $19, $pop68 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $19, $pop46 ; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $20, $pop67 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $21, $pop66 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $5, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $22, $pop65 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $6, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push64=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $23, $pop64 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $24, $pop63 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $8, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop22), $pop24 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $25, $pop62 -; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $26, $pop61 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $10, $pop29 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop30 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $27, $pop60 -; NO-SIMD128-FAST-NEXT: 
i32.and $push34=, $11, $pop33 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop32), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $28, $pop59 -; NO-SIMD128-FAST-NEXT: i32.and $push38=, $12, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop38 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $29, $pop58 -; NO-SIMD128-FAST-NEXT: i32.and $push42=, $13, $pop41 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push44=, $0, $pop43 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $30, $pop57 -; NO-SIMD128-FAST-NEXT: i32.and $push46=, $14, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop44), $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push48=, $0, $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $31, $pop56 -; NO-SIMD128-FAST-NEXT: i32.and $push50=, $15, $pop49 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop48), $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push53=, $32, $pop55 -; NO-SIMD128-FAST-NEXT: i32.and $push54=, $16, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop52), $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $20, $pop45 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $21, $pop44 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $5, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $22, $pop43 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $23, $pop42 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $7, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $24, $pop41 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $8, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $25, $pop40 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop17 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $10, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $27, $pop38 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $11, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $28, $pop37 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $12, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const 
$push36=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $29, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $13, $pop25 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push27=, $30, $pop35 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $14, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $31, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $15, $pop29 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $32, $pop33 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $16, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %inv_y = xor <16 x i8> %y, @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-LABEL: bitselect_v16i8: ; NO-SIMD128: .functype bitselect_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 15 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.and $push0=, $16, $32 ; NO-SIMD128-NEXT: i32.const $push1=, -1 ; NO-SIMD128-NEXT: i32.xor $push2=, $16, $pop1 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $48 ; NO-SIMD128-NEXT: i32.or $push4=, $pop0, $pop3 -; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.and $push7=, $15, $31 -; NO-SIMD128-NEXT: i32.const $push101=, -1 -; NO-SIMD128-NEXT: i32.xor $push8=, $15, $pop101 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $47 -; NO-SIMD128-NEXT: i32.or $push10=, $pop7, $pop9 -; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 13 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.and $push13=, $14, $30 -; NO-SIMD128-NEXT: i32.const $push100=, -1 -; NO-SIMD128-NEXT: i32.xor $push14=, $14, $pop100 -; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $46 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop4 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $31 +; NO-SIMD128-NEXT: i32.const $push79=, -1 +; NO-SIMD128-NEXT: i32.xor $push6=, $15, $pop79 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $47 +; NO-SIMD128-NEXT: i32.or $push8=, $pop5, $pop7 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop8 +; NO-SIMD128-NEXT: i32.and $push9=, $14, $30 +; NO-SIMD128-NEXT: i32.const $push78=, -1 +; NO-SIMD128-NEXT: i32.xor $push10=, $14, $pop78 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $46 +; NO-SIMD128-NEXT: i32.or $push12=, $pop9, $pop11 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop12 +; NO-SIMD128-NEXT: i32.and $push13=, $13, $29 +; NO-SIMD128-NEXT: i32.const $push77=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $13, $pop77 +; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $45 ; NO-SIMD128-NEXT: i32.or $push16=, $pop13, $pop15 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push23=, 12 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.and $push19=, $13, $29 -; NO-SIMD128-NEXT: i32.const $push99=, -1 -; NO-SIMD128-NEXT: i32.xor $push20=, $13, $pop99 -; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $45 -; NO-SIMD128-NEXT: i32.or $push22=, $pop19, $pop21 -; 
NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.and $push25=, $12, $28 -; NO-SIMD128-NEXT: i32.const $push98=, -1 -; NO-SIMD128-NEXT: i32.xor $push26=, $12, $pop98 -; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $44 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop16 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $28 +; NO-SIMD128-NEXT: i32.const $push76=, -1 +; NO-SIMD128-NEXT: i32.xor $push18=, $12, $pop76 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $44 +; NO-SIMD128-NEXT: i32.or $push20=, $pop17, $pop19 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop20 +; NO-SIMD128-NEXT: i32.and $push21=, $11, $27 +; NO-SIMD128-NEXT: i32.const $push75=, -1 +; NO-SIMD128-NEXT: i32.xor $push22=, $11, $pop75 +; NO-SIMD128-NEXT: i32.and $push23=, $pop22, $43 +; NO-SIMD128-NEXT: i32.or $push24=, $pop21, $pop23 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop24 +; NO-SIMD128-NEXT: i32.and $push25=, $10, $26 +; NO-SIMD128-NEXT: i32.const $push74=, -1 +; NO-SIMD128-NEXT: i32.xor $push26=, $10, $pop74 +; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $42 ; NO-SIMD128-NEXT: i32.or $push28=, $pop25, $pop27 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push35=, 10 -; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-NEXT: i32.and $push31=, $11, $27 -; NO-SIMD128-NEXT: i32.const $push97=, -1 -; NO-SIMD128-NEXT: i32.xor $push32=, $11, $pop97 -; NO-SIMD128-NEXT: i32.and $push33=, $pop32, $43 -; NO-SIMD128-NEXT: i32.or $push34=, $pop31, $pop33 -; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 -; NO-SIMD128-NEXT: i32.const $push41=, 9 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.and $push37=, $10, $26 -; NO-SIMD128-NEXT: i32.const $push96=, -1 -; NO-SIMD128-NEXT: i32.xor $push38=, $10, $pop96 -; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $42 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop28 +; NO-SIMD128-NEXT: i32.and $push29=, $9, $25 +; NO-SIMD128-NEXT: i32.const $push73=, -1 +; NO-SIMD128-NEXT: i32.xor $push30=, $9, $pop73 +; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $41 +; NO-SIMD128-NEXT: i32.or $push32=, $pop29, $pop31 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop32 +; NO-SIMD128-NEXT: i32.and $push33=, $8, $24 +; NO-SIMD128-NEXT: i32.const $push72=, -1 +; NO-SIMD128-NEXT: i32.xor $push34=, $8, $pop72 +; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $40 +; NO-SIMD128-NEXT: i32.or $push36=, $pop33, $pop35 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop36 +; NO-SIMD128-NEXT: i32.and $push37=, $7, $23 +; NO-SIMD128-NEXT: i32.const $push71=, -1 +; NO-SIMD128-NEXT: i32.xor $push38=, $7, $pop71 +; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $39 ; NO-SIMD128-NEXT: i32.or $push40=, $pop37, $pop39 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.and $push43=, $9, $25 -; NO-SIMD128-NEXT: i32.const $push95=, -1 -; NO-SIMD128-NEXT: i32.xor $push44=, $9, $pop95 -; NO-SIMD128-NEXT: i32.and $push45=, $pop44, $41 -; NO-SIMD128-NEXT: i32.or $push46=, $pop43, $pop45 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 -; NO-SIMD128-NEXT: i32.const $push51=, 7 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.and $push47=, $8, $24 -; NO-SIMD128-NEXT: i32.const $push94=, -1 -; NO-SIMD128-NEXT: i32.xor $push48=, $8, $pop94 -; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $40 -; NO-SIMD128-NEXT: i32.or $push50=, $pop47, $pop49 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.const $push57=, 6 -; NO-SIMD128-NEXT: i32.add $push58=, 
$0, $pop57 -; NO-SIMD128-NEXT: i32.and $push53=, $7, $23 -; NO-SIMD128-NEXT: i32.const $push93=, -1 -; NO-SIMD128-NEXT: i32.xor $push54=, $7, $pop93 -; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop40 +; NO-SIMD128-NEXT: i32.and $push41=, $6, $22 +; NO-SIMD128-NEXT: i32.const $push70=, -1 +; NO-SIMD128-NEXT: i32.xor $push42=, $6, $pop70 +; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $38 +; NO-SIMD128-NEXT: i32.or $push44=, $pop41, $pop43 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop44 +; NO-SIMD128-NEXT: i32.and $push45=, $5, $21 +; NO-SIMD128-NEXT: i32.const $push69=, -1 +; NO-SIMD128-NEXT: i32.xor $push46=, $5, $pop69 +; NO-SIMD128-NEXT: i32.and $push47=, $pop46, $37 +; NO-SIMD128-NEXT: i32.or $push48=, $pop45, $pop47 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop48 +; NO-SIMD128-NEXT: i32.and $push49=, $4, $20 +; NO-SIMD128-NEXT: i32.const $push68=, -1 +; NO-SIMD128-NEXT: i32.xor $push50=, $4, $pop68 +; NO-SIMD128-NEXT: i32.and $push51=, $pop50, $36 +; NO-SIMD128-NEXT: i32.or $push52=, $pop49, $pop51 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop52 +; NO-SIMD128-NEXT: i32.and $push53=, $3, $19 +; NO-SIMD128-NEXT: i32.const $push67=, -1 +; NO-SIMD128-NEXT: i32.xor $push54=, $3, $pop67 +; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $35 ; NO-SIMD128-NEXT: i32.or $push56=, $pop53, $pop55 -; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 -; NO-SIMD128-NEXT: i32.const $push63=, 5 -; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-NEXT: i32.and $push59=, $6, $22 -; NO-SIMD128-NEXT: i32.const $push92=, -1 -; NO-SIMD128-NEXT: i32.xor $push60=, $6, $pop92 -; NO-SIMD128-NEXT: i32.and $push61=, $pop60, $38 -; NO-SIMD128-NEXT: i32.or $push62=, $pop59, $pop61 -; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-NEXT: i32.and $push65=, $5, $21 -; NO-SIMD128-NEXT: i32.const $push91=, -1 -; NO-SIMD128-NEXT: i32.xor $push66=, $5, $pop91 -; NO-SIMD128-NEXT: i32.and $push67=, $pop66, $37 -; NO-SIMD128-NEXT: i32.or $push68=, $pop65, $pop67 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 -; NO-SIMD128-NEXT: i32.const $push73=, 3 -; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-NEXT: i32.and $push69=, $4, $20 -; NO-SIMD128-NEXT: i32.const $push90=, -1 -; NO-SIMD128-NEXT: i32.xor $push70=, $4, $pop90 -; NO-SIMD128-NEXT: i32.and $push71=, $pop70, $36 -; NO-SIMD128-NEXT: i32.or $push72=, $pop69, $pop71 -; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-NEXT: i32.and $push75=, $3, $19 -; NO-SIMD128-NEXT: i32.const $push89=, -1 -; NO-SIMD128-NEXT: i32.xor $push76=, $3, $pop89 -; NO-SIMD128-NEXT: i32.and $push77=, $pop76, $35 -; NO-SIMD128-NEXT: i32.or $push78=, $pop75, $pop77 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 -; NO-SIMD128-NEXT: i32.and $push79=, $2, $18 -; NO-SIMD128-NEXT: i32.const $push88=, -1 -; NO-SIMD128-NEXT: i32.xor $push80=, $2, $pop88 -; NO-SIMD128-NEXT: i32.and $push81=, $pop80, $34 -; NO-SIMD128-NEXT: i32.or $push82=, $pop79, $pop81 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 -; NO-SIMD128-NEXT: i32.and $push83=, $1, $17 -; NO-SIMD128-NEXT: i32.const $push87=, -1 -; NO-SIMD128-NEXT: i32.xor $push84=, $1, $pop87 -; NO-SIMD128-NEXT: i32.and $push85=, $pop84, $33 -; NO-SIMD128-NEXT: i32.or $push86=, $pop83, $pop85 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop56 +; NO-SIMD128-NEXT: i32.and $push57=, $2, $18 +; NO-SIMD128-NEXT: i32.const $push66=, -1 +; NO-SIMD128-NEXT: i32.xor $push58=, $2, $pop66 +; NO-SIMD128-NEXT: i32.and $push59=, $pop58, $34 +; NO-SIMD128-NEXT: i32.or 
$push60=, $pop57, $pop59 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop60 +; NO-SIMD128-NEXT: i32.and $push61=, $1, $17 +; NO-SIMD128-NEXT: i32.const $push65=, -1 +; NO-SIMD128-NEXT: i32.xor $push62=, $1, $pop65 +; NO-SIMD128-NEXT: i32.and $push63=, $pop62, $33 +; NO-SIMD128-NEXT: i32.or $push64=, $pop61, $pop63 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop64 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_v16i8: @@ -5607,117 +4573,95 @@ define <16 x i8> @bitselect_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $18 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop79 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $34 ; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $19 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop100 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop78 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $35 ; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $20 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop99 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop77 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $36 ; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $5, $21 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $5, $pop98 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $37 -; NO-SIMD128-FAST-NEXT: i32.or $push22=, $pop19, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $6, $22 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $6, $pop97 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $38 -; NO-SIMD128-FAST-NEXT: i32.or $push26=, $pop23, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $7, $23 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $7, $pop96 -; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $39 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $5, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $5, $pop76 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $37 +; NO-SIMD128-FAST-NEXT: i32.or $push20=, $pop17, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $6, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $6, $pop75 +; 
NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $38 +; NO-SIMD128-FAST-NEXT: i32.or $push24=, $pop21, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $7, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $7, $pop74 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $39 +; NO-SIMD128-FAST-NEXT: i32.or $push28=, $pop25, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push73=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $8, $pop73 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $40 ; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $8, $24 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $8, $pop95 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $40 -; NO-SIMD128-FAST-NEXT: i32.or $push38=, $pop35, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.and $push41=, $9, $25 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $9, $pop94 -; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $41 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.and $push33=, $9, $25 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $9, $pop72 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $pop34, $41 +; NO-SIMD128-FAST-NEXT: i32.or $push36=, $pop33, $pop35 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $10, $26 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $10, $pop71 +; NO-SIMD128-FAST-NEXT: i32.and $push39=, $pop38, $42 +; NO-SIMD128-FAST-NEXT: i32.or $push40=, $pop37, $pop39 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40 +; NO-SIMD128-FAST-NEXT: i32.and $push41=, $11, $27 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $11, $pop70 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $43 ; NO-SIMD128-FAST-NEXT: i32.or $push44=, $pop41, $pop43 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.and $push45=, $10, $26 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $10, $pop93 -; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $42 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.and $push45=, $12, $28 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $12, $pop69 +; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $44 ; NO-SIMD128-FAST-NEXT: i32.or $push48=, $pop45, $pop47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.and $push51=, $11, $27 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $11, $pop92 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $43 -; NO-SIMD128-FAST-NEXT: i32.or $push54=, $pop51, $pop53 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 -; NO-SIMD128-FAST-NEXT: 
i32.const $push61=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.and $push57=, $12, $28 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $12, $pop91 -; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $44 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48 +; NO-SIMD128-FAST-NEXT: i32.and $push49=, $13, $29 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push50=, $13, $pop68 +; NO-SIMD128-FAST-NEXT: i32.and $push51=, $pop50, $45 +; NO-SIMD128-FAST-NEXT: i32.or $push52=, $pop49, $pop51 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52 +; NO-SIMD128-FAST-NEXT: i32.and $push53=, $14, $30 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $14, $pop67 +; NO-SIMD128-FAST-NEXT: i32.and $push55=, $pop54, $46 +; NO-SIMD128-FAST-NEXT: i32.or $push56=, $pop53, $pop55 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.and $push57=, $15, $31 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $15, $pop66 +; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $47 ; NO-SIMD128-FAST-NEXT: i32.or $push60=, $pop57, $pop59 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $13, $29 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $13, $pop90 -; NO-SIMD128-FAST-NEXT: i32.and $push65=, $pop64, $45 -; NO-SIMD128-FAST-NEXT: i32.or $push66=, $pop63, $pop65 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-FAST-NEXT: i32.and $push69=, $14, $30 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push70=, $14, $pop89 -; NO-SIMD128-FAST-NEXT: i32.and $push71=, $pop70, $46 -; NO-SIMD128-FAST-NEXT: i32.or $push72=, $pop69, $pop71 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 -; NO-SIMD128-FAST-NEXT: i32.and $push75=, $15, $31 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push76=, $15, $pop88 -; NO-SIMD128-FAST-NEXT: i32.and $push77=, $pop76, $47 -; NO-SIMD128-FAST-NEXT: i32.or $push78=, $pop75, $pop77 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 -; NO-SIMD128-FAST-NEXT: i32.and $push81=, $16, $32 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push82=, $16, $pop87 -; NO-SIMD128-FAST-NEXT: i32.and $push83=, $pop82, $48 -; NO-SIMD128-FAST-NEXT: i32.or $push84=, $pop81, $pop83 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60 +; NO-SIMD128-FAST-NEXT: i32.and $push61=, $16, $32 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $16, $pop65 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $48 +; NO-SIMD128-FAST-NEXT: i32.or $push64=, $pop61, $pop63 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64 ; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <16 x i8> %c, %v1 %inv_mask = xor <16 x i8> %c, @@ -5746,92 +4690,70 @@ define <16 x i8> @bitselect_xor_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> 
%v2 ; NO-SIMD128-LABEL: bitselect_xor_v16i8: ; NO-SIMD128: .functype bitselect_xor_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push3=, 15 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 ; NO-SIMD128-NEXT: i32.xor $push0=, $32, $48 ; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $16 ; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $48 -; NO-SIMD128-NEXT: i32.store8 0($pop4), $pop2 -; NO-SIMD128-NEXT: i32.const $push8=, 14 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.xor $push5=, $31, $47 -; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $15 -; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $47 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push13=, 13 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.xor $push10=, $30, $46 -; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $14 -; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $46 -; NO-SIMD128-NEXT: i32.store8 0($pop14), $pop12 -; NO-SIMD128-NEXT: i32.const $push18=, 12 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.xor $push15=, $29, $45 -; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $13 -; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $45 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $pop17 -; NO-SIMD128-NEXT: i32.const $push23=, 11 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.xor $push20=, $28, $44 -; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $12 -; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $44 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push28=, 10 -; NO-SIMD128-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-NEXT: i32.xor $push25=, $27, $43 -; NO-SIMD128-NEXT: i32.and $push26=, $pop25, $11 -; NO-SIMD128-NEXT: i32.xor $push27=, $pop26, $43 -; NO-SIMD128-NEXT: i32.store8 0($pop29), $pop27 -; NO-SIMD128-NEXT: i32.const $push33=, 9 -; NO-SIMD128-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-NEXT: i32.xor $push30=, $26, $42 -; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $10 -; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $42 -; NO-SIMD128-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-NEXT: i32.xor $push35=, $25, $41 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $9 -; NO-SIMD128-NEXT: i32.xor $push37=, $pop36, $41 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop37 -; NO-SIMD128-NEXT: i32.const $push41=, 7 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.xor $push38=, $24, $40 -; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $8 -; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $40 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.const $push46=, 6 -; NO-SIMD128-NEXT: i32.add $push47=, $0, $pop46 -; NO-SIMD128-NEXT: i32.xor $push43=, $23, $39 -; NO-SIMD128-NEXT: i32.and $push44=, $pop43, $7 -; NO-SIMD128-NEXT: i32.xor $push45=, $pop44, $39 -; NO-SIMD128-NEXT: i32.store8 0($pop47), $pop45 -; NO-SIMD128-NEXT: i32.const $push51=, 5 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.xor $push48=, $22, $38 -; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $6 -; NO-SIMD128-NEXT: i32.xor $push50=, $pop49, $38 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.xor $push53=, $21, $37 -; NO-SIMD128-NEXT: i32.and $push54=, $pop53, $5 -; NO-SIMD128-NEXT: i32.xor $push55=, $pop54, 
$37 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop55 -; NO-SIMD128-NEXT: i32.const $push59=, 3 -; NO-SIMD128-NEXT: i32.add $push60=, $0, $pop59 -; NO-SIMD128-NEXT: i32.xor $push56=, $20, $36 -; NO-SIMD128-NEXT: i32.and $push57=, $pop56, $4 -; NO-SIMD128-NEXT: i32.xor $push58=, $pop57, $36 -; NO-SIMD128-NEXT: i32.store8 0($pop60), $pop58 -; NO-SIMD128-NEXT: i32.xor $push61=, $19, $35 -; NO-SIMD128-NEXT: i32.and $push62=, $pop61, $3 -; NO-SIMD128-NEXT: i32.xor $push63=, $pop62, $35 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop63 -; NO-SIMD128-NEXT: i32.xor $push64=, $18, $34 -; NO-SIMD128-NEXT: i32.and $push65=, $pop64, $2 -; NO-SIMD128-NEXT: i32.xor $push66=, $pop65, $34 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop66 -; NO-SIMD128-NEXT: i32.xor $push67=, $17, $33 -; NO-SIMD128-NEXT: i32.and $push68=, $pop67, $1 -; NO-SIMD128-NEXT: i32.xor $push69=, $pop68, $33 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop69 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $31, $47 +; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $15 +; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $47 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $30, $46 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $14 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $46 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push9=, $29, $45 +; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $13 +; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $45 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop11 +; NO-SIMD128-NEXT: i32.xor $push12=, $28, $44 +; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $12 +; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $44 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop14 +; NO-SIMD128-NEXT: i32.xor $push15=, $27, $43 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $11 +; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $43 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop17 +; NO-SIMD128-NEXT: i32.xor $push18=, $26, $42 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $10 +; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $42 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop20 +; NO-SIMD128-NEXT: i32.xor $push21=, $25, $41 +; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $9 +; NO-SIMD128-NEXT: i32.xor $push23=, $pop22, $41 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop23 +; NO-SIMD128-NEXT: i32.xor $push24=, $24, $40 +; NO-SIMD128-NEXT: i32.and $push25=, $pop24, $8 +; NO-SIMD128-NEXT: i32.xor $push26=, $pop25, $40 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop26 +; NO-SIMD128-NEXT: i32.xor $push27=, $23, $39 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $7 +; NO-SIMD128-NEXT: i32.xor $push29=, $pop28, $39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop29 +; NO-SIMD128-NEXT: i32.xor $push30=, $22, $38 +; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $6 +; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $38 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop32 +; NO-SIMD128-NEXT: i32.xor $push33=, $21, $37 +; NO-SIMD128-NEXT: i32.and $push34=, $pop33, $5 +; NO-SIMD128-NEXT: i32.xor $push35=, $pop34, $37 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop35 +; NO-SIMD128-NEXT: i32.xor $push36=, $20, $36 +; NO-SIMD128-NEXT: i32.and $push37=, $pop36, $4 +; NO-SIMD128-NEXT: i32.xor $push38=, $pop37, $36 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop38 +; NO-SIMD128-NEXT: i32.xor $push39=, $19, $35 +; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $3 +; NO-SIMD128-NEXT: i32.xor $push41=, $pop40, $35 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop41 +; NO-SIMD128-NEXT: i32.xor $push42=, $18, $34 +; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $2 +; NO-SIMD128-NEXT: 
i32.xor $push44=, $pop43, $34 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop44 +; NO-SIMD128-NEXT: i32.xor $push45=, $17, $33 +; NO-SIMD128-NEXT: i32.and $push46=, $pop45, $1 +; NO-SIMD128-NEXT: i32.xor $push47=, $pop46, $33 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop47 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_v16i8: @@ -5849,80 +4771,58 @@ define <16 x i8> @bitselect_xor_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 x i8> %v2 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $35 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $20, $36 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop10), $pop13 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $21, $37 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $5 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $37 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $22, $38 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $6 -; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $pop20, $38 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $23, $39 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $7 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $39 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop23), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $24, $40 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $pop29, $8 -; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $pop30, $40 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop31 -; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $25, $41 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $pop32, $9 -; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $pop33, $41 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop34 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $26, $42 -; NO-SIMD128-FAST-NEXT: i32.and $push38=, $pop37, $10 -; NO-SIMD128-FAST-NEXT: i32.xor $push39=, $pop38, $42 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop36), $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push41=, $0, $pop40 -; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $27, $43 -; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $11 -; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop41), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push46=, $0, $pop45 -; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $28, $44 -; NO-SIMD128-FAST-NEXT: i32.and $push48=, $pop47, $12 -; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $pop48, $44 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop46), $pop49 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push51=, $0, $pop50 -; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $29, $45 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $13 -; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $pop53, $45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop51), $pop54 -; 
NO-SIMD128-FAST-NEXT: i32.const $push55=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $30, $46 -; NO-SIMD128-FAST-NEXT: i32.and $push58=, $pop57, $14 -; NO-SIMD128-FAST-NEXT: i32.xor $push59=, $pop58, $46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push61=, $0, $pop60 -; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $31, $47 -; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $15 -; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $pop63, $47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop61), $pop64 -; NO-SIMD128-FAST-NEXT: i32.const $push65=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push66=, $0, $pop65 -; NO-SIMD128-FAST-NEXT: i32.xor $push67=, $32, $48 -; NO-SIMD128-FAST-NEXT: i32.and $push68=, $pop67, $16 -; NO-SIMD128-FAST-NEXT: i32.xor $push69=, $pop68, $48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop66), $pop69 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $20, $36 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $36 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $21, $37 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $5 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $pop13, $37 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $22, $38 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $6 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $pop16, $38 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $23, $39 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $7 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $39 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $24, $40 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $8 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $pop22, $40 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $25, $41 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $9 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $41 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop26 +; NO-SIMD128-FAST-NEXT: i32.xor $push27=, $26, $42 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $10 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $pop28, $42 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop29 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $27, $43 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $11 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $43 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $28, $44 +; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $12 +; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $pop34, $44 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop35 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $29, $45 +; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $13 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $45 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop38 +; NO-SIMD128-FAST-NEXT: i32.xor $push39=, $30, $46 +; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $14 +; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $pop40, $46 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop41 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $31, $47 +; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $15 +; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $47 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $32, $48 +; 
NO-SIMD128-FAST-NEXT: i32.and $push46=, $pop45, $16 +; NO-SIMD128-FAST-NEXT: i32.xor $push47=, $pop46, $48 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop47 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <16 x i8> %v1, %v2 %and = and <16 x i8> %xor1, %c @@ -5949,124 +4849,102 @@ define <16 x i8> @bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 ; NO-SIMD128-LABEL: bitselect_xor_reversed_v16i8: ; NO-SIMD128: .functype bitselect_xor_reversed_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 15 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.xor $push2=, $32, $48 ; NO-SIMD128-NEXT: i32.const $push0=, -1 ; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $48 -; NO-SIMD128-NEXT: i32.store8 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.xor $push8=, $31, $47 -; NO-SIMD128-NEXT: i32.const $push101=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $15, $pop101 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $47 -; NO-SIMD128-NEXT: i32.store8 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 13 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.xor $push14=, $30, $46 -; NO-SIMD128-NEXT: i32.const $push100=, -1 -; NO-SIMD128-NEXT: i32.xor $push13=, $14, $pop100 +; NO-SIMD128-NEXT: i32.store8 15($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push6=, $31, $47 +; NO-SIMD128-NEXT: i32.const $push79=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $15, $pop79 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $47 +; NO-SIMD128-NEXT: i32.store8 14($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push10=, $30, $46 +; NO-SIMD128-NEXT: i32.const $push78=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $14, $pop78 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $46 +; NO-SIMD128-NEXT: i32.store8 13($0), $pop12 +; NO-SIMD128-NEXT: i32.xor $push14=, $29, $45 +; NO-SIMD128-NEXT: i32.const $push77=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $13, $pop77 ; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $46 -; NO-SIMD128-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push23=, 12 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.xor $push20=, $29, $45 -; NO-SIMD128-NEXT: i32.const $push99=, -1 -; NO-SIMD128-NEXT: i32.xor $push19=, $13, $pop99 -; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $45 -; NO-SIMD128-NEXT: i32.store8 0($pop24), $pop22 -; NO-SIMD128-NEXT: i32.const $push29=, 11 -; NO-SIMD128-NEXT: i32.add $push30=, $0, $pop29 -; NO-SIMD128-NEXT: i32.xor $push26=, $28, $44 -; NO-SIMD128-NEXT: i32.const $push98=, -1 -; NO-SIMD128-NEXT: i32.xor $push25=, $12, $pop98 +; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $45 +; NO-SIMD128-NEXT: i32.store8 12($0), $pop16 +; NO-SIMD128-NEXT: i32.xor $push18=, $28, $44 +; NO-SIMD128-NEXT: i32.const $push76=, -1 +; NO-SIMD128-NEXT: i32.xor $push17=, $12, $pop76 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: 
i32.xor $push20=, $pop19, $44 +; NO-SIMD128-NEXT: i32.store8 11($0), $pop20 +; NO-SIMD128-NEXT: i32.xor $push22=, $27, $43 +; NO-SIMD128-NEXT: i32.const $push75=, -1 +; NO-SIMD128-NEXT: i32.xor $push21=, $11, $pop75 +; NO-SIMD128-NEXT: i32.and $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.xor $push24=, $pop23, $43 +; NO-SIMD128-NEXT: i32.store8 10($0), $pop24 +; NO-SIMD128-NEXT: i32.xor $push26=, $26, $42 +; NO-SIMD128-NEXT: i32.const $push74=, -1 +; NO-SIMD128-NEXT: i32.xor $push25=, $10, $pop74 ; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $pop25 -; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $44 -; NO-SIMD128-NEXT: i32.store8 0($pop30), $pop28 -; NO-SIMD128-NEXT: i32.const $push35=, 10 -; NO-SIMD128-NEXT: i32.add $push36=, $0, $pop35 -; NO-SIMD128-NEXT: i32.xor $push32=, $27, $43 -; NO-SIMD128-NEXT: i32.const $push97=, -1 -; NO-SIMD128-NEXT: i32.xor $push31=, $11, $pop97 -; NO-SIMD128-NEXT: i32.and $push33=, $pop32, $pop31 -; NO-SIMD128-NEXT: i32.xor $push34=, $pop33, $43 -; NO-SIMD128-NEXT: i32.store8 0($pop36), $pop34 -; NO-SIMD128-NEXT: i32.const $push41=, 9 -; NO-SIMD128-NEXT: i32.add $push42=, $0, $pop41 -; NO-SIMD128-NEXT: i32.xor $push38=, $26, $42 -; NO-SIMD128-NEXT: i32.const $push96=, -1 -; NO-SIMD128-NEXT: i32.xor $push37=, $10, $pop96 +; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $42 +; NO-SIMD128-NEXT: i32.store8 9($0), $pop28 +; NO-SIMD128-NEXT: i32.xor $push30=, $25, $41 +; NO-SIMD128-NEXT: i32.const $push73=, -1 +; NO-SIMD128-NEXT: i32.xor $push29=, $9, $pop73 +; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $41 +; NO-SIMD128-NEXT: i32.store8 8($0), $pop32 +; NO-SIMD128-NEXT: i32.xor $push34=, $24, $40 +; NO-SIMD128-NEXT: i32.const $push72=, -1 +; NO-SIMD128-NEXT: i32.xor $push33=, $8, $pop72 +; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $pop33 +; NO-SIMD128-NEXT: i32.xor $push36=, $pop35, $40 +; NO-SIMD128-NEXT: i32.store8 7($0), $pop36 +; NO-SIMD128-NEXT: i32.xor $push38=, $23, $39 +; NO-SIMD128-NEXT: i32.const $push71=, -1 +; NO-SIMD128-NEXT: i32.xor $push37=, $7, $pop71 ; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $42 -; NO-SIMD128-NEXT: i32.store8 0($pop42), $pop40 -; NO-SIMD128-NEXT: i32.xor $push44=, $25, $41 -; NO-SIMD128-NEXT: i32.const $push95=, -1 -; NO-SIMD128-NEXT: i32.xor $push43=, $9, $pop95 -; NO-SIMD128-NEXT: i32.and $push45=, $pop44, $pop43 -; NO-SIMD128-NEXT: i32.xor $push46=, $pop45, $41 -; NO-SIMD128-NEXT: i32.store8 8($0), $pop46 -; NO-SIMD128-NEXT: i32.const $push51=, 7 -; NO-SIMD128-NEXT: i32.add $push52=, $0, $pop51 -; NO-SIMD128-NEXT: i32.xor $push48=, $24, $40 -; NO-SIMD128-NEXT: i32.const $push94=, -1 -; NO-SIMD128-NEXT: i32.xor $push47=, $8, $pop94 -; NO-SIMD128-NEXT: i32.and $push49=, $pop48, $pop47 -; NO-SIMD128-NEXT: i32.xor $push50=, $pop49, $40 -; NO-SIMD128-NEXT: i32.store8 0($pop52), $pop50 -; NO-SIMD128-NEXT: i32.const $push57=, 6 -; NO-SIMD128-NEXT: i32.add $push58=, $0, $pop57 -; NO-SIMD128-NEXT: i32.xor $push54=, $23, $39 -; NO-SIMD128-NEXT: i32.const $push93=, -1 -; NO-SIMD128-NEXT: i32.xor $push53=, $7, $pop93 +; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $39 +; NO-SIMD128-NEXT: i32.store8 6($0), $pop40 +; NO-SIMD128-NEXT: i32.xor $push42=, $22, $38 +; NO-SIMD128-NEXT: i32.const $push70=, -1 +; NO-SIMD128-NEXT: i32.xor $push41=, $6, $pop70 +; NO-SIMD128-NEXT: i32.and $push43=, $pop42, $pop41 +; NO-SIMD128-NEXT: i32.xor $push44=, $pop43, $38 +; NO-SIMD128-NEXT: i32.store8 5($0), $pop44 +; NO-SIMD128-NEXT: i32.xor $push46=, $21, 
$37 +; NO-SIMD128-NEXT: i32.const $push69=, -1 +; NO-SIMD128-NEXT: i32.xor $push45=, $5, $pop69 +; NO-SIMD128-NEXT: i32.and $push47=, $pop46, $pop45 +; NO-SIMD128-NEXT: i32.xor $push48=, $pop47, $37 +; NO-SIMD128-NEXT: i32.store8 4($0), $pop48 +; NO-SIMD128-NEXT: i32.xor $push50=, $20, $36 +; NO-SIMD128-NEXT: i32.const $push68=, -1 +; NO-SIMD128-NEXT: i32.xor $push49=, $4, $pop68 +; NO-SIMD128-NEXT: i32.and $push51=, $pop50, $pop49 +; NO-SIMD128-NEXT: i32.xor $push52=, $pop51, $36 +; NO-SIMD128-NEXT: i32.store8 3($0), $pop52 +; NO-SIMD128-NEXT: i32.xor $push54=, $19, $35 +; NO-SIMD128-NEXT: i32.const $push67=, -1 +; NO-SIMD128-NEXT: i32.xor $push53=, $3, $pop67 ; NO-SIMD128-NEXT: i32.and $push55=, $pop54, $pop53 -; NO-SIMD128-NEXT: i32.xor $push56=, $pop55, $39 -; NO-SIMD128-NEXT: i32.store8 0($pop58), $pop56 -; NO-SIMD128-NEXT: i32.const $push63=, 5 -; NO-SIMD128-NEXT: i32.add $push64=, $0, $pop63 -; NO-SIMD128-NEXT: i32.xor $push60=, $22, $38 -; NO-SIMD128-NEXT: i32.const $push92=, -1 -; NO-SIMD128-NEXT: i32.xor $push59=, $6, $pop92 -; NO-SIMD128-NEXT: i32.and $push61=, $pop60, $pop59 -; NO-SIMD128-NEXT: i32.xor $push62=, $pop61, $38 -; NO-SIMD128-NEXT: i32.store8 0($pop64), $pop62 -; NO-SIMD128-NEXT: i32.xor $push66=, $21, $37 -; NO-SIMD128-NEXT: i32.const $push91=, -1 -; NO-SIMD128-NEXT: i32.xor $push65=, $5, $pop91 -; NO-SIMD128-NEXT: i32.and $push67=, $pop66, $pop65 -; NO-SIMD128-NEXT: i32.xor $push68=, $pop67, $37 -; NO-SIMD128-NEXT: i32.store8 4($0), $pop68 -; NO-SIMD128-NEXT: i32.const $push73=, 3 -; NO-SIMD128-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-NEXT: i32.xor $push70=, $20, $36 -; NO-SIMD128-NEXT: i32.const $push90=, -1 -; NO-SIMD128-NEXT: i32.xor $push69=, $4, $pop90 -; NO-SIMD128-NEXT: i32.and $push71=, $pop70, $pop69 -; NO-SIMD128-NEXT: i32.xor $push72=, $pop71, $36 -; NO-SIMD128-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-NEXT: i32.xor $push76=, $19, $35 -; NO-SIMD128-NEXT: i32.const $push89=, -1 -; NO-SIMD128-NEXT: i32.xor $push75=, $3, $pop89 -; NO-SIMD128-NEXT: i32.and $push77=, $pop76, $pop75 -; NO-SIMD128-NEXT: i32.xor $push78=, $pop77, $35 -; NO-SIMD128-NEXT: i32.store8 2($0), $pop78 -; NO-SIMD128-NEXT: i32.xor $push80=, $18, $34 -; NO-SIMD128-NEXT: i32.const $push88=, -1 -; NO-SIMD128-NEXT: i32.xor $push79=, $2, $pop88 -; NO-SIMD128-NEXT: i32.and $push81=, $pop80, $pop79 -; NO-SIMD128-NEXT: i32.xor $push82=, $pop81, $34 -; NO-SIMD128-NEXT: i32.store8 1($0), $pop82 -; NO-SIMD128-NEXT: i32.xor $push84=, $17, $33 -; NO-SIMD128-NEXT: i32.const $push87=, -1 -; NO-SIMD128-NEXT: i32.xor $push83=, $1, $pop87 -; NO-SIMD128-NEXT: i32.and $push85=, $pop84, $pop83 -; NO-SIMD128-NEXT: i32.xor $push86=, $pop85, $33 -; NO-SIMD128-NEXT: i32.store8 0($0), $pop86 +; NO-SIMD128-NEXT: i32.xor $push56=, $pop55, $35 +; NO-SIMD128-NEXT: i32.store8 2($0), $pop56 +; NO-SIMD128-NEXT: i32.xor $push58=, $18, $34 +; NO-SIMD128-NEXT: i32.const $push66=, -1 +; NO-SIMD128-NEXT: i32.xor $push57=, $2, $pop66 +; NO-SIMD128-NEXT: i32.and $push59=, $pop58, $pop57 +; NO-SIMD128-NEXT: i32.xor $push60=, $pop59, $34 +; NO-SIMD128-NEXT: i32.store8 1($0), $pop60 +; NO-SIMD128-NEXT: i32.xor $push62=, $17, $33 +; NO-SIMD128-NEXT: i32.const $push65=, -1 +; NO-SIMD128-NEXT: i32.xor $push61=, $1, $pop65 +; NO-SIMD128-NEXT: i32.and $push63=, $pop62, $pop61 +; NO-SIMD128-NEXT: i32.xor $push64=, $pop63, $33 +; NO-SIMD128-NEXT: i32.store8 0($0), $pop64 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v16i8: @@ -6079,117 +4957,95 @@ define <16 x i8> 
@bitselect_xor_reversed_v16i8(<16 x i8> %c, <16 x i8> %v1, <16 ; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $33 ; NO-SIMD128-FAST-NEXT: i32.store8 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $18, $34 -; NO-SIMD128-FAST-NEXT: i32.const $push101=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop101 +; NO-SIMD128-FAST-NEXT: i32.const $push79=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop79 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $34 ; NO-SIMD128-FAST-NEXT: i32.store8 1($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $19, $35 -; NO-SIMD128-FAST-NEXT: i32.const $push100=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop100 +; NO-SIMD128-FAST-NEXT: i32.const $push78=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop78 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $35 ; NO-SIMD128-FAST-NEXT: i32.store8 2($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 3 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $20, $36 -; NO-SIMD128-FAST-NEXT: i32.const $push99=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop99 +; NO-SIMD128-FAST-NEXT: i32.const $push77=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop77 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $36 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $21, $37 -; NO-SIMD128-FAST-NEXT: i32.const $push98=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $5, $pop98 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $pop21, $37 -; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 5 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $22, $38 -; NO-SIMD128-FAST-NEXT: i32.const $push97=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $6, $pop97 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $38 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $23, $39 -; NO-SIMD128-FAST-NEXT: i32.const $push96=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $7, $pop96 +; NO-SIMD128-FAST-NEXT: i32.store8 3($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $21, $37 +; NO-SIMD128-FAST-NEXT: i32.const $push76=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $5, $pop76 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $37 +; NO-SIMD128-FAST-NEXT: i32.store8 4($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $22, $38 +; NO-SIMD128-FAST-NEXT: i32.const $push75=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $6, $pop75 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $pop23, $38 +; NO-SIMD128-FAST-NEXT: i32.store8 5($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $23, $39 +; NO-SIMD128-FAST-NEXT: i32.const $push74=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $7, $pop74 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $pop27, $39 +; NO-SIMD128-FAST-NEXT: i32.store8 6($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $24, $40 
+; NO-SIMD128-FAST-NEXT: i32.const $push73=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop73 ; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $39 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 7 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $24, $40 -; NO-SIMD128-FAST-NEXT: i32.const $push95=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $8, $pop95 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $40 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop40), $pop38 -; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $25, $41 -; NO-SIMD128-FAST-NEXT: i32.const $push94=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $9, $pop94 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $40 +; NO-SIMD128-FAST-NEXT: i32.store8 7($0), $pop32 +; NO-SIMD128-FAST-NEXT: i32.xor $push34=, $25, $41 +; NO-SIMD128-FAST-NEXT: i32.const $push72=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push33=, $9, $pop72 +; NO-SIMD128-FAST-NEXT: i32.and $push35=, $pop34, $pop33 +; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $pop35, $41 +; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop36 +; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $26, $42 +; NO-SIMD128-FAST-NEXT: i32.const $push71=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push37=, $10, $pop71 +; NO-SIMD128-FAST-NEXT: i32.and $push39=, $pop38, $pop37 +; NO-SIMD128-FAST-NEXT: i32.xor $push40=, $pop39, $42 +; NO-SIMD128-FAST-NEXT: i32.store8 9($0), $pop40 +; NO-SIMD128-FAST-NEXT: i32.xor $push42=, $27, $43 +; NO-SIMD128-FAST-NEXT: i32.const $push70=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push41=, $11, $pop70 ; NO-SIMD128-FAST-NEXT: i32.and $push43=, $pop42, $pop41 -; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $41 -; NO-SIMD128-FAST-NEXT: i32.store8 8($0), $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 9 -; NO-SIMD128-FAST-NEXT: i32.add $push50=, $0, $pop49 -; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $26, $42 -; NO-SIMD128-FAST-NEXT: i32.const $push93=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $10, $pop93 +; NO-SIMD128-FAST-NEXT: i32.xor $push44=, $pop43, $43 +; NO-SIMD128-FAST-NEXT: i32.store8 10($0), $pop44 +; NO-SIMD128-FAST-NEXT: i32.xor $push46=, $28, $44 +; NO-SIMD128-FAST-NEXT: i32.const $push69=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push45=, $12, $pop69 ; NO-SIMD128-FAST-NEXT: i32.and $push47=, $pop46, $pop45 -; NO-SIMD128-FAST-NEXT: i32.xor $push48=, $pop47, $42 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop50), $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push56=, $0, $pop55 -; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $27, $43 -; NO-SIMD128-FAST-NEXT: i32.const $push92=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push51=, $11, $pop92 -; NO-SIMD128-FAST-NEXT: i32.and $push53=, $pop52, $pop51 -; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $pop53, $43 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop56), $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 11 -; NO-SIMD128-FAST-NEXT: i32.add $push62=, $0, $pop61 -; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $28, $44 -; NO-SIMD128-FAST-NEXT: i32.const $push91=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $12, $pop91 +; NO-SIMD128-FAST-NEXT: i32.xor $push48=, $pop47, $44 +; NO-SIMD128-FAST-NEXT: i32.store8 11($0), $pop48 +; NO-SIMD128-FAST-NEXT: i32.xor $push50=, $29, $45 +; NO-SIMD128-FAST-NEXT: i32.const $push68=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push49=, $13, $pop68 +; NO-SIMD128-FAST-NEXT: 
i32.and $push51=, $pop50, $pop49 +; NO-SIMD128-FAST-NEXT: i32.xor $push52=, $pop51, $45 +; NO-SIMD128-FAST-NEXT: i32.store8 12($0), $pop52 +; NO-SIMD128-FAST-NEXT: i32.xor $push54=, $30, $46 +; NO-SIMD128-FAST-NEXT: i32.const $push67=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push53=, $14, $pop67 +; NO-SIMD128-FAST-NEXT: i32.and $push55=, $pop54, $pop53 +; NO-SIMD128-FAST-NEXT: i32.xor $push56=, $pop55, $46 +; NO-SIMD128-FAST-NEXT: i32.store8 13($0), $pop56 +; NO-SIMD128-FAST-NEXT: i32.xor $push58=, $31, $47 +; NO-SIMD128-FAST-NEXT: i32.const $push66=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push57=, $15, $pop66 ; NO-SIMD128-FAST-NEXT: i32.and $push59=, $pop58, $pop57 -; NO-SIMD128-FAST-NEXT: i32.xor $push60=, $pop59, $44 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop62), $pop60 -; NO-SIMD128-FAST-NEXT: i32.const $push67=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push68=, $0, $pop67 -; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $29, $45 -; NO-SIMD128-FAST-NEXT: i32.const $push90=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push63=, $13, $pop90 -; NO-SIMD128-FAST-NEXT: i32.and $push65=, $pop64, $pop63 -; NO-SIMD128-FAST-NEXT: i32.xor $push66=, $pop65, $45 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop68), $pop66 -; NO-SIMD128-FAST-NEXT: i32.const $push73=, 13 -; NO-SIMD128-FAST-NEXT: i32.add $push74=, $0, $pop73 -; NO-SIMD128-FAST-NEXT: i32.xor $push70=, $30, $46 -; NO-SIMD128-FAST-NEXT: i32.const $push89=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push69=, $14, $pop89 -; NO-SIMD128-FAST-NEXT: i32.and $push71=, $pop70, $pop69 -; NO-SIMD128-FAST-NEXT: i32.xor $push72=, $pop71, $46 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop74), $pop72 -; NO-SIMD128-FAST-NEXT: i32.const $push79=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push80=, $0, $pop79 -; NO-SIMD128-FAST-NEXT: i32.xor $push76=, $31, $47 -; NO-SIMD128-FAST-NEXT: i32.const $push88=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push75=, $15, $pop88 -; NO-SIMD128-FAST-NEXT: i32.and $push77=, $pop76, $pop75 -; NO-SIMD128-FAST-NEXT: i32.xor $push78=, $pop77, $47 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop80), $pop78 -; NO-SIMD128-FAST-NEXT: i32.const $push85=, 15 -; NO-SIMD128-FAST-NEXT: i32.add $push86=, $0, $pop85 -; NO-SIMD128-FAST-NEXT: i32.xor $push82=, $32, $48 -; NO-SIMD128-FAST-NEXT: i32.const $push87=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push81=, $16, $pop87 -; NO-SIMD128-FAST-NEXT: i32.and $push83=, $pop82, $pop81 -; NO-SIMD128-FAST-NEXT: i32.xor $push84=, $pop83, $48 -; NO-SIMD128-FAST-NEXT: i32.store8 0($pop86), $pop84 +; NO-SIMD128-FAST-NEXT: i32.xor $push60=, $pop59, $47 +; NO-SIMD128-FAST-NEXT: i32.store8 14($0), $pop60 +; NO-SIMD128-FAST-NEXT: i32.xor $push62=, $32, $48 +; NO-SIMD128-FAST-NEXT: i32.const $push65=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push61=, $16, $pop65 +; NO-SIMD128-FAST-NEXT: i32.and $push63=, $pop62, $pop61 +; NO-SIMD128-FAST-NEXT: i32.xor $push64=, $pop63, $48 +; NO-SIMD128-FAST-NEXT: i32.store8 15($0), $pop64 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <16 x i8> %v1, %v2 %notc = xor <16 x i8> %c, @add_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: add_v8i16: ; NO-SIMD128: .functype add_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.add $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.add $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.add $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.add $push3=, $1, $9 -; NO-SIMD128-NEXT: 
i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.add $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.add $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.add $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.add $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.add $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.add $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.add $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.add $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.add $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: add_v8i16: @@ -6253,24 +5101,16 @@ define <8 x i16> @add_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.add $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.add $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.add $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = add <8 x i16> %x, %y ret <8 x i16> %a @@ -6292,30 +5132,22 @@ define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: sub_v8i16: ; NO-SIMD128: .functype sub_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; 
NO-SIMD128-NEXT: i32.sub $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.sub $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.sub $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.sub $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.sub $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.sub $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.sub $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.sub $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.sub $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.sub $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.sub $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.sub $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.sub $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.sub $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.sub $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.sub $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sub_v8i16: @@ -6327,24 +5159,16 @@ define <8 x i16> @sub_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; 
NO-SIMD128-FAST-NEXT: return %a = sub <8 x i16> %x, %y ret <8 x i16> %a @@ -6366,30 +5190,22 @@ define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: mul_v8i16: ; NO-SIMD128: .functype mul_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.mul $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.mul $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.mul $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.mul $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.mul $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.mul $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.mul $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.mul $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.mul $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.mul $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.mul $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.mul $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.mul $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.mul $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.mul $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.mul $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: mul_v8i16: @@ -6401,24 +5217,16 @@ define <8 x i16> @mul_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.mul $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: 
i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.mul $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = mul <8 x i16> %x, %y ret <8 x i16> %a @@ -6440,54 +5248,46 @@ define <8 x i16> @min_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: min_s_v8i16: ; NO-SIMD128: .functype min_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 14 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 ; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16 ; NO-SIMD128-NEXT: i32.lt_s $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $8, $16, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push10=, 12 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $7 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $15 -; NO-SIMD128-NEXT: i32.lt_s $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $7, $15, $pop8 -; NO-SIMD128-NEXT: i32.store16 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push16=, 10 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend16_s $push13=, $6 -; NO-SIMD128-NEXT: i32.extend16_s $push12=, $14 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 +; NO-SIMD128-NEXT: i32.extend16_s $push5=, $7 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $15 +; NO-SIMD128-NEXT: i32.lt_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $7, $15, $pop6 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $6 +; NO-SIMD128-NEXT: i32.extend16_s $push8=, $14 +; NO-SIMD128-NEXT: i32.lt_s $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $6, $14, $pop10 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop11 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push12=, $13 ; NO-SIMD128-NEXT: i32.lt_s $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.select $push15=, $6, $14, $pop14 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.extend16_s $push19=, $5 -; NO-SIMD128-NEXT: i32.extend16_s $push18=, $13 -; NO-SIMD128-NEXT: i32.lt_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.select $push21=, $5, $13, $pop20 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop21 -; NO-SIMD128-NEXT: i32.const $push26=, 6 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.extend16_s $push23=, $4 -; NO-SIMD128-NEXT: i32.extend16_s $push22=, $12 -; NO-SIMD128-NEXT: i32.lt_s $push24=, $pop23, $pop22 -; NO-SIMD128-NEXT: i32.select $push25=, $4, $12, $pop24 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.extend16_s $push29=, $3 -; NO-SIMD128-NEXT: i32.extend16_s $push28=, $11 +; NO-SIMD128-NEXT: i32.select $push15=, $5, $13, $pop14 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push17=, $4 +; NO-SIMD128-NEXT: i32.extend16_s $push16=, $12 +; NO-SIMD128-NEXT: i32.lt_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.select $push19=, $4, $12, $pop18 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop19 +; NO-SIMD128-NEXT: i32.extend16_s $push21=, $3 +; NO-SIMD128-NEXT: i32.extend16_s $push20=, $11 +; NO-SIMD128-NEXT: i32.lt_s $push22=, $pop21, $pop20 +; 
NO-SIMD128-NEXT: i32.select $push23=, $3, $11, $pop22 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop23 +; NO-SIMD128-NEXT: i32.extend16_s $push25=, $2 +; NO-SIMD128-NEXT: i32.extend16_s $push24=, $10 +; NO-SIMD128-NEXT: i32.lt_s $push26=, $pop25, $pop24 +; NO-SIMD128-NEXT: i32.select $push27=, $2, $10, $pop26 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop27 +; NO-SIMD128-NEXT: i32.extend16_s $push29=, $1 +; NO-SIMD128-NEXT: i32.extend16_s $push28=, $9 ; NO-SIMD128-NEXT: i32.lt_s $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.select $push31=, $3, $11, $pop30 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop31 -; NO-SIMD128-NEXT: i32.extend16_s $push33=, $2 -; NO-SIMD128-NEXT: i32.extend16_s $push32=, $10 -; NO-SIMD128-NEXT: i32.lt_s $push34=, $pop33, $pop32 -; NO-SIMD128-NEXT: i32.select $push35=, $2, $10, $pop34 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop35 -; NO-SIMD128-NEXT: i32.extend16_s $push37=, $1 -; NO-SIMD128-NEXT: i32.extend16_s $push36=, $9 -; NO-SIMD128-NEXT: i32.lt_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.select $push39=, $1, $9, $pop38 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop39 +; NO-SIMD128-NEXT: i32.select $push31=, $1, $9, $pop30 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop31 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_s_v8i16: @@ -6508,39 +5308,31 @@ define <8 x i16> @min_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.lt_s $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $11, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $12 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $12, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $13 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $13, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $14 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push24=, $pop23, $pop22 -; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $14, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $7 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push17=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $13 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $13, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop19 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push20=, $14 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $14, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push25=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push24=, $15 +; NO-SIMD128-FAST-NEXT: i32.lt_s $push26=, $pop25, 
$pop24 +; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $15, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $16 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $15, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop33), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push35=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push34=, $16 -; NO-SIMD128-FAST-NEXT: i32.lt_s $push36=, $pop35, $pop34 -; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $16, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop39), $pop37 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $16, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop31 ; NO-SIMD128-FAST-NEXT: return %c = icmp slt <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y @@ -6563,70 +5355,62 @@ define <8 x i16> @min_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: min_u_v8i16: ; NO-SIMD128: .functype min_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 -; NO-SIMD128-NEXT: i32.const $push55=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop55 +; NO-SIMD128-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop47 ; NO-SIMD128-NEXT: i32.lt_u $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.select $push4=, $8, $16, $pop3 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 12 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push54=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $7, $pop54 -; NO-SIMD128-NEXT: i32.const $push53=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $15, $pop53 -; NO-SIMD128-NEXT: i32.lt_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.select $push10=, $7, $15, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 10 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push52=, 65535 -; NO-SIMD128-NEXT: i32.and $push14=, $6, $pop52 -; NO-SIMD128-NEXT: i32.const $push51=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $14, $pop51 -; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.select $push16=, $6, $14, $pop15 -; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push50=, 65535 -; NO-SIMD128-NEXT: i32.and $push20=, $5, $pop50 -; NO-SIMD128-NEXT: i32.const $push49=, 65535 -; NO-SIMD128-NEXT: i32.and $push19=, $13, $pop49 -; NO-SIMD128-NEXT: i32.lt_u $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.select $push22=, $5, $13, $pop21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-NEXT: i32.const $push27=, 6 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.const $push48=, 65535 -; NO-SIMD128-NEXT: i32.and $push24=, $4, $pop48 -; NO-SIMD128-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-NEXT: i32.and $push23=, $12, $pop47 -; NO-SIMD128-NEXT: i32.lt_u $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.select $push26=, $4, $12, $pop25 -; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop4 ; NO-SIMD128-NEXT: 
i32.const $push46=, 65535 -; NO-SIMD128-NEXT: i32.and $push30=, $3, $pop46 +; NO-SIMD128-NEXT: i32.and $push6=, $7, $pop46 ; NO-SIMD128-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-NEXT: i32.and $push29=, $11, $pop45 -; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29 -; NO-SIMD128-NEXT: i32.select $push32=, $3, $11, $pop31 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop45 +; NO-SIMD128-NEXT: i32.lt_u $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.select $push8=, $7, $15, $pop7 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop8 ; NO-SIMD128-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-NEXT: i32.and $push34=, $2, $pop44 +; NO-SIMD128-NEXT: i32.and $push10=, $6, $pop44 ; NO-SIMD128-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-NEXT: i32.and $push33=, $10, $pop43 -; NO-SIMD128-NEXT: i32.lt_u $push35=, $pop34, $pop33 -; NO-SIMD128-NEXT: i32.select $push36=, $2, $10, $pop35 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 +; NO-SIMD128-NEXT: i32.and $push9=, $14, $pop43 +; NO-SIMD128-NEXT: i32.lt_u $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.select $push12=, $6, $14, $pop11 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-NEXT: i32.and $push38=, $1, $pop42 +; NO-SIMD128-NEXT: i32.and $push14=, $5, $pop42 ; NO-SIMD128-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-NEXT: i32.and $push37=, $9, $pop41 -; NO-SIMD128-NEXT: i32.lt_u $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.select $push40=, $1, $9, $pop39 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: i32.and $push13=, $13, $pop41 +; NO-SIMD128-NEXT: i32.lt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $5, $13, $pop15 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-NEXT: i32.and $push18=, $4, $pop40 +; NO-SIMD128-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $pop39 +; NO-SIMD128-NEXT: i32.lt_u $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.select $push20=, $4, $12, $pop19 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-NEXT: i32.and $push22=, $3, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-NEXT: i32.and $push21=, $11, $pop37 +; NO-SIMD128-NEXT: i32.lt_u $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.select $push24=, $3, $11, $pop23 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-NEXT: i32.and $push26=, $2, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-NEXT: i32.and $push25=, $10, $pop35 +; NO-SIMD128-NEXT: i32.lt_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.select $push28=, $2, $10, $pop27 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-NEXT: i32.and $push30=, $1, $pop34 +; NO-SIMD128-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-NEXT: i32.and $push29=, $9, $pop33 +; NO-SIMD128-NEXT: i32.lt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.select $push32=, $1, $9, $pop31 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_u_v8i16: @@ -6634,68 +5418,60 @@ define <8 x i16> @min_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop55 
+; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $9, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop45 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $10, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop52 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop43 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $11, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop41 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $12, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $pop47 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $13, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $pop45 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $14, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-FAST-NEXT: 
i32.and $push17=, $13, $pop39 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $13, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $14, $pop37 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $14, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop35 +; NO-SIMD128-FAST-NEXT: i32.lt_u $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $15, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop33 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $15, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $pop41 -; NO-SIMD128-FAST-NEXT: i32.lt_u $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $16, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $16, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %c = icmp ult <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y @@ -6718,54 +5494,46 @@ define <8 x i16> @max_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: max_s_v8i16: ; NO-SIMD128: .functype max_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 14 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 ; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16 ; NO-SIMD128-NEXT: i32.gt_s $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $8, $16, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push10=, 12 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $7 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $15 -; NO-SIMD128-NEXT: i32.gt_s $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $7, $15, $pop8 -; NO-SIMD128-NEXT: i32.store16 0($pop11), $pop9 -; NO-SIMD128-NEXT: i32.const $push16=, 10 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend16_s $push13=, $6 -; NO-SIMD128-NEXT: i32.extend16_s $push12=, $14 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 +; NO-SIMD128-NEXT: i32.extend16_s $push5=, $7 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $15 +; NO-SIMD128-NEXT: i32.gt_s $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $7, $15, $pop6 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $6 +; 
NO-SIMD128-NEXT: i32.extend16_s $push8=, $14 +; NO-SIMD128-NEXT: i32.gt_s $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $6, $14, $pop10 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop11 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push12=, $13 ; NO-SIMD128-NEXT: i32.gt_s $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.select $push15=, $6, $14, $pop14 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.extend16_s $push19=, $5 -; NO-SIMD128-NEXT: i32.extend16_s $push18=, $13 -; NO-SIMD128-NEXT: i32.gt_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.select $push21=, $5, $13, $pop20 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop21 -; NO-SIMD128-NEXT: i32.const $push26=, 6 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.extend16_s $push23=, $4 -; NO-SIMD128-NEXT: i32.extend16_s $push22=, $12 -; NO-SIMD128-NEXT: i32.gt_s $push24=, $pop23, $pop22 -; NO-SIMD128-NEXT: i32.select $push25=, $4, $12, $pop24 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.extend16_s $push29=, $3 -; NO-SIMD128-NEXT: i32.extend16_s $push28=, $11 +; NO-SIMD128-NEXT: i32.select $push15=, $5, $13, $pop14 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push17=, $4 +; NO-SIMD128-NEXT: i32.extend16_s $push16=, $12 +; NO-SIMD128-NEXT: i32.gt_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.select $push19=, $4, $12, $pop18 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop19 +; NO-SIMD128-NEXT: i32.extend16_s $push21=, $3 +; NO-SIMD128-NEXT: i32.extend16_s $push20=, $11 +; NO-SIMD128-NEXT: i32.gt_s $push22=, $pop21, $pop20 +; NO-SIMD128-NEXT: i32.select $push23=, $3, $11, $pop22 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop23 +; NO-SIMD128-NEXT: i32.extend16_s $push25=, $2 +; NO-SIMD128-NEXT: i32.extend16_s $push24=, $10 +; NO-SIMD128-NEXT: i32.gt_s $push26=, $pop25, $pop24 +; NO-SIMD128-NEXT: i32.select $push27=, $2, $10, $pop26 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop27 +; NO-SIMD128-NEXT: i32.extend16_s $push29=, $1 +; NO-SIMD128-NEXT: i32.extend16_s $push28=, $9 ; NO-SIMD128-NEXT: i32.gt_s $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.select $push31=, $3, $11, $pop30 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop31 -; NO-SIMD128-NEXT: i32.extend16_s $push33=, $2 -; NO-SIMD128-NEXT: i32.extend16_s $push32=, $10 -; NO-SIMD128-NEXT: i32.gt_s $push34=, $pop33, $pop32 -; NO-SIMD128-NEXT: i32.select $push35=, $2, $10, $pop34 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop35 -; NO-SIMD128-NEXT: i32.extend16_s $push37=, $1 -; NO-SIMD128-NEXT: i32.extend16_s $push36=, $9 -; NO-SIMD128-NEXT: i32.gt_s $push38=, $pop37, $pop36 -; NO-SIMD128-NEXT: i32.select $push39=, $1, $9, $pop38 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop39 +; NO-SIMD128-NEXT: i32.select $push31=, $1, $9, $pop30 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop31 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_s_v8i16: @@ -6786,39 +5554,31 @@ define <8 x i16> @max_s_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.gt_s $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $3, $11, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $12 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $4, $12, $pop14 -; NO-SIMD128-FAST-NEXT: 
i32.store16 0($pop17), $pop15 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $13 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.select $push21=, $5, $13, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $14 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push24=, $pop23, $pop22 -; NO-SIMD128-FAST-NEXT: i32.select $push25=, $6, $14, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push32=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $0, $pop32 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $7 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push17=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $13 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.select $push19=, $5, $13, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop19 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push20=, $14 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push22=, $pop21, $pop20 +; NO-SIMD128-FAST-NEXT: i32.select $push23=, $6, $14, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop23 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push25=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push24=, $15 +; NO-SIMD128-FAST-NEXT: i32.gt_s $push26=, $pop25, $pop24 +; NO-SIMD128-FAST-NEXT: i32.select $push27=, $7, $15, $pop26 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop27 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push29=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $16 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.select $push31=, $7, $15, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop33), $pop31 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $0, $pop38 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push35=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push34=, $16 -; NO-SIMD128-FAST-NEXT: i32.gt_s $push36=, $pop35, $pop34 -; NO-SIMD128-FAST-NEXT: i32.select $push37=, $8, $16, $pop36 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop39), $pop37 +; NO-SIMD128-FAST-NEXT: i32.select $push31=, $8, $16, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop31 ; NO-SIMD128-FAST-NEXT: return %c = icmp sgt <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y @@ -6841,70 +5601,62 @@ define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: max_u_v8i16: ; NO-SIMD128: .functype max_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 -; NO-SIMD128-NEXT: i32.const $push55=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop55 +; NO-SIMD128-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop47 ; NO-SIMD128-NEXT: i32.gt_u $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.select $push4=, $8, $16, $pop3 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 12 -; NO-SIMD128-NEXT: i32.add 
$push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push54=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $7, $pop54 -; NO-SIMD128-NEXT: i32.const $push53=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $15, $pop53 -; NO-SIMD128-NEXT: i32.gt_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.select $push10=, $7, $15, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 10 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.const $push52=, 65535 -; NO-SIMD128-NEXT: i32.and $push14=, $6, $pop52 -; NO-SIMD128-NEXT: i32.const $push51=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $14, $pop51 -; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.select $push16=, $6, $14, $pop15 -; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.const $push50=, 65535 -; NO-SIMD128-NEXT: i32.and $push20=, $5, $pop50 -; NO-SIMD128-NEXT: i32.const $push49=, 65535 -; NO-SIMD128-NEXT: i32.and $push19=, $13, $pop49 -; NO-SIMD128-NEXT: i32.gt_u $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.select $push22=, $5, $13, $pop21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-NEXT: i32.const $push27=, 6 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.const $push48=, 65535 -; NO-SIMD128-NEXT: i32.and $push24=, $4, $pop48 -; NO-SIMD128-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-NEXT: i32.and $push23=, $12, $pop47 -; NO-SIMD128-NEXT: i32.gt_u $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.select $push26=, $4, $12, $pop25 -; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-NEXT: i32.and $push30=, $3, $pop46 +; NO-SIMD128-NEXT: i32.and $push6=, $7, $pop46 ; NO-SIMD128-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-NEXT: i32.and $push29=, $11, $pop45 -; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29 -; NO-SIMD128-NEXT: i32.select $push32=, $3, $11, $pop31 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop45 +; NO-SIMD128-NEXT: i32.gt_u $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.select $push8=, $7, $15, $pop7 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop8 ; NO-SIMD128-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-NEXT: i32.and $push34=, $2, $pop44 +; NO-SIMD128-NEXT: i32.and $push10=, $6, $pop44 ; NO-SIMD128-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-NEXT: i32.and $push33=, $10, $pop43 -; NO-SIMD128-NEXT: i32.gt_u $push35=, $pop34, $pop33 -; NO-SIMD128-NEXT: i32.select $push36=, $2, $10, $pop35 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 +; NO-SIMD128-NEXT: i32.and $push9=, $14, $pop43 +; NO-SIMD128-NEXT: i32.gt_u $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.select $push12=, $6, $14, $pop11 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop12 ; NO-SIMD128-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-NEXT: i32.and $push38=, $1, $pop42 +; NO-SIMD128-NEXT: i32.and $push14=, $5, $pop42 ; NO-SIMD128-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-NEXT: i32.and $push37=, $9, $pop41 -; NO-SIMD128-NEXT: i32.gt_u $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.select $push40=, $1, $9, $pop39 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: i32.and $push13=, $13, $pop41 +; NO-SIMD128-NEXT: i32.gt_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.select $push16=, $5, $13, $pop15 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-NEXT: i32.and $push18=, $4, $pop40 +; 
NO-SIMD128-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $pop39 +; NO-SIMD128-NEXT: i32.gt_u $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.select $push20=, $4, $12, $pop19 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop20 +; NO-SIMD128-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-NEXT: i32.and $push22=, $3, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-NEXT: i32.and $push21=, $11, $pop37 +; NO-SIMD128-NEXT: i32.gt_u $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.select $push24=, $3, $11, $pop23 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop24 +; NO-SIMD128-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-NEXT: i32.and $push26=, $2, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-NEXT: i32.and $push25=, $10, $pop35 +; NO-SIMD128-NEXT: i32.gt_u $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.select $push28=, $2, $10, $pop27 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 +; NO-SIMD128-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-NEXT: i32.and $push30=, $1, $pop34 +; NO-SIMD128-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-NEXT: i32.and $push29=, $9, $pop33 +; NO-SIMD128-NEXT: i32.gt_u $push31=, $pop30, $pop29 +; NO-SIMD128-NEXT: i32.select $push32=, $1, $9, $pop31 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_u_v8i16: @@ -6912,68 +5664,60 @@ define <8 x i16> @max_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop55 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.select $push4=, $1, $9, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop54 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $2, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop45 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.select $push8=, $2, $10, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop52 -; NO-SIMD128-FAST-NEXT: i32.const $push51=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $3, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $pop43 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.select $push12=, $3, $11, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $4, $pop42 +; 
NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $pop41 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.select $push16=, $4, $12, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push48=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $5, $pop48 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $pop47 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.select $push22=, $5, $13, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $6, $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $pop45 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.select $push26=, $6, $14, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $7, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $5, $pop40 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $13, $pop39 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.select $push20=, $5, $13, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $14, $pop37 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.select $push24=, $6, $14, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push26=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop35 +; NO-SIMD128-FAST-NEXT: i32.gt_u $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.select $push28=, $7, $15, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push30=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop33 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.select $push32=, $7, $15, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push36=, $8, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $pop41 -; NO-SIMD128-FAST-NEXT: i32.gt_u $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.select $push38=, $8, $16, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; 
NO-SIMD128-FAST-NEXT: i32.select $push32=, $8, $16, $pop31 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %c = icmp ugt <8 x i16> %x, %y %a = select <8 x i1> %c, <8 x i16> %x, <8 x i16> %y @@ -6996,78 +5740,70 @@ define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: avgr_u_v8i16: ; NO-SIMD128: .functype avgr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.add $push2=, $8, $16 -; NO-SIMD128-NEXT: i32.const $push3=, 1 -; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 65534 -; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 -; NO-SIMD128-NEXT: i32.const $push63=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop63 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $pop7 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push10=, $7, $15 -; NO-SIMD128-NEXT: i32.const $push62=, 1 -; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop62 -; NO-SIMD128-NEXT: i32.const $push61=, 65534 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop61 -; NO-SIMD128-NEXT: i32.const $push60=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop60 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop13 -; NO-SIMD128-NEXT: i32.const $push14=, 10 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push16=, $6, $14 -; NO-SIMD128-NEXT: i32.const $push59=, 1 -; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop59 -; NO-SIMD128-NEXT: i32.const $push58=, 65534 -; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop58 -; NO-SIMD128-NEXT: i32.const $push57=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop57 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop19 -; NO-SIMD128-NEXT: i32.add $push20=, $5, $13 -; NO-SIMD128-NEXT: i32.const $push56=, 1 -; NO-SIMD128-NEXT: i32.add $push21=, $pop20, $pop56 -; NO-SIMD128-NEXT: i32.const $push55=, 65534 -; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $pop55 +; NO-SIMD128-NEXT: i32.add $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.const $push1=, 1 +; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-NEXT: i32.const $push3=, 65534 +; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3 +; NO-SIMD128-NEXT: i32.const $push55=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop55 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $7, $15 ; NO-SIMD128-NEXT: i32.const $push54=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop54 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop23 -; NO-SIMD128-NEXT: i32.const $push24=, 6 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.add $push26=, $4, $12 -; NO-SIMD128-NEXT: i32.const $push53=, 1 -; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop53 -; NO-SIMD128-NEXT: i32.const $push52=, 65534 -; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop52 +; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop54 +; NO-SIMD128-NEXT: i32.const $push53=, 65534 +; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop53 +; NO-SIMD128-NEXT: i32.const $push52=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop52 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop9 +; NO-SIMD128-NEXT: i32.add $push10=, $6, $14 ; NO-SIMD128-NEXT: i32.const $push51=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop51 -; NO-SIMD128-NEXT: i32.store16 0($pop25), $pop29 -; NO-SIMD128-NEXT: 
i32.add $push30=, $3, $11 -; NO-SIMD128-NEXT: i32.const $push50=, 1 -; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop50 -; NO-SIMD128-NEXT: i32.const $push49=, 65534 -; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop49 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop51 +; NO-SIMD128-NEXT: i32.const $push50=, 65534 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop50 +; NO-SIMD128-NEXT: i32.const $push49=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop49 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop13 +; NO-SIMD128-NEXT: i32.add $push14=, $5, $13 ; NO-SIMD128-NEXT: i32.const $push48=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop48 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop33 -; NO-SIMD128-NEXT: i32.add $push34=, $2, $10 -; NO-SIMD128-NEXT: i32.const $push47=, 1 -; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop47 -; NO-SIMD128-NEXT: i32.const $push46=, 65534 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop46 +; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop48 +; NO-SIMD128-NEXT: i32.const $push47=, 65534 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop47 +; NO-SIMD128-NEXT: i32.const $push46=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop46 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop17 +; NO-SIMD128-NEXT: i32.add $push18=, $4, $12 ; NO-SIMD128-NEXT: i32.const $push45=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop45 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop37 -; NO-SIMD128-NEXT: i32.add $push38=, $1, $9 -; NO-SIMD128-NEXT: i32.const $push44=, 1 -; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop44 -; NO-SIMD128-NEXT: i32.const $push43=, 65534 -; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop45 +; NO-SIMD128-NEXT: i32.const $push44=, 65534 +; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop44 +; NO-SIMD128-NEXT: i32.const $push43=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop43 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop21 +; NO-SIMD128-NEXT: i32.add $push22=, $3, $11 ; NO-SIMD128-NEXT: i32.const $push42=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop42 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop41 +; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop42 +; NO-SIMD128-NEXT: i32.const $push41=, 65534 +; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop41 +; NO-SIMD128-NEXT: i32.const $push40=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop40 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop25 +; NO-SIMD128-NEXT: i32.add $push26=, $2, $10 +; NO-SIMD128-NEXT: i32.const $push39=, 1 +; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop39 +; NO-SIMD128-NEXT: i32.const $push38=, 65534 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop37 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop29 +; NO-SIMD128-NEXT: i32.add $push30=, $1, $9 +; NO-SIMD128-NEXT: i32.const $push36=, 1 +; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 65534 +; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop35 +; NO-SIMD128-NEXT: i32.const $push34=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop34 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop33 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: avgr_u_v8i16: @@ -7078,73 +5814,65 @@ define <8 x i16> @avgr_u_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 ; NO-SIMD128-FAST-NEXT: i32.const $push3=, 65534 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 -; NO-SIMD128-FAST-NEXT: i32.const 
$push63=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop63 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop55 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop5 ; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop62 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop61 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop52 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.add $push10=, $3, $11 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop58 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop57 -; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop56 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop54 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop19 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop52 ; NO-SIMD128-FAST-NEXT: i32.const $push51=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop49 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $12 ; NO-SIMD128-FAST-NEXT: i32.const $push48=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop25), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65534 -; 
NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop46 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $13 ; NO-SIMD128-FAST-NEXT: i32.const $push45=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop31), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-FAST-NEXT: i32.add $push19=, $pop18, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $14 ; NO-SIMD128-FAST-NEXT: i32.const $push42=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop37), $pop41 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop41 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop25 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop29 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop33 ; NO-SIMD128-FAST-NEXT: return %a = add nuw <8 x i16> %x, %y %b = add nuw <8 x i16> %a, @@ -7176,78 +5904,70 @@ define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: avgr_u_v8i16_wrap: ; NO-SIMD128: .functype avgr_u_v8i16_wrap (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.add $push2=, $8, $16 -; NO-SIMD128-NEXT: i32.const $push3=, 1 -; NO-SIMD128-NEXT: i32.add $push4=, $pop2, $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 65534 -; NO-SIMD128-NEXT: i32.and $push6=, $pop4, $pop5 -; NO-SIMD128-NEXT: i32.const $push63=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push7=, $pop6, $pop63 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $pop7 -; NO-SIMD128-NEXT: i32.const $push8=, 12 
-; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.add $push10=, $7, $15 -; NO-SIMD128-NEXT: i32.const $push62=, 1 -; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop62 -; NO-SIMD128-NEXT: i32.const $push61=, 65534 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop61 -; NO-SIMD128-NEXT: i32.const $push60=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop60 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop13 -; NO-SIMD128-NEXT: i32.const $push14=, 10 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.add $push16=, $6, $14 -; NO-SIMD128-NEXT: i32.const $push59=, 1 -; NO-SIMD128-NEXT: i32.add $push17=, $pop16, $pop59 -; NO-SIMD128-NEXT: i32.const $push58=, 65534 -; NO-SIMD128-NEXT: i32.and $push18=, $pop17, $pop58 -; NO-SIMD128-NEXT: i32.const $push57=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push19=, $pop18, $pop57 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop19 -; NO-SIMD128-NEXT: i32.add $push20=, $5, $13 -; NO-SIMD128-NEXT: i32.const $push56=, 1 -; NO-SIMD128-NEXT: i32.add $push21=, $pop20, $pop56 -; NO-SIMD128-NEXT: i32.const $push55=, 65534 -; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $pop55 +; NO-SIMD128-NEXT: i32.add $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.const $push1=, 1 +; NO-SIMD128-NEXT: i32.add $push2=, $pop0, $pop1 +; NO-SIMD128-NEXT: i32.const $push3=, 65534 +; NO-SIMD128-NEXT: i32.and $push4=, $pop2, $pop3 +; NO-SIMD128-NEXT: i32.const $push55=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push5=, $pop4, $pop55 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop5 +; NO-SIMD128-NEXT: i32.add $push6=, $7, $15 ; NO-SIMD128-NEXT: i32.const $push54=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push23=, $pop22, $pop54 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop23 -; NO-SIMD128-NEXT: i32.const $push24=, 6 -; NO-SIMD128-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-NEXT: i32.add $push26=, $4, $12 -; NO-SIMD128-NEXT: i32.const $push53=, 1 -; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop53 -; NO-SIMD128-NEXT: i32.const $push52=, 65534 -; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop52 +; NO-SIMD128-NEXT: i32.add $push7=, $pop6, $pop54 +; NO-SIMD128-NEXT: i32.const $push53=, 65534 +; NO-SIMD128-NEXT: i32.and $push8=, $pop7, $pop53 +; NO-SIMD128-NEXT: i32.const $push52=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop52 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop9 +; NO-SIMD128-NEXT: i32.add $push10=, $6, $14 ; NO-SIMD128-NEXT: i32.const $push51=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop51 -; NO-SIMD128-NEXT: i32.store16 0($pop25), $pop29 -; NO-SIMD128-NEXT: i32.add $push30=, $3, $11 -; NO-SIMD128-NEXT: i32.const $push50=, 1 -; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop50 -; NO-SIMD128-NEXT: i32.const $push49=, 65534 -; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop49 +; NO-SIMD128-NEXT: i32.add $push11=, $pop10, $pop51 +; NO-SIMD128-NEXT: i32.const $push50=, 65534 +; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $pop50 +; NO-SIMD128-NEXT: i32.const $push49=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push13=, $pop12, $pop49 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop13 +; NO-SIMD128-NEXT: i32.add $push14=, $5, $13 ; NO-SIMD128-NEXT: i32.const $push48=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop48 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop33 -; NO-SIMD128-NEXT: i32.add $push34=, $2, $10 -; NO-SIMD128-NEXT: i32.const $push47=, 1 -; NO-SIMD128-NEXT: i32.add $push35=, $pop34, $pop47 -; NO-SIMD128-NEXT: i32.const $push46=, 65534 -; NO-SIMD128-NEXT: i32.and $push36=, $pop35, $pop46 +; NO-SIMD128-NEXT: i32.add $push15=, $pop14, $pop48 +; 
NO-SIMD128-NEXT: i32.const $push47=, 65534 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $pop47 +; NO-SIMD128-NEXT: i32.const $push46=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push17=, $pop16, $pop46 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop17 +; NO-SIMD128-NEXT: i32.add $push18=, $4, $12 ; NO-SIMD128-NEXT: i32.const $push45=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push37=, $pop36, $pop45 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop37 -; NO-SIMD128-NEXT: i32.add $push38=, $1, $9 -; NO-SIMD128-NEXT: i32.const $push44=, 1 -; NO-SIMD128-NEXT: i32.add $push39=, $pop38, $pop44 -; NO-SIMD128-NEXT: i32.const $push43=, 65534 -; NO-SIMD128-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-NEXT: i32.add $push19=, $pop18, $pop45 +; NO-SIMD128-NEXT: i32.const $push44=, 65534 +; NO-SIMD128-NEXT: i32.and $push20=, $pop19, $pop44 +; NO-SIMD128-NEXT: i32.const $push43=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop43 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop21 +; NO-SIMD128-NEXT: i32.add $push22=, $3, $11 ; NO-SIMD128-NEXT: i32.const $push42=, 1 -; NO-SIMD128-NEXT: i32.shr_u $push41=, $pop40, $pop42 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop41 +; NO-SIMD128-NEXT: i32.add $push23=, $pop22, $pop42 +; NO-SIMD128-NEXT: i32.const $push41=, 65534 +; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $pop41 +; NO-SIMD128-NEXT: i32.const $push40=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop40 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop25 +; NO-SIMD128-NEXT: i32.add $push26=, $2, $10 +; NO-SIMD128-NEXT: i32.const $push39=, 1 +; NO-SIMD128-NEXT: i32.add $push27=, $pop26, $pop39 +; NO-SIMD128-NEXT: i32.const $push38=, 65534 +; NO-SIMD128-NEXT: i32.and $push28=, $pop27, $pop38 +; NO-SIMD128-NEXT: i32.const $push37=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push29=, $pop28, $pop37 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop29 +; NO-SIMD128-NEXT: i32.add $push30=, $1, $9 +; NO-SIMD128-NEXT: i32.const $push36=, 1 +; NO-SIMD128-NEXT: i32.add $push31=, $pop30, $pop36 +; NO-SIMD128-NEXT: i32.const $push35=, 65534 +; NO-SIMD128-NEXT: i32.and $push32=, $pop31, $pop35 +; NO-SIMD128-NEXT: i32.const $push34=, 1 +; NO-SIMD128-NEXT: i32.shr_u $push33=, $pop32, $pop34 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop33 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: avgr_u_v8i16_wrap: @@ -7258,73 +5978,65 @@ define <8 x i16> @avgr_u_v8i16_wrap(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $pop0, $pop1 ; NO-SIMD128-FAST-NEXT: i32.const $push3=, 65534 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $pop2, $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push63=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop63 +; NO-SIMD128-FAST-NEXT: i32.const $push55=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $pop4, $pop55 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop5 ; NO-SIMD128-FAST-NEXT: i32.add $push6=, $2, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push62=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop62 -; NO-SIMD128-FAST-NEXT: i32.const $push61=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop61 -; NO-SIMD128-FAST-NEXT: i32.const $push60=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop60 +; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push7=, $pop6, $pop54 +; NO-SIMD128-FAST-NEXT: i32.const $push53=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $pop7, $pop53 +; NO-SIMD128-FAST-NEXT: i32.const $push52=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop52 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.add 
$push10=, $3, $11 -; NO-SIMD128-FAST-NEXT: i32.const $push59=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop59 -; NO-SIMD128-FAST-NEXT: i32.const $push58=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop58 -; NO-SIMD128-FAST-NEXT: i32.const $push57=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop57 -; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.const $push56=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $pop16, $pop56 -; NO-SIMD128-FAST-NEXT: i32.const $push55=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $pop17, $pop55 -; NO-SIMD128-FAST-NEXT: i32.const $push54=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push19=, $pop18, $pop54 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop19 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push21=, $pop20, $pop53 -; NO-SIMD128-FAST-NEXT: i32.const $push52=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $pop52 ; NO-SIMD128-FAST-NEXT: i32.const $push51=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push23=, $pop22, $pop51 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push24=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push25=, $0, $pop24 -; NO-SIMD128-FAST-NEXT: i32.add $push26=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop50 -; NO-SIMD128-FAST-NEXT: i32.const $push49=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop49 +; NO-SIMD128-FAST-NEXT: i32.add $push11=, $pop10, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push50=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $pop50 +; NO-SIMD128-FAST-NEXT: i32.const $push49=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push13=, $pop12, $pop49 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop13 +; NO-SIMD128-FAST-NEXT: i32.add $push14=, $4, $12 ; NO-SIMD128-FAST-NEXT: i32.const $push48=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop48 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop25), $pop29 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push33=, $pop32, $pop47 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push34=, $pop33, $pop46 +; NO-SIMD128-FAST-NEXT: i32.add $push15=, $pop14, $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push46=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop46 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.add $push18=, $5, $13 ; NO-SIMD128-FAST-NEXT: i32.const $push45=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push35=, $pop34, $pop45 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop31), $pop35 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push37=, $0, $pop36 -; NO-SIMD128-FAST-NEXT: i32.add $push38=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 1 -; NO-SIMD128-FAST-NEXT: i32.add $push39=, $pop38, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65534 -; NO-SIMD128-FAST-NEXT: i32.and $push40=, $pop39, $pop43 +; NO-SIMD128-FAST-NEXT: i32.add 
$push19=, $pop18, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $pop44 +; NO-SIMD128-FAST-NEXT: i32.const $push43=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop43 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.add $push22=, $6, $14 ; NO-SIMD128-FAST-NEXT: i32.const $push42=, 1 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push41=, $pop40, $pop42 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop37), $pop41 +; NO-SIMD128-FAST-NEXT: i32.add $push23=, $pop22, $pop42 +; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push24=, $pop23, $pop41 +; NO-SIMD128-FAST-NEXT: i32.const $push40=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop40 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop25 +; NO-SIMD128-FAST-NEXT: i32.add $push26=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push27=, $pop26, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push28=, $pop27, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push29=, $pop28, $pop37 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop29 +; NO-SIMD128-FAST-NEXT: i32.add $push30=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 1 +; NO-SIMD128-FAST-NEXT: i32.add $push31=, $pop30, $pop36 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65534 +; NO-SIMD128-FAST-NEXT: i32.and $push32=, $pop31, $pop35 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, 1 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push33=, $pop32, $pop34 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop33 ; NO-SIMD128-FAST-NEXT: return %a = add <8 x i16> %x, %y %b = add <8 x i16> %a, @@ -7348,70 +6060,62 @@ define <8 x i16> @abs_v8i16(<8 x i16> %x) { ; NO-SIMD128-LABEL: abs_v8i16: ; NO-SIMD128: .functype abs_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 14 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: i32.extend16_s $push0=, $8 ; NO-SIMD128-NEXT: i32.const $push1=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push55=, $pop0, $pop1 -; NO-SIMD128-NEXT: local.tee $push54=, $9=, $pop55 -; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop54 +; NO-SIMD128-NEXT: i32.shr_s $push47=, $pop0, $pop1 +; NO-SIMD128-NEXT: local.tee $push46=, $9=, $pop47 +; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop46 ; NO-SIMD128-NEXT: i32.sub $push3=, $pop2, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $pop3 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $7 -; NO-SIMD128-NEXT: i32.const $push53=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push52=, $pop6, $pop53 -; NO-SIMD128-NEXT: local.tee $push51=, $8=, $pop52 -; NO-SIMD128-NEXT: i32.xor $push7=, $7, $pop51 -; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $8 -; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push14=, 10 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.extend16_s $push11=, $6 -; NO-SIMD128-NEXT: i32.const $push50=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push49=, $pop11, $pop50 -; NO-SIMD128-NEXT: local.tee $push48=, $8=, $pop49 -; NO-SIMD128-NEXT: i32.xor $push12=, $6, $pop48 -; NO-SIMD128-NEXT: i32.sub $push13=, $pop12, $8 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 -; NO-SIMD128-NEXT: i32.extend16_s $push16=, $5 -; NO-SIMD128-NEXT: i32.const $push47=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push46=, $pop16, 
$pop47 -; NO-SIMD128-NEXT: local.tee $push45=, $8=, $pop46 -; NO-SIMD128-NEXT: i32.xor $push17=, $5, $pop45 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $7 +; NO-SIMD128-NEXT: i32.const $push45=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push44=, $pop4, $pop45 +; NO-SIMD128-NEXT: local.tee $push43=, $8=, $pop44 +; NO-SIMD128-NEXT: i32.xor $push5=, $7, $pop43 +; NO-SIMD128-NEXT: i32.sub $push6=, $pop5, $8 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $6 +; NO-SIMD128-NEXT: i32.const $push42=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push41=, $pop7, $pop42 +; NO-SIMD128-NEXT: local.tee $push40=, $8=, $pop41 +; NO-SIMD128-NEXT: i32.xor $push8=, $6, $pop40 +; NO-SIMD128-NEXT: i32.sub $push9=, $pop8, $8 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop9 +; NO-SIMD128-NEXT: i32.extend16_s $push10=, $5 +; NO-SIMD128-NEXT: i32.const $push39=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push38=, $pop10, $pop39 +; NO-SIMD128-NEXT: local.tee $push37=, $8=, $pop38 +; NO-SIMD128-NEXT: i32.xor $push11=, $5, $pop37 +; NO-SIMD128-NEXT: i32.sub $push12=, $pop11, $8 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $4 +; NO-SIMD128-NEXT: i32.const $push36=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push35=, $pop13, $pop36 +; NO-SIMD128-NEXT: local.tee $push34=, $8=, $pop35 +; NO-SIMD128-NEXT: i32.xor $push14=, $4, $pop34 +; NO-SIMD128-NEXT: i32.sub $push15=, $pop14, $8 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push16=, $3 +; NO-SIMD128-NEXT: i32.const $push33=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push32=, $pop16, $pop33 +; NO-SIMD128-NEXT: local.tee $push31=, $8=, $pop32 +; NO-SIMD128-NEXT: i32.xor $push17=, $3, $pop31 ; NO-SIMD128-NEXT: i32.sub $push18=, $pop17, $8 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop18 -; NO-SIMD128-NEXT: i32.const $push22=, 6 -; NO-SIMD128-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-NEXT: i32.extend16_s $push19=, $4 -; NO-SIMD128-NEXT: i32.const $push44=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push43=, $pop19, $pop44 -; NO-SIMD128-NEXT: local.tee $push42=, $8=, $pop43 -; NO-SIMD128-NEXT: i32.xor $push20=, $4, $pop42 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop18 +; NO-SIMD128-NEXT: i32.extend16_s $push19=, $2 +; NO-SIMD128-NEXT: i32.const $push30=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push29=, $pop19, $pop30 +; NO-SIMD128-NEXT: local.tee $push28=, $8=, $pop29 +; NO-SIMD128-NEXT: i32.xor $push20=, $2, $pop28 ; NO-SIMD128-NEXT: i32.sub $push21=, $pop20, $8 -; NO-SIMD128-NEXT: i32.store16 0($pop23), $pop21 -; NO-SIMD128-NEXT: i32.extend16_s $push24=, $3 -; NO-SIMD128-NEXT: i32.const $push41=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push40=, $pop24, $pop41 -; NO-SIMD128-NEXT: local.tee $push39=, $8=, $pop40 -; NO-SIMD128-NEXT: i32.xor $push25=, $3, $pop39 -; NO-SIMD128-NEXT: i32.sub $push26=, $pop25, $8 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop26 -; NO-SIMD128-NEXT: i32.extend16_s $push27=, $2 -; NO-SIMD128-NEXT: i32.const $push38=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push37=, $pop27, $pop38 -; NO-SIMD128-NEXT: local.tee $push36=, $8=, $pop37 -; NO-SIMD128-NEXT: i32.xor $push28=, $2, $pop36 -; NO-SIMD128-NEXT: i32.sub $push29=, $pop28, $8 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop29 -; NO-SIMD128-NEXT: i32.extend16_s $push30=, $1 -; NO-SIMD128-NEXT: i32.const $push35=, 15 -; NO-SIMD128-NEXT: i32.shr_s $push34=, $pop30, $pop35 -; NO-SIMD128-NEXT: local.tee $push33=, $8=, $pop34 -; NO-SIMD128-NEXT: i32.xor $push31=, $1, $pop33 -; NO-SIMD128-NEXT: i32.sub $push32=, 
$pop31, $8 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop21 +; NO-SIMD128-NEXT: i32.extend16_s $push22=, $1 +; NO-SIMD128-NEXT: i32.const $push27=, 15 +; NO-SIMD128-NEXT: i32.shr_s $push26=, $pop22, $pop27 +; NO-SIMD128-NEXT: local.tee $push25=, $8=, $pop26 +; NO-SIMD128-NEXT: i32.xor $push23=, $1, $pop25 +; NO-SIMD128-NEXT: i32.sub $push24=, $pop23, $8 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop24 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: abs_v8i16: @@ -7419,68 +6123,60 @@ define <8 x i16> @abs_v8i16(<8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push0=, $1 ; NO-SIMD128-FAST-NEXT: i32.const $push1=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push55=, $pop0, $pop1 -; NO-SIMD128-FAST-NEXT: local.tee $push54=, $9=, $pop55 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop54 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push47=, $pop0, $pop1 +; NO-SIMD128-FAST-NEXT: local.tee $push46=, $9=, $pop47 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $1, $pop46 ; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop2, $9 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push4=, $2 -; NO-SIMD128-FAST-NEXT: i32.const $push53=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push52=, $pop4, $pop53 -; NO-SIMD128-FAST-NEXT: local.tee $push51=, $1=, $pop52 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop51 +; NO-SIMD128-FAST-NEXT: i32.const $push45=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push44=, $pop4, $pop45 +; NO-SIMD128-FAST-NEXT: local.tee $push43=, $1=, $pop44 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop43 ; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push7=, $3 -; NO-SIMD128-FAST-NEXT: i32.const $push50=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push49=, $pop7, $pop50 -; NO-SIMD128-FAST-NEXT: local.tee $push48=, $2=, $pop49 -; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop48 +; NO-SIMD128-FAST-NEXT: i32.const $push42=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push41=, $pop7, $pop42 +; NO-SIMD128-FAST-NEXT: local.tee $push40=, $2=, $pop41 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $3, $pop40 ; NO-SIMD128-FAST-NEXT: i32.sub $push9=, $pop8, $2 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $4 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push46=, $pop10, $pop47 -; NO-SIMD128-FAST-NEXT: local.tee $push45=, $3=, $pop46 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push38=, $pop10, $pop39 +; NO-SIMD128-FAST-NEXT: local.tee $push37=, $3=, $pop38 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $4, $pop37 ; NO-SIMD128-FAST-NEXT: i32.sub $push12=, $pop11, $3 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $5 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push43=, $pop15, $pop44 -; NO-SIMD128-FAST-NEXT: local.tee $push42=, $4=, $pop43 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $5, $pop42 -; NO-SIMD128-FAST-NEXT: i32.sub $push17=, $pop16, $4 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push18=, $6 -; 
NO-SIMD128-FAST-NEXT: i32.const $push41=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push40=, $pop18, $pop41 -; NO-SIMD128-FAST-NEXT: local.tee $push39=, $5=, $pop40 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $6, $pop39 -; NO-SIMD128-FAST-NEXT: i32.sub $push20=, $pop19, $5 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $7 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push37=, $pop23, $pop38 -; NO-SIMD128-FAST-NEXT: local.tee $push36=, $6=, $pop37 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $7, $pop36 -; NO-SIMD128-FAST-NEXT: i32.sub $push25=, $pop24, $6 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push28=, $8 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 15 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push34=, $pop28, $pop35 -; NO-SIMD128-FAST-NEXT: local.tee $push33=, $0=, $pop34 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop33 -; NO-SIMD128-FAST-NEXT: i32.sub $push30=, $pop29, $0 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push35=, $pop13, $pop36 +; NO-SIMD128-FAST-NEXT: local.tee $push34=, $4=, $pop35 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $5, $pop34 +; NO-SIMD128-FAST-NEXT: i32.sub $push15=, $pop14, $4 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop16, $pop33 +; NO-SIMD128-FAST-NEXT: local.tee $push31=, $5=, $pop32 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $6, $pop31 +; NO-SIMD128-FAST-NEXT: i32.sub $push18=, $pop17, $5 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push29=, $pop19, $pop30 +; NO-SIMD128-FAST-NEXT: local.tee $push28=, $6=, $pop29 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $7, $pop28 +; NO-SIMD128-FAST-NEXT: i32.sub $push21=, $pop20, $6 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push22=, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 15 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push26=, $pop22, $pop27 +; NO-SIMD128-FAST-NEXT: local.tee $push25=, $7=, $pop26 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $8, $pop25 +; NO-SIMD128-FAST-NEXT: i32.sub $push24=, $pop23, $7 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24 ; NO-SIMD128-FAST-NEXT: return %a = sub <8 x i16> zeroinitializer, %x %b = icmp slt <8 x i16> %x, zeroinitializer @@ -7505,37 +6201,29 @@ define <8 x i16> @neg_v8i16(<8 x i16> %x) { ; NO-SIMD128: .functype neg_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 0 -; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $5 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push23=, 0 -; NO-SIMD128-NEXT: i32.sub $push2=, $pop23, $3 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push22=, 0 -; NO-SIMD128-NEXT: i32.sub $push3=, $pop22, $2 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 -; 
NO-SIMD128-NEXT: i32.const $push21=, 0 -; NO-SIMD128-NEXT: i32.sub $push4=, $pop21, $1 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 14 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.const $push20=, 0 -; NO-SIMD128-NEXT: i32.sub $push5=, $pop20, $8 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.const $push19=, 0 -; NO-SIMD128-NEXT: i32.sub $push8=, $pop19, $7 -; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 10 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.const $push18=, 0 -; NO-SIMD128-NEXT: i32.sub $push11=, $pop18, $6 -; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 6 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push17=, 0 -; NO-SIMD128-NEXT: i32.sub $push14=, $pop17, $4 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $8 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push15=, 0 +; NO-SIMD128-NEXT: i32.sub $push2=, $pop15, $7 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push14=, 0 +; NO-SIMD128-NEXT: i32.sub $push3=, $pop14, $6 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push13=, 0 +; NO-SIMD128-NEXT: i32.sub $push4=, $pop13, $5 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push12=, 0 +; NO-SIMD128-NEXT: i32.sub $push5=, $pop12, $4 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push11=, 0 +; NO-SIMD128-NEXT: i32.sub $push6=, $pop11, $3 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push10=, 0 +; NO-SIMD128-NEXT: i32.sub $push7=, $pop10, $2 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push9=, 0 +; NO-SIMD128-NEXT: i32.sub $push8=, $pop9, $1 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: neg_v8i16: @@ -7544,35 +6232,27 @@ define <8 x i16> @neg_v8i16(<8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 ; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop23, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop15, $2 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop22, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop14, $3 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop21, $4 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop20, $5 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push10=, $pop19, $6 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: 
i32.const $push11=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push13=, $pop18, $7 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push16=, $pop17, $8 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop13, $4 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push12=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $pop12, $5 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop11, $6 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push7=, $pop10, $7 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop9, $8 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %a = sub <8 x i16> , %x @@ -7596,64 +6276,48 @@ define <8 x i16> @shl_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128: .functype shl_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push18=, $9, $pop0 -; NO-SIMD128-NEXT: local.tee $push17=, $9=, $pop18 -; NO-SIMD128-NEXT: i32.shl $push1=, $5, $pop17 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 -; NO-SIMD128-NEXT: i32.shl $push2=, $3, $9 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-NEXT: i32.shl $push3=, $2, $9 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 -; NO-SIMD128-NEXT: i32.shl $push4=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 14 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.shl $push5=, $8, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.shl $push8=, $7, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 10 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.shl $push11=, $6, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 6 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.shl $push14=, $4, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop0 +; NO-SIMD128-NEXT: local.tee $push9=, $9=, $pop10 +; NO-SIMD128-NEXT: i32.shl $push1=, $8, $pop9 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $7, $9 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop2 +; NO-SIMD128-NEXT: i32.shl $push3=, $6, $9 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop3 +; NO-SIMD128-NEXT: i32.shl $push4=, $5, $9 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-NEXT: i32.shl $push5=, $4, $9 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop5 +; NO-SIMD128-NEXT: i32.shl $push6=, $3, $9 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.shl $push7=, $2, $9 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop7 +; NO-SIMD128-NEXT: i32.shl $push8=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 ; 
NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_v8i16: ; NO-SIMD128-FAST: .functype shl_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop0 -; NO-SIMD128-FAST-NEXT: local.tee $push17=, $9=, $pop18 -; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop17 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push9=, $9=, $pop10 +; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $2, $pop9 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $9 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 ; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $9 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $9 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $9 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <8 x i16> undef, i16 %x, i32 0 %s = shufflevector <8 x i16> %t, <8 x i16> undef, @@ -7681,37 +6345,29 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) { ; NO-SIMD128: .functype shl_const_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 5 -; NO-SIMD128-NEXT: i32.shl $push1=, $5, $pop0 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push23=, 5 -; NO-SIMD128-NEXT: i32.shl $push2=, $3, $pop23 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push22=, 5 -; NO-SIMD128-NEXT: i32.shl $push3=, $2, $pop22 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push21=, 5 -; NO-SIMD128-NEXT: i32.shl $push4=, $1, $pop21 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 14 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.const $push20=, 5 -; NO-SIMD128-NEXT: i32.shl $push5=, $8, $pop20 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.const $push19=, 5 -; NO-SIMD128-NEXT: i32.shl $push8=, $7, $pop19 -; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 -; 
NO-SIMD128-NEXT: i32.const $push12=, 10 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.shl $push11=, $6, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 6 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push17=, 5 -; NO-SIMD128-NEXT: i32.shl $push14=, $4, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.shl $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push15=, 5 +; NO-SIMD128-NEXT: i32.shl $push2=, $7, $pop15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push14=, 5 +; NO-SIMD128-NEXT: i32.shl $push3=, $6, $pop14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push13=, 5 +; NO-SIMD128-NEXT: i32.shl $push4=, $5, $pop13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push12=, 5 +; NO-SIMD128-NEXT: i32.shl $push5=, $4, $pop12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push11=, 5 +; NO-SIMD128-NEXT: i32.shl $push6=, $3, $pop11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push10=, 5 +; NO-SIMD128-NEXT: i32.shl $push7=, $2, $pop10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push9=, 5 +; NO-SIMD128-NEXT: i32.shl $push8=, $1, $pop9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_const_v8i16: @@ -7720,35 +6376,27 @@ define <8 x i16> @shl_const_v8i16(<8 x i16> %v) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5 ; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop15 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop14 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $5, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $6, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push13=, $7, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop17 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, 
$pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push12=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $5, $pop12 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push7=, $7, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $8, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %a = shl <8 x i16> %v, @@ -7866,45 +6514,37 @@ define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128: .functype shl_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop0 -; NO-SIMD128-NEXT: i32.shl $push2=, $5, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push31=, 65535 -; NO-SIMD128-NEXT: i32.and $push3=, $11, $pop31 -; NO-SIMD128-NEXT: i32.shl $push4=, $3, $pop3 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push30=, 65535 -; NO-SIMD128-NEXT: i32.and $push5=, $10, $pop30 -; NO-SIMD128-NEXT: i32.shl $push6=, $2, $pop5 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push29=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $9, $pop29 -; NO-SIMD128-NEXT: i32.shl $push8=, $1, $pop7 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push28=, 65535 -; NO-SIMD128-NEXT: i32.and $push9=, $16, $pop28 -; NO-SIMD128-NEXT: i32.shl $push10=, $8, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push27=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $15, $pop27 -; NO-SIMD128-NEXT: i32.shl $push14=, $7, $pop13 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push19=, 10 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push26=, 65535 -; NO-SIMD128-NEXT: i32.and $push17=, $14, $pop26 -; NO-SIMD128-NEXT: i32.shl $push18=, $6, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push23=, 6 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.const $push25=, 65535 -; NO-SIMD128-NEXT: i32.and $push21=, $12, $pop25 -; NO-SIMD128-NEXT: i32.shl $push22=, $4, $pop21 -; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.shl $push2=, $8, $pop1 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push23=, 65535 +; NO-SIMD128-NEXT: i32.and $push3=, $15, $pop23 +; NO-SIMD128-NEXT: i32.shl $push4=, $7, $pop3 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push22=, 65535 +; NO-SIMD128-NEXT: i32.and $push5=, $14, $pop22 +; NO-SIMD128-NEXT: i32.shl $push6=, $6, $pop5 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push21=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop21 +; NO-SIMD128-NEXT: i32.shl $push8=, $5, $pop7 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop8 +; 
NO-SIMD128-NEXT: i32.const $push20=, 65535 +; NO-SIMD128-NEXT: i32.and $push9=, $12, $pop20 +; NO-SIMD128-NEXT: i32.shl $push10=, $4, $pop9 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $11, $pop19 +; NO-SIMD128-NEXT: i32.shl $push12=, $3, $pop11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push18=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $10, $pop18 +; NO-SIMD128-NEXT: i32.shl $push14=, $2, $pop13 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push17=, 65535 +; NO-SIMD128-NEXT: i32.and $push15=, $9, $pop17 +; NO-SIMD128-NEXT: i32.shl $push16=, $1, $pop15 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_vec_v8i16: @@ -7914,42 +6554,34 @@ define <8 x i16> @shl_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop0 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push3=, $10, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $10, $pop23 ; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $11, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $11, $pop22 ; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push9=, $12, $pop29 -; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $13, $pop28 -; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $5, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $14, $pop27 -; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $6, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop26 -; NO-SIMD128-FAST-NEXT: i32.shl $push20=, $7, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $16, $pop25 -; NO-SIMD128-FAST-NEXT: i32.shl $push24=, $8, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $12, $pop21 +; NO-SIMD128-FAST-NEXT: i32.shl $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.shl $push10=, $5, $pop9 +; NO-SIMD128-FAST-NEXT: 
i32.store16 8($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $14, $pop19 +; NO-SIMD128-FAST-NEXT: i32.shl $push12=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $15, $pop18 +; NO-SIMD128-FAST-NEXT: i32.shl $push14=, $7, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $16, $pop17 +; NO-SIMD128-FAST-NEXT: i32.shl $push16=, $8, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %a = shl <8 x i16> %v, %x ret <8 x i16> %a @@ -7971,41 +6603,33 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-LABEL: shr_s_v8i16: ; NO-SIMD128: .functype shr_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend16_s $push1=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push26=, $9, $pop0 -; NO-SIMD128-NEXT: local.tee $push25=, $9=, $pop26 -; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop25 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend16_s $push3=, $3 +; NO-SIMD128-NEXT: i32.and $push18=, $9, $pop0 +; NO-SIMD128-NEXT: local.tee $push17=, $9=, $pop18 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $pop1, $pop17 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.extend16_s $push3=, $7 ; NO-SIMD128-NEXT: i32.shr_s $push4=, $pop3, $9 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 -; NO-SIMD128-NEXT: i32.extend16_s $push5=, $2 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop4 +; NO-SIMD128-NEXT: i32.extend16_s $push5=, $6 ; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $9 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $1 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $5 ; NO-SIMD128-NEXT: i32.shr_s $push8=, $pop7, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.extend16_s $push9=, $8 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop8 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $4 ; NO-SIMD128-NEXT: i32.shr_s $push10=, $pop9, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.extend16_s $push13=, $7 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop10 +; NO-SIMD128-NEXT: i32.extend16_s $push11=, $3 +; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $9 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-NEXT: i32.extend16_s $push13=, $2 ; NO-SIMD128-NEXT: i32.shr_s $push14=, $pop13, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push19=, 10 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.extend16_s $push17=, $6 -; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push23=, 6 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.extend16_s $push21=, $4 -; NO-SIMD128-NEXT: i32.shr_s $push22=, $pop21, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-NEXT: i32.extend16_s $push15=, $1 +; 
NO-SIMD128-NEXT: i32.shr_s $push16=, $pop15, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_v8i16: @@ -8013,9 +6637,9 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push1=, $1 ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push26=, $9, $pop0 -; NO-SIMD128-FAST-NEXT: local.tee $push25=, $1=, $pop26 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop25 +; NO-SIMD128-FAST-NEXT: i32.and $push18=, $9, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push17=, $1=, $pop18 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $pop1, $pop17 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push3=, $2 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push4=, $pop3, $1 @@ -8023,29 +6647,21 @@ define <8 x i16> @shr_s_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push5=, $3 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push7=, $4 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push8=, $pop7, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $5 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $pop9, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $5 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $6 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $6 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $7 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push15=, $8 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $pop15, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push19=, $7 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push20=, $pop19, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $8 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <8 x i16> undef, i16 %x, i32 0 %s = shufflevector <8 x i16> %t, <8 x i16> undef, @@ -8164,54 +6780,46 @@ define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128-LABEL: shr_s_vec_v8i16: ; NO-SIMD128: .functype shr_s_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend16_s $push2=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push2=, $8 ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop0 +; NO-SIMD128-NEXT: 
i32.and $push1=, $16, $pop0 ; NO-SIMD128-NEXT: i32.shr_s $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 -; NO-SIMD128-NEXT: i32.extend16_s $push5=, $3 -; NO-SIMD128-NEXT: i32.const $push39=, 65535 -; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop39 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 +; NO-SIMD128-NEXT: i32.extend16_s $push5=, $7 +; NO-SIMD128-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop31 ; NO-SIMD128-NEXT: i32.shr_s $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-NEXT: i32.extend16_s $push8=, $2 -; NO-SIMD128-NEXT: i32.const $push38=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop38 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-NEXT: i32.extend16_s $push8=, $6 +; NO-SIMD128-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $14, $pop30 ; NO-SIMD128-NEXT: i32.shr_s $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop9 -; NO-SIMD128-NEXT: i32.extend16_s $push11=, $1 -; NO-SIMD128-NEXT: i32.const $push37=, 65535 -; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop37 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop9 +; NO-SIMD128-NEXT: i32.extend16_s $push11=, $5 +; NO-SIMD128-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-NEXT: i32.and $push10=, $13, $pop29 ; NO-SIMD128-NEXT: i32.shr_s $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 14 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.extend16_s $push14=, $8 -; NO-SIMD128-NEXT: i32.const $push36=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $16, $pop36 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-NEXT: i32.extend16_s $push14=, $4 +; NO-SIMD128-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $12, $pop28 ; NO-SIMD128-NEXT: i32.shr_s $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push21=, 12 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.extend16_s $push19=, $7 -; NO-SIMD128-NEXT: i32.const $push35=, 65535 -; NO-SIMD128-NEXT: i32.and $push18=, $15, $pop35 -; NO-SIMD128-NEXT: i32.shr_s $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push26=, 10 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 -; NO-SIMD128-NEXT: i32.extend16_s $push24=, $6 -; NO-SIMD128-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-NEXT: i32.and $push23=, $14, $pop34 -; NO-SIMD128-NEXT: i32.shr_s $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 -; NO-SIMD128-NEXT: i32.extend16_s $push29=, $4 -; NO-SIMD128-NEXT: i32.const $push33=, 65535 -; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop33 -; NO-SIMD128-NEXT: i32.shr_s $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-NEXT: i32.extend16_s $push17=, $3 +; NO-SIMD128-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-NEXT: i32.and $push16=, $11, $pop27 +; NO-SIMD128-NEXT: i32.shr_s $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop18 +; NO-SIMD128-NEXT: i32.extend16_s $push20=, $2 +; NO-SIMD128-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-NEXT: i32.and $push19=, $10, $pop26 +; NO-SIMD128-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop21 +; NO-SIMD128-NEXT: i32.extend16_s $push23=, $1 +; 
NO-SIMD128-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-NEXT: i32.and $push22=, $9, $pop25 +; NO-SIMD128-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop24 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_vec_v8i16: @@ -8223,48 +6831,40 @@ define <8 x i16> @shr_s_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push5=, $2 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop39 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop31 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push6=, $pop5, $pop4 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push8=, $3 -; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop38 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop30 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push9=, $pop8, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push13=, $4 -; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $12, $pop37 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push14=, $pop13, $pop12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop14 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push16=, $5 -; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $13, $pop36 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push21=, $6 -; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $14, $pop35 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push22=, $pop21, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop19), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push26=, $7 -; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $pop34 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push27=, $pop26, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop24), $pop27 -; NO-SIMD128-FAST-NEXT: i32.const $push28=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push29=, $0, $pop28 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push31=, $8 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $16, $pop33 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push32=, $pop31, $pop30 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop29), $pop32 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $4 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop29 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push14=, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $13, $pop28 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push17=, $6 
+; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $14, $pop27 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push20=, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop26 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push23=, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $16, $pop25 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24 ; NO-SIMD128-FAST-NEXT: return %a = ashr <8 x i16> %v, %x ret <8 x i16> %a @@ -8287,48 +6887,40 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128: .functype shr_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $5, $pop0 -; NO-SIMD128-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-NEXT: i32.and $push33=, $9, $pop34 -; NO-SIMD128-NEXT: local.tee $push32=, $9=, $pop33 -; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop32 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push31=, 65535 -; NO-SIMD128-NEXT: i32.and $push3=, $3, $pop31 +; NO-SIMD128-NEXT: i32.and $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-NEXT: i32.and $push25=, $9, $pop26 +; NO-SIMD128-NEXT: local.tee $push24=, $9=, $pop25 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $pop1, $pop24 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push23=, 65535 +; NO-SIMD128-NEXT: i32.and $push3=, $7, $pop23 ; NO-SIMD128-NEXT: i32.shr_u $push4=, $pop3, $9 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push30=, 65535 -; NO-SIMD128-NEXT: i32.and $push5=, $2, $pop30 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push22=, 65535 +; NO-SIMD128-NEXT: i32.and $push5=, $6, $pop22 ; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $9 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push29=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $1, $pop29 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push21=, 65535 +; NO-SIMD128-NEXT: i32.and $push7=, $5, $pop21 ; NO-SIMD128-NEXT: i32.shr_u $push8=, $pop7, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push28=, 65535 -; NO-SIMD128-NEXT: i32.and $push9=, $8, $pop28 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push20=, 65535 +; NO-SIMD128-NEXT: i32.and $push9=, $4, $pop20 ; NO-SIMD128-NEXT: i32.shr_u $push10=, $pop9, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push27=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $7, $pop27 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $3, $pop19 +; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $9 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push18=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $2, $pop18 ; 
NO-SIMD128-NEXT: i32.shr_u $push14=, $pop13, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push19=, 10 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push26=, 65535 -; NO-SIMD128-NEXT: i32.and $push17=, $6, $pop26 -; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push23=, 6 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.const $push25=, 65535 -; NO-SIMD128-NEXT: i32.and $push21=, $4, $pop25 -; NO-SIMD128-NEXT: i32.shr_u $push22=, $pop21, $9 -; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push17=, 65535 +; NO-SIMD128-NEXT: i32.and $push15=, $1, $pop17 +; NO-SIMD128-NEXT: i32.shr_u $push16=, $pop15, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_v8i16: @@ -8336,47 +6928,39 @@ define <8 x i16> @shr_u_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push1=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push33=, $9, $pop34 -; NO-SIMD128-FAST-NEXT: local.tee $push32=, $1=, $pop33 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $9, $pop26 +; NO-SIMD128-FAST-NEXT: local.tee $push24=, $1=, $pop25 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $pop1, $pop24 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $2, $pop23 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push4=, $pop3, $1 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $3, $pop22 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $1 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop29 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $4, $pop21 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push8=, $pop7, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $5, $pop28 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push9=, $5, $pop20 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push10=, $pop9, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $6, $pop19 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push15=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push13=, $6, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12 +; 
NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $7, $pop18 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push14=, $pop13, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push17=, $7, $pop26 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop20), $pop18 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $8, $pop25 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push22=, $pop21, $1 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $pop17 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push16=, $pop15, $1 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <8 x i16> undef, i16 %x, i32 0 %s = shufflevector <8 x i16> %t, <8 x i16> undef, @@ -8496,61 +7080,53 @@ define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128: .functype shr_u_vec_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop0 -; NO-SIMD128-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $13, $pop47 -; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop46 -; NO-SIMD128-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop45 -; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push44=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop44 -; NO-SIMD128-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop43 -; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop42 -; NO-SIMD128-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop41 -; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 14 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push40=, 65535 -; NO-SIMD128-NEXT: i32.and $push14=, $8, $pop40 +; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 ; NO-SIMD128-NEXT: i32.const $push39=, 65535 -; NO-SIMD128-NEXT: i32.and $push13=, $16, $pop39 -; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push21=, 12 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop39 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push38=, 65535 -; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop38 +; NO-SIMD128-NEXT: i32.and $push5=, $7, $pop38 ; NO-SIMD128-NEXT: i32.const $push37=, 65535 -; NO-SIMD128-NEXT: i32.and $push18=, $15, $pop37 -; 
NO-SIMD128-NEXT: i32.shr_u $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push26=, 10 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop37 +; NO-SIMD128-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push36=, 65535 -; NO-SIMD128-NEXT: i32.and $push24=, $6, $pop36 +; NO-SIMD128-NEXT: i32.and $push8=, $6, $pop36 ; NO-SIMD128-NEXT: i32.const $push35=, 65535 -; NO-SIMD128-NEXT: i32.and $push23=, $14, $pop35 -; NO-SIMD128-NEXT: i32.shr_u $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.and $push7=, $14, $pop35 +; NO-SIMD128-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop9 ; NO-SIMD128-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-NEXT: i32.and $push29=, $4, $pop34 +; NO-SIMD128-NEXT: i32.and $push11=, $5, $pop34 ; NO-SIMD128-NEXT: i32.const $push33=, 65535 -; NO-SIMD128-NEXT: i32.and $push28=, $12, $pop33 -; NO-SIMD128-NEXT: i32.shr_u $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.and $push10=, $13, $pop33 +; NO-SIMD128-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push32=, 65535 +; NO-SIMD128-NEXT: i32.and $push14=, $4, $pop32 +; NO-SIMD128-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-NEXT: i32.and $push13=, $12, $pop31 +; NO-SIMD128-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-NEXT: i32.and $push17=, $3, $pop30 +; NO-SIMD128-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-NEXT: i32.and $push16=, $11, $pop29 +; NO-SIMD128-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-NEXT: i32.and $push20=, $2, $pop28 +; NO-SIMD128-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-NEXT: i32.and $push19=, $10, $pop27 +; NO-SIMD128-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-NEXT: i32.and $push23=, $1, $pop26 +; NO-SIMD128-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-NEXT: i32.and $push22=, $9, $pop25 +; NO-SIMD128-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop24 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_vec_v8i16: @@ -8558,60 +7134,52 @@ define <8 x i16> @shr_u_vec_v8i16(<8 x i16> %v, <8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop39 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop45 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 65535 
-; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop43 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop41 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $13, $pop39 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 ; NO-SIMD128-FAST-NEXT: i32.const $push38=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop38 ; NO-SIMD128-FAST-NEXT: i32.const $push37=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $14, $pop37 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop37 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push36=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop36 ; NO-SIMD128-FAST-NEXT: i32.const $push35=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $15, $pop35 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop35 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.const $push34=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop34 ; NO-SIMD128-FAST-NEXT: i32.const $push33=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $16, $pop33 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop33 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $5, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $13, $pop31 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $6, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $14, $pop29 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push18=, $pop17, $pop16 +; 
NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $15, $pop27 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $8, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $16, $pop25 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24 ; NO-SIMD128-FAST-NEXT: return %a = lshr <8 x i16> %v, %x ret <8 x i16> %a @@ -8633,30 +7201,22 @@ define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: and_v8i16: ; NO-SIMD128: .functype and_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.and $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.and $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.and $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.and $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.and $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.and $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.and $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.and $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.and $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.and $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.and $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.and $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.and $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.and $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.and $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: and_v8i16: @@ -8668,24 +7228,16 @@ define <8 x i16> @and_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.and $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; 
NO-SIMD128-FAST-NEXT: i32.and $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = and <8 x i16> %x, %y ret <8 x i16> %a @@ -8707,30 +7259,22 @@ define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: or_v8i16: ; NO-SIMD128: .functype or_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.or $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.or $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.or $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.or $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.or $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.or $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.or $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.or $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.or $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.or $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.or $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.or $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.or $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.or $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.or $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.or $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: or_v8i16: @@ -8742,24 +7286,16 @@ define <8 x i16> @or_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.or $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; 
NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.or $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.or $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.or $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.or $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.or $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.or $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.or $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.or $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = or <8 x i16> %x, %y ret <8 x i16> %a @@ -8781,30 +7317,22 @@ define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: xor_v8i16: ; NO-SIMD128: .functype xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.xor $push0=, $5, $13 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop0 -; NO-SIMD128-NEXT: i32.xor $push1=, $3, $11 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop1 -; NO-SIMD128-NEXT: i32.xor $push2=, $2, $10 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-NEXT: i32.xor $push3=, $1, $9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: i32.xor $push4=, $8, $16 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.xor $push7=, $7, $15 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push11=, 10 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.xor $push10=, $6, $14 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push14=, 6 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.xor $push13=, $4, $12 -; NO-SIMD128-NEXT: i32.store16 0($pop15), $pop13 +; NO-SIMD128-NEXT: i32.xor $push0=, $8, $16 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop0 +; NO-SIMD128-NEXT: i32.xor $push1=, $7, $15 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop1 +; NO-SIMD128-NEXT: i32.xor $push2=, $6, $14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $5, $13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 +; NO-SIMD128-NEXT: i32.xor $push4=, $4, $12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push5=, $3, $11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $2, $10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 +; NO-SIMD128-NEXT: i32.xor $push7=, $1, $9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: xor_v8i16: @@ -8816,24 
+7344,16 @@ define <8 x i16> @xor_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $12 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop4), $pop5 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $5, $13 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $6, $14 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push10=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $7, $15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop11), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $8, $16 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop15 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $4, $12 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop3 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $5, $13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $6, $14 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $7, $15 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $8, $16 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %a = xor <8 x i16> %x, %y ret <8 x i16> %a @@ -8856,37 +7376,29 @@ define <8 x i16> @not_v8i16(<8 x i16> %x) { ; NO-SIMD128: .functype not_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $5, $pop0 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push23=, -1 -; NO-SIMD128-NEXT: i32.xor $push2=, $3, $pop23 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push22=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $2, $pop22 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push21=, -1 -; NO-SIMD128-NEXT: i32.xor $push4=, $1, $pop21 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push6=, 14 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.const $push20=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $8, $pop20 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $pop5 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-NEXT: i32.const $push19=, -1 -; NO-SIMD128-NEXT: i32.xor $push8=, $7, $pop19 -; NO-SIMD128-NEXT: i32.store16 0($pop10), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 10 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.const $push18=, -1 -; NO-SIMD128-NEXT: i32.xor $push11=, $6, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop13), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 6 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push17=, -1 -; NO-SIMD128-NEXT: i32.xor $push14=, $4, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 +; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop1 +; NO-SIMD128-NEXT: i32.const $push15=, -1 +; NO-SIMD128-NEXT: i32.xor $push2=, $7, $pop15 +; 
NO-SIMD128-NEXT: i32.store16 12($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push14=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $6, $pop14 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push13=, -1 +; NO-SIMD128-NEXT: i32.xor $push4=, $5, $pop13 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push12=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $4, $pop12 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop5 +; NO-SIMD128-NEXT: i32.const $push11=, -1 +; NO-SIMD128-NEXT: i32.xor $push6=, $3, $pop11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push10=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $2, $pop10 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop7 +; NO-SIMD128-NEXT: i32.const $push9=, -1 +; NO-SIMD128-NEXT: i32.xor $push8=, $1, $pop9 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: not_v8i16: @@ -8895,35 +7407,27 @@ define <8 x i16> @not_v8i16(<8 x i16> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop23 +; NO-SIMD128-FAST-NEXT: i32.const $push15=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop15 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop22 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop14 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop5), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $5, $pop20 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $6, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop9), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $7, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop12), $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push14=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $8, $pop17 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop15), $pop16 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $4, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop4 +; NO-SIMD128-FAST-NEXT: i32.const $push12=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $5, $pop12 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop5 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $7, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop7 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $8, $pop9 +; NO-SIMD128-FAST-NEXT: 
i32.store16 14($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %a = xor <8 x i16> %x, @@ -8948,45 +7452,37 @@ define <8 x i16> @andnot_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128: .functype andnot_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $13, $pop0 -; NO-SIMD128-NEXT: i32.and $push2=, $5, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push31=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $11, $pop31 -; NO-SIMD128-NEXT: i32.and $push4=, $3, $pop3 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push30=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $10, $pop30 -; NO-SIMD128-NEXT: i32.and $push6=, $2, $pop5 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push29=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $9, $pop29 -; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop7 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push11=, 14 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.const $push28=, -1 -; NO-SIMD128-NEXT: i32.xor $push9=, $16, $pop28 -; NO-SIMD128-NEXT: i32.and $push10=, $8, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push15=, 12 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.const $push27=, -1 -; NO-SIMD128-NEXT: i32.xor $push13=, $15, $pop27 -; NO-SIMD128-NEXT: i32.and $push14=, $7, $pop13 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push19=, 10 -; NO-SIMD128-NEXT: i32.add $push20=, $0, $pop19 -; NO-SIMD128-NEXT: i32.const $push26=, -1 -; NO-SIMD128-NEXT: i32.xor $push17=, $14, $pop26 -; NO-SIMD128-NEXT: i32.and $push18=, $6, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop20), $pop18 -; NO-SIMD128-NEXT: i32.const $push23=, 6 -; NO-SIMD128-NEXT: i32.add $push24=, $0, $pop23 -; NO-SIMD128-NEXT: i32.const $push25=, -1 -; NO-SIMD128-NEXT: i32.xor $push21=, $12, $pop25 -; NO-SIMD128-NEXT: i32.and $push22=, $4, $pop21 -; NO-SIMD128-NEXT: i32.store16 0($pop24), $pop22 +; NO-SIMD128-NEXT: i32.xor $push1=, $16, $pop0 +; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop1 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push23=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $15, $pop23 +; NO-SIMD128-NEXT: i32.and $push4=, $7, $pop3 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push22=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $14, $pop22 +; NO-SIMD128-NEXT: i32.and $push6=, $6, $pop5 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push21=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $13, $pop21 +; NO-SIMD128-NEXT: i32.and $push8=, $5, $pop7 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push20=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $12, $pop20 +; NO-SIMD128-NEXT: i32.and $push10=, $4, $pop9 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop10 +; NO-SIMD128-NEXT: i32.const $push19=, -1 +; NO-SIMD128-NEXT: i32.xor $push11=, $11, $pop19 +; NO-SIMD128-NEXT: i32.and $push12=, $3, $pop11 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $10, $pop18 +; NO-SIMD128-NEXT: i32.and $push14=, $2, $pop13 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop14 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push15=, $9, $pop17 +; 
NO-SIMD128-NEXT: i32.and $push16=, $1, $pop15 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: andnot_v8i16: @@ -8996,42 +7492,34 @@ define <8 x i16> @andnot_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $9, $pop0 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $10, $pop31 +; NO-SIMD128-FAST-NEXT: i32.const $push23=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $10, $pop23 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push30=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $11, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push22=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $11, $pop22 ; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push29=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $12, $pop29 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop8), $pop10 -; NO-SIMD128-FAST-NEXT: i32.const $push28=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $13, $pop28 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $5, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $14, $pop27 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $6, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $15, $pop26 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push25=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $16, $pop25 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $8, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop24 +; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $12, $pop21 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $13, $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $5, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $14, $pop19 +; NO-SIMD128-FAST-NEXT: i32.and $push12=, $6, $pop11 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $15, $pop18 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $7, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $16, $pop17 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $8, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %inv_y = xor <8 x i16> %y, 
@@ -9058,62 +7546,54 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-LABEL: bitselect_v8i16: ; NO-SIMD128: .functype bitselect_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.and $push0=, $16, $8 ; NO-SIMD128-NEXT: i32.const $push1=, -1 ; NO-SIMD128-NEXT: i32.xor $push2=, $8, $pop1 ; NO-SIMD128-NEXT: i32.and $push3=, $24, $pop2 ; NO-SIMD128-NEXT: i32.or $push4=, $pop0, $pop3 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 12 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.and $push7=, $15, $7 -; NO-SIMD128-NEXT: i32.const $push47=, -1 -; NO-SIMD128-NEXT: i32.xor $push8=, $7, $pop47 -; NO-SIMD128-NEXT: i32.and $push9=, $23, $pop8 -; NO-SIMD128-NEXT: i32.or $push10=, $pop7, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 10 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.and $push13=, $14, $6 -; NO-SIMD128-NEXT: i32.const $push46=, -1 -; NO-SIMD128-NEXT: i32.xor $push14=, $6, $pop46 -; NO-SIMD128-NEXT: i32.and $push15=, $22, $pop14 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop4 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $7 +; NO-SIMD128-NEXT: i32.const $push39=, -1 +; NO-SIMD128-NEXT: i32.xor $push6=, $7, $pop39 +; NO-SIMD128-NEXT: i32.and $push7=, $23, $pop6 +; NO-SIMD128-NEXT: i32.or $push8=, $pop5, $pop7 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop8 +; NO-SIMD128-NEXT: i32.and $push9=, $14, $6 +; NO-SIMD128-NEXT: i32.const $push38=, -1 +; NO-SIMD128-NEXT: i32.xor $push10=, $6, $pop38 +; NO-SIMD128-NEXT: i32.and $push11=, $22, $pop10 +; NO-SIMD128-NEXT: i32.or $push12=, $pop9, $pop11 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-NEXT: i32.and $push13=, $13, $5 +; NO-SIMD128-NEXT: i32.const $push37=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $5, $pop37 +; NO-SIMD128-NEXT: i32.and $push15=, $21, $pop14 ; NO-SIMD128-NEXT: i32.or $push16=, $pop13, $pop15 -; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.and $push19=, $13, $5 -; NO-SIMD128-NEXT: i32.const $push45=, -1 -; NO-SIMD128-NEXT: i32.xor $push20=, $5, $pop45 -; NO-SIMD128-NEXT: i32.and $push21=, $21, $pop20 -; NO-SIMD128-NEXT: i32.or $push22=, $pop19, $pop21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-NEXT: i32.const $push27=, 6 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.and $push23=, $12, $4 -; NO-SIMD128-NEXT: i32.const $push44=, -1 -; NO-SIMD128-NEXT: i32.xor $push24=, $4, $pop44 -; NO-SIMD128-NEXT: i32.and $push25=, $20, $pop24 -; NO-SIMD128-NEXT: i32.or $push26=, $pop23, $pop25 -; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.and $push29=, $11, $3 -; NO-SIMD128-NEXT: i32.const $push43=, -1 -; NO-SIMD128-NEXT: i32.xor $push30=, $3, $pop43 -; NO-SIMD128-NEXT: i32.and $push31=, $19, $pop30 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-NEXT: i32.and $push17=, $12, $4 +; NO-SIMD128-NEXT: i32.const $push36=, -1 +; NO-SIMD128-NEXT: i32.xor $push18=, $4, $pop36 +; NO-SIMD128-NEXT: i32.and $push19=, $20, $pop18 +; NO-SIMD128-NEXT: i32.or $push20=, $pop17, $pop19 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop20 +; NO-SIMD128-NEXT: i32.and $push21=, $11, $3 +; NO-SIMD128-NEXT: i32.const $push35=, -1 +; NO-SIMD128-NEXT: 
i32.xor $push22=, $3, $pop35 +; NO-SIMD128-NEXT: i32.and $push23=, $19, $pop22 +; NO-SIMD128-NEXT: i32.or $push24=, $pop21, $pop23 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop24 +; NO-SIMD128-NEXT: i32.and $push25=, $10, $2 +; NO-SIMD128-NEXT: i32.const $push34=, -1 +; NO-SIMD128-NEXT: i32.xor $push26=, $2, $pop34 +; NO-SIMD128-NEXT: i32.and $push27=, $18, $pop26 +; NO-SIMD128-NEXT: i32.or $push28=, $pop25, $pop27 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 +; NO-SIMD128-NEXT: i32.and $push29=, $9, $1 +; NO-SIMD128-NEXT: i32.const $push33=, -1 +; NO-SIMD128-NEXT: i32.xor $push30=, $1, $pop33 +; NO-SIMD128-NEXT: i32.and $push31=, $17, $pop30 ; NO-SIMD128-NEXT: i32.or $push32=, $pop29, $pop31 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 -; NO-SIMD128-NEXT: i32.and $push33=, $10, $2 -; NO-SIMD128-NEXT: i32.const $push42=, -1 -; NO-SIMD128-NEXT: i32.xor $push34=, $2, $pop42 -; NO-SIMD128-NEXT: i32.and $push35=, $18, $pop34 -; NO-SIMD128-NEXT: i32.or $push36=, $pop33, $pop35 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 -; NO-SIMD128-NEXT: i32.and $push37=, $9, $1 -; NO-SIMD128-NEXT: i32.const $push41=, -1 -; NO-SIMD128-NEXT: i32.xor $push38=, $1, $pop41 -; NO-SIMD128-NEXT: i32.and $push39=, $17, $pop38 -; NO-SIMD128-NEXT: i32.or $push40=, $pop37, $pop39 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_v8i16: @@ -9126,55 +7606,47 @@ define <8 x i16> @bitselect_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop0, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $2 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop39 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $18, $pop6 ; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop5, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.and $push9=, $11, $3 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop38 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $19, $pop10 ; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop9, $pop11 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.and $push13=, $12, $4 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop37 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $20, $pop14 ; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop13, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $13, $5 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $5, $pop44 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $21, $pop20 -; NO-SIMD128-FAST-NEXT: i32.or $push22=, $pop19, $pop21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $14, $6 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $6, $pop43 -; 
NO-SIMD128-FAST-NEXT: i32.and $push25=, $22, $pop24 -; NO-SIMD128-FAST-NEXT: i32.or $push26=, $pop23, $pop25 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $15, $7 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $7, $pop42 -; NO-SIMD128-FAST-NEXT: i32.and $push31=, $23, $pop30 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $13, $5 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $5, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $21, $pop18 +; NO-SIMD128-FAST-NEXT: i32.or $push20=, $pop17, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.and $push21=, $14, $6 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $6, $pop35 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $22, $pop22 +; NO-SIMD128-FAST-NEXT: i32.or $push24=, $pop21, $pop23 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.and $push25=, $15, $7 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $7, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $23, $pop26 +; NO-SIMD128-FAST-NEXT: i32.or $push28=, $pop25, $pop27 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $8 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $8, $pop33 +; NO-SIMD128-FAST-NEXT: i32.and $push31=, $24, $pop30 ; NO-SIMD128-FAST-NEXT: i32.or $push32=, $pop29, $pop31 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.and $push35=, $16, $8 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $8, $pop41 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $24, $pop36 -; NO-SIMD128-FAST-NEXT: i32.or $push38=, $pop35, $pop37 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <8 x i16> %v1, %c %inv_mask = xor <8 x i16> @@ -9203,46 +7675,38 @@ define <8 x i16> @bitselect_xor_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2 ; NO-SIMD128-LABEL: bitselect_xor_v8i16: ; NO-SIMD128: .functype bitselect_xor_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push3=, 14 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 ; NO-SIMD128-NEXT: i32.xor $push0=, $16, $24 ; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $8 ; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $24 -; NO-SIMD128-NEXT: i32.store16 0($pop4), $pop2 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.xor $push5=, $15, $23 -; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $7 -; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $23 -; NO-SIMD128-NEXT: i32.store16 0($pop9), $pop7 -; NO-SIMD128-NEXT: i32.const $push13=, 10 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-NEXT: i32.xor $push10=, $14, $22 -; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $6 -; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $22 -; NO-SIMD128-NEXT: i32.store16 0($pop14), $pop12 -; 
NO-SIMD128-NEXT: i32.xor $push15=, $13, $21 -; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $5 -; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-NEXT: i32.const $push21=, 6 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 -; NO-SIMD128-NEXT: i32.xor $push18=, $12, $20 -; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $4 -; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $20 -; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.xor $push23=, $11, $19 -; NO-SIMD128-NEXT: i32.and $push24=, $pop23, $3 -; NO-SIMD128-NEXT: i32.xor $push25=, $pop24, $19 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop25 -; NO-SIMD128-NEXT: i32.xor $push26=, $10, $18 -; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $2 -; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $18 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 -; NO-SIMD128-NEXT: i32.xor $push29=, $9, $17 -; NO-SIMD128-NEXT: i32.and $push30=, $pop29, $1 -; NO-SIMD128-NEXT: i32.xor $push31=, $pop30, $17 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop31 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $15, $23 +; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $7 +; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $23 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $14, $22 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $6 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $22 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push9=, $13, $21 +; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $5 +; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $21 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop11 +; NO-SIMD128-NEXT: i32.xor $push12=, $12, $20 +; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $4 +; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $20 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop14 +; NO-SIMD128-NEXT: i32.xor $push15=, $11, $19 +; NO-SIMD128-NEXT: i32.and $push16=, $pop15, $3 +; NO-SIMD128-NEXT: i32.xor $push17=, $pop16, $19 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop17 +; NO-SIMD128-NEXT: i32.xor $push18=, $10, $18 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $2 +; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $18 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop20 +; NO-SIMD128-NEXT: i32.xor $push21=, $9, $17 +; NO-SIMD128-NEXT: i32.and $push22=, $pop21, $1 +; NO-SIMD128-NEXT: i32.xor $push23=, $pop22, $17 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop23 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_v8i16: @@ -9260,34 +7724,26 @@ define <8 x i16> @bitselect_xor_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x i16> %v2 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $19 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $12, $20 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $20 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $13, $21 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $5 -; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $14, $22 -; NO-SIMD128-FAST-NEXT: i32.and $push20=, $pop19, $6 -; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $pop20, $22 -; 
NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $15, $23 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $7 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $16, $24 -; NO-SIMD128-FAST-NEXT: i32.and $push30=, $pop29, $8 -; NO-SIMD128-FAST-NEXT: i32.xor $push31=, $pop30, $24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $12, $20 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $20 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $13, $21 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $pop12, $5 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $pop13, $21 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.xor $push15=, $14, $22 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $pop15, $6 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $pop16, $22 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $15, $23 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $7 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $23 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $16, $24 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $pop21, $8 +; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $pop22, $24 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <8 x i16> %v1, %v2 %and = and <8 x i16> %xor1, %c @@ -9314,62 +7770,54 @@ define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x ; NO-SIMD128-LABEL: bitselect_xor_reversed_v8i16: ; NO-SIMD128: .functype bitselect_xor_reversed_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 14 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.xor $push2=, $16, $24 ; NO-SIMD128-NEXT: i32.const $push0=, -1 ; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $24 -; NO-SIMD128-NEXT: i32.store16 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push11=, 12 -; NO-SIMD128-NEXT: i32.add $push12=, $0, $pop11 -; NO-SIMD128-NEXT: i32.xor $push8=, $15, $23 -; NO-SIMD128-NEXT: i32.const $push47=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $7, $pop47 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $23 -; NO-SIMD128-NEXT: i32.store16 0($pop12), $pop10 -; NO-SIMD128-NEXT: i32.const $push17=, 10 -; NO-SIMD128-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-NEXT: i32.xor $push14=, $14, $22 -; NO-SIMD128-NEXT: i32.const $push46=, -1 -; NO-SIMD128-NEXT: i32.xor $push13=, $6, $pop46 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push6=, $15, $23 +; NO-SIMD128-NEXT: i32.const $push39=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $7, $pop39 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $23 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push10=, $14, 
$22 +; NO-SIMD128-NEXT: i32.const $push38=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $6, $pop38 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $22 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop12 +; NO-SIMD128-NEXT: i32.xor $push14=, $13, $21 +; NO-SIMD128-NEXT: i32.const $push37=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $5, $pop37 ; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $22 -; NO-SIMD128-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-NEXT: i32.xor $push20=, $13, $21 -; NO-SIMD128-NEXT: i32.const $push45=, -1 -; NO-SIMD128-NEXT: i32.xor $push19=, $5, $pop45 -; NO-SIMD128-NEXT: i32.and $push21=, $pop20, $pop19 -; NO-SIMD128-NEXT: i32.xor $push22=, $pop21, $21 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-NEXT: i32.const $push27=, 6 -; NO-SIMD128-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-NEXT: i32.xor $push24=, $12, $20 -; NO-SIMD128-NEXT: i32.const $push44=, -1 -; NO-SIMD128-NEXT: i32.xor $push23=, $4, $pop44 -; NO-SIMD128-NEXT: i32.and $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.xor $push26=, $pop25, $20 -; NO-SIMD128-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-NEXT: i32.xor $push30=, $11, $19 -; NO-SIMD128-NEXT: i32.const $push43=, -1 -; NO-SIMD128-NEXT: i32.xor $push29=, $3, $pop43 +; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $21 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop16 +; NO-SIMD128-NEXT: i32.xor $push18=, $12, $20 +; NO-SIMD128-NEXT: i32.const $push36=, -1 +; NO-SIMD128-NEXT: i32.xor $push17=, $4, $pop36 +; NO-SIMD128-NEXT: i32.and $push19=, $pop18, $pop17 +; NO-SIMD128-NEXT: i32.xor $push20=, $pop19, $20 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop20 +; NO-SIMD128-NEXT: i32.xor $push22=, $11, $19 +; NO-SIMD128-NEXT: i32.const $push35=, -1 +; NO-SIMD128-NEXT: i32.xor $push21=, $3, $pop35 +; NO-SIMD128-NEXT: i32.and $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.xor $push24=, $pop23, $19 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop24 +; NO-SIMD128-NEXT: i32.xor $push26=, $10, $18 +; NO-SIMD128-NEXT: i32.const $push34=, -1 +; NO-SIMD128-NEXT: i32.xor $push25=, $2, $pop34 +; NO-SIMD128-NEXT: i32.and $push27=, $pop26, $pop25 +; NO-SIMD128-NEXT: i32.xor $push28=, $pop27, $18 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop28 +; NO-SIMD128-NEXT: i32.xor $push30=, $9, $17 +; NO-SIMD128-NEXT: i32.const $push33=, -1 +; NO-SIMD128-NEXT: i32.xor $push29=, $1, $pop33 ; NO-SIMD128-NEXT: i32.and $push31=, $pop30, $pop29 -; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $19 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop32 -; NO-SIMD128-NEXT: i32.xor $push34=, $10, $18 -; NO-SIMD128-NEXT: i32.const $push42=, -1 -; NO-SIMD128-NEXT: i32.xor $push33=, $2, $pop42 -; NO-SIMD128-NEXT: i32.and $push35=, $pop34, $pop33 -; NO-SIMD128-NEXT: i32.xor $push36=, $pop35, $18 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop36 -; NO-SIMD128-NEXT: i32.xor $push38=, $9, $17 -; NO-SIMD128-NEXT: i32.const $push41=, -1 -; NO-SIMD128-NEXT: i32.xor $push37=, $1, $pop41 -; NO-SIMD128-NEXT: i32.and $push39=, $pop38, $pop37 -; NO-SIMD128-NEXT: i32.xor $push40=, $pop39, $17 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop40 +; NO-SIMD128-NEXT: i32.xor $push32=, $pop31, $17 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop32 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v8i16: @@ -9382,55 +7830,47 @@ define <8 x i16> @bitselect_xor_reversed_v8i16(<8 x i16> %c, <8 x i16> %v1, <8 x ; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $17 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop4 ; 
NO-SIMD128-FAST-NEXT: i32.xor $push6=, $10, $18 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop39 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $18 ; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $11, $19 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop46 +; NO-SIMD128-FAST-NEXT: i32.const $push38=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop38 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $19 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $12, $20 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop45 +; NO-SIMD128-FAST-NEXT: i32.const $push37=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop37 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $20 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop16 -; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $13, $21 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push19=, $5, $pop44 -; NO-SIMD128-FAST-NEXT: i32.and $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $pop21, $21 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop22 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $14, $22 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push23=, $6, $pop43 -; NO-SIMD128-FAST-NEXT: i32.and $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $pop25, $22 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push33=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push34=, $0, $pop33 -; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $15, $23 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $7, $pop42 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop16 +; NO-SIMD128-FAST-NEXT: i32.xor $push18=, $13, $21 +; NO-SIMD128-FAST-NEXT: i32.const $push36=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push17=, $5, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $pop18, $pop17 +; NO-SIMD128-FAST-NEXT: i32.xor $push20=, $pop19, $21 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.xor $push22=, $14, $22 +; NO-SIMD128-FAST-NEXT: i32.const $push35=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push21=, $6, $pop35 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.xor $push24=, $pop23, $22 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop24 +; NO-SIMD128-FAST-NEXT: i32.xor $push26=, $15, $23 +; NO-SIMD128-FAST-NEXT: i32.const $push34=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push25=, $7, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push27=, $pop26, $pop25 +; NO-SIMD128-FAST-NEXT: i32.xor $push28=, $pop27, $23 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop28 +; NO-SIMD128-FAST-NEXT: i32.xor $push30=, $16, $24 +; NO-SIMD128-FAST-NEXT: i32.const $push33=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push29=, $8, $pop33 ; NO-SIMD128-FAST-NEXT: i32.and $push31=, $pop30, 
$pop29 -; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop34), $pop32 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push40=, $0, $pop39 -; NO-SIMD128-FAST-NEXT: i32.xor $push36=, $16, $24 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push35=, $8, $pop41 -; NO-SIMD128-FAST-NEXT: i32.and $push37=, $pop36, $pop35 -; NO-SIMD128-FAST-NEXT: i32.xor $push38=, $pop37, $24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop40), $pop38 +; NO-SIMD128-FAST-NEXT: i32.xor $push32=, $pop31, $24 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop32 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <8 x i16> %v1, %v2 %notc = xor <8 x i16> %c, @extmul_low_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-LABEL: extmul_low_s_v8i16: ; NO-SIMD128: .functype extmul_low_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend8_s $push1=, $5 -; NO-SIMD128-NEXT: i32.extend8_s $push0=, $21 +; NO-SIMD128-NEXT: i32.extend8_s $push1=, $8 +; NO-SIMD128-NEXT: i32.extend8_s $push0=, $24 ; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend8_s $push4=, $3 -; NO-SIMD128-NEXT: i32.extend8_s $push3=, $19 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $7 +; NO-SIMD128-NEXT: i32.extend8_s $push3=, $23 ; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 -; NO-SIMD128-NEXT: i32.extend8_s $push7=, $2 -; NO-SIMD128-NEXT: i32.extend8_s $push6=, $18 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop5 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $6 +; NO-SIMD128-NEXT: i32.extend8_s $push6=, $22 ; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop8 -; NO-SIMD128-NEXT: i32.extend8_s $push10=, $1 -; NO-SIMD128-NEXT: i32.extend8_s $push9=, $17 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop8 +; NO-SIMD128-NEXT: i32.extend8_s $push10=, $5 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $21 ; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 14 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.extend8_s $push13=, $8 -; NO-SIMD128-NEXT: i32.extend8_s $push12=, $24 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop11 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $4 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $20 ; NO-SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push20=, 12 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.extend8_s $push18=, $7 -; NO-SIMD128-NEXT: i32.extend8_s $push17=, $23 -; NO-SIMD128-NEXT: i32.mul $push19=, $pop18, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop21), $pop19 -; NO-SIMD128-NEXT: i32.const $push25=, 10 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.extend8_s $push23=, $6 -; NO-SIMD128-NEXT: i32.extend8_s $push22=, $22 -; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 -; NO-SIMD128-NEXT: i32.store16 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.extend8_s $push28=, $4 -; NO-SIMD128-NEXT: i32.extend8_s $push27=, $20 -; NO-SIMD128-NEXT: 
i32.mul $push29=, $pop28, $pop27 -; NO-SIMD128-NEXT: i32.store16 0($pop31), $pop29 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop14 +; NO-SIMD128-NEXT: i32.extend8_s $push16=, $3 +; NO-SIMD128-NEXT: i32.extend8_s $push15=, $19 +; NO-SIMD128-NEXT: i32.mul $push17=, $pop16, $pop15 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop17 +; NO-SIMD128-NEXT: i32.extend8_s $push19=, $2 +; NO-SIMD128-NEXT: i32.extend8_s $push18=, $18 +; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop20 +; NO-SIMD128-NEXT: i32.extend8_s $push22=, $1 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $17 +; NO-SIMD128-NEXT: i32.mul $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop23 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_low_s_v8i16: @@ -9515,34 +7947,26 @@ define <8 x i16> @extmul_low_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push6=, $19 ; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $4 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $20 -; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $5 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $21 -; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $pop15, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $6 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $22 -; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $7 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $23 -; NO-SIMD128-FAST-NEXT: i32.mul $push26=, $pop25, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push30=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $24 -; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push10=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $20 +; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $5 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $21 +; NO-SIMD128-FAST-NEXT: i32.mul $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $6 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $22 +; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $7 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $23 +; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $24 +; NO-SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21 +; 
NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23 ; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> @@ -9572,46 +7996,38 @@ define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-LABEL: extmul_high_s_v8i16: ; NO-SIMD128: .functype extmul_high_s_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend8_s $push1=, $13 -; NO-SIMD128-NEXT: i32.extend8_s $push0=, $29 +; NO-SIMD128-NEXT: i32.extend8_s $push1=, $16 +; NO-SIMD128-NEXT: i32.extend8_s $push0=, $32 ; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend8_s $push4=, $11 -; NO-SIMD128-NEXT: i32.extend8_s $push3=, $27 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop2 +; NO-SIMD128-NEXT: i32.extend8_s $push4=, $15 +; NO-SIMD128-NEXT: i32.extend8_s $push3=, $31 ; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop5 -; NO-SIMD128-NEXT: i32.extend8_s $push7=, $10 -; NO-SIMD128-NEXT: i32.extend8_s $push6=, $26 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop5 +; NO-SIMD128-NEXT: i32.extend8_s $push7=, $14 +; NO-SIMD128-NEXT: i32.extend8_s $push6=, $30 ; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop8 -; NO-SIMD128-NEXT: i32.extend8_s $push10=, $9 -; NO-SIMD128-NEXT: i32.extend8_s $push9=, $25 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop8 +; NO-SIMD128-NEXT: i32.extend8_s $push10=, $13 +; NO-SIMD128-NEXT: i32.extend8_s $push9=, $29 ; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop11 -; NO-SIMD128-NEXT: i32.const $push15=, 14 -; NO-SIMD128-NEXT: i32.add $push16=, $0, $pop15 -; NO-SIMD128-NEXT: i32.extend8_s $push13=, $16 -; NO-SIMD128-NEXT: i32.extend8_s $push12=, $32 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop11 +; NO-SIMD128-NEXT: i32.extend8_s $push13=, $12 +; NO-SIMD128-NEXT: i32.extend8_s $push12=, $28 ; NO-SIMD128-NEXT: i32.mul $push14=, $pop13, $pop12 -; NO-SIMD128-NEXT: i32.store16 0($pop16), $pop14 -; NO-SIMD128-NEXT: i32.const $push20=, 12 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.extend8_s $push18=, $15 -; NO-SIMD128-NEXT: i32.extend8_s $push17=, $31 -; NO-SIMD128-NEXT: i32.mul $push19=, $pop18, $pop17 -; NO-SIMD128-NEXT: i32.store16 0($pop21), $pop19 -; NO-SIMD128-NEXT: i32.const $push25=, 10 -; NO-SIMD128-NEXT: i32.add $push26=, $0, $pop25 -; NO-SIMD128-NEXT: i32.extend8_s $push23=, $14 -; NO-SIMD128-NEXT: i32.extend8_s $push22=, $30 -; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 -; NO-SIMD128-NEXT: i32.store16 0($pop26), $pop24 -; NO-SIMD128-NEXT: i32.const $push30=, 6 -; NO-SIMD128-NEXT: i32.add $push31=, $0, $pop30 -; NO-SIMD128-NEXT: i32.extend8_s $push28=, $12 -; NO-SIMD128-NEXT: i32.extend8_s $push27=, $28 -; NO-SIMD128-NEXT: i32.mul $push29=, $pop28, $pop27 -; NO-SIMD128-NEXT: i32.store16 0($pop31), $pop29 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop14 +; NO-SIMD128-NEXT: i32.extend8_s $push16=, $11 +; NO-SIMD128-NEXT: i32.extend8_s $push15=, $27 +; NO-SIMD128-NEXT: i32.mul $push17=, $pop16, $pop15 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop17 +; NO-SIMD128-NEXT: i32.extend8_s $push19=, $10 +; NO-SIMD128-NEXT: i32.extend8_s $push18=, $26 +; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop20 +; NO-SIMD128-NEXT: 
i32.extend8_s $push22=, $9 +; NO-SIMD128-NEXT: i32.extend8_s $push21=, $25 +; NO-SIMD128-NEXT: i32.mul $push23=, $pop22, $pop21 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop23 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_high_s_v8i16: @@ -9629,34 +8045,26 @@ define <8 x i16> @extmul_high_s_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-FAST-NEXT: i32.extend8_s $push6=, $27 ; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $12 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push11=, $28 -; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop10), $pop13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $13 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push14=, $29 -; NO-SIMD128-FAST-NEXT: i32.mul $push16=, $pop15, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop16 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push20=, $14 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $30 -; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop18), $pop21 -; NO-SIMD128-FAST-NEXT: i32.const $push22=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push23=, $0, $pop22 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push25=, $15 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push24=, $31 -; NO-SIMD128-FAST-NEXT: i32.mul $push26=, $pop25, $pop24 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop23), $pop26 -; NO-SIMD128-FAST-NEXT: i32.const $push27=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push28=, $0, $pop27 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push30=, $16 -; NO-SIMD128-FAST-NEXT: i32.extend8_s $push29=, $32 -; NO-SIMD128-FAST-NEXT: i32.mul $push31=, $pop30, $pop29 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop28), $pop31 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push10=, $12 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push9=, $28 +; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop11 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push13=, $13 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push12=, $29 +; NO-SIMD128-FAST-NEXT: i32.mul $push14=, $pop13, $pop12 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push16=, $14 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push15=, $30 +; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop17 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push19=, $15 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push18=, $31 +; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop20 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push22=, $16 +; NO-SIMD128-FAST-NEXT: i32.extend8_s $push21=, $32 +; NO-SIMD128-FAST-NEXT: i32.mul $push23=, $pop22, $pop21 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop23 ; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> @@ -9687,61 +8095,53 @@ define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128: .functype extmul_low_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; 
NO-SIMD128-NEXT: i32.and $push2=, $5, $pop0 -; NO-SIMD128-NEXT: i32.const $push47=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $21, $pop47 -; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push46=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop46 -; NO-SIMD128-NEXT: i32.const $push45=, 255 -; NO-SIMD128-NEXT: i32.and $push4=, $19, $pop45 -; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push44=, 255 -; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop44 -; NO-SIMD128-NEXT: i32.const $push43=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $18, $pop43 -; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push42=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop42 -; NO-SIMD128-NEXT: i32.const $push41=, 255 -; NO-SIMD128-NEXT: i32.and $push10=, $17, $pop41 -; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 14 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push40=, 255 -; NO-SIMD128-NEXT: i32.and $push14=, $8, $pop40 +; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 ; NO-SIMD128-NEXT: i32.const $push39=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $24, $pop39 -; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push21=, 12 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.and $push1=, $24, $pop39 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push38=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $7, $pop38 +; NO-SIMD128-NEXT: i32.and $push5=, $7, $pop38 ; NO-SIMD128-NEXT: i32.const $push37=, 255 -; NO-SIMD128-NEXT: i32.and $push18=, $23, $pop37 -; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push26=, 10 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.and $push4=, $23, $pop37 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push36=, 255 -; NO-SIMD128-NEXT: i32.and $push24=, $6, $pop36 +; NO-SIMD128-NEXT: i32.and $push8=, $6, $pop36 ; NO-SIMD128-NEXT: i32.const $push35=, 255 -; NO-SIMD128-NEXT: i32.and $push23=, $22, $pop35 -; NO-SIMD128-NEXT: i32.mul $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.and $push7=, $22, $pop35 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop9 ; NO-SIMD128-NEXT: i32.const $push34=, 255 -; NO-SIMD128-NEXT: i32.and $push29=, $4, $pop34 +; NO-SIMD128-NEXT: i32.and $push11=, $5, $pop34 ; NO-SIMD128-NEXT: i32.const $push33=, 255 -; NO-SIMD128-NEXT: i32.and $push28=, $20, $pop33 -; NO-SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.and $push10=, $21, $pop33 +; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push32=, 255 +; NO-SIMD128-NEXT: i32.and $push14=, $4, $pop32 +; NO-SIMD128-NEXT: i32.const $push31=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $20, 
$pop31 +; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push30=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $3, $pop30 +; NO-SIMD128-NEXT: i32.const $push29=, 255 +; NO-SIMD128-NEXT: i32.and $push16=, $19, $pop29 +; NO-SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push28=, 255 +; NO-SIMD128-NEXT: i32.and $push20=, $2, $pop28 +; NO-SIMD128-NEXT: i32.const $push27=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $18, $pop27 +; NO-SIMD128-NEXT: i32.mul $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push26=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $1, $pop26 +; NO-SIMD128-NEXT: i32.const $push25=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $17, $pop25 +; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop24 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_low_u_v8i16: @@ -9749,60 +8149,52 @@ define <8 x i16> @extmul_low_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $17, $pop39 ; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, $pop45 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop43 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop41 -; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $5, $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $21, $pop39 -; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 ; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $6, $pop38 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop38 ; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $22, $pop37 -; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $18, 
$pop37 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $7, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop36 ; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $23, $pop35 -; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $19, $pop35 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $8, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop34 ; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $24, $pop33 -; NO-SIMD128-FAST-NEXT: i32.mul $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $20, $pop33 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $5, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $21, $pop31 +; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $6, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $22, $pop29 +; NO-SIMD128-FAST-NEXT: i32.mul $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $7, $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $23, $pop27 +; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $8, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $24, $pop25 +; NO-SIMD128-FAST-NEXT: i32.mul $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24 ; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> @@ -9833,61 +8225,53 @@ define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128: .functype extmul_high_u_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 255 -; NO-SIMD128-NEXT: i32.and $push2=, $13, $pop0 -; NO-SIMD128-NEXT: i32.const $push47=, 255 -; NO-SIMD128-NEXT: i32.and $push1=, $29, $pop47 -; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store16 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push46=, 255 -; NO-SIMD128-NEXT: i32.and $push5=, $11, $pop46 -; NO-SIMD128-NEXT: i32.const $push45=, 255 -; NO-SIMD128-NEXT: i32.and $push4=, $27, $pop45 -; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store16 4($0), $pop6 -; 
NO-SIMD128-NEXT: i32.const $push44=, 255 -; NO-SIMD128-NEXT: i32.and $push8=, $10, $pop44 -; NO-SIMD128-NEXT: i32.const $push43=, 255 -; NO-SIMD128-NEXT: i32.and $push7=, $26, $pop43 -; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store16 2($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push42=, 255 -; NO-SIMD128-NEXT: i32.and $push11=, $9, $pop42 -; NO-SIMD128-NEXT: i32.const $push41=, 255 -; NO-SIMD128-NEXT: i32.and $push10=, $25, $pop41 -; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store16 0($0), $pop12 -; NO-SIMD128-NEXT: i32.const $push16=, 14 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.const $push40=, 255 -; NO-SIMD128-NEXT: i32.and $push14=, $16, $pop40 +; NO-SIMD128-NEXT: i32.and $push2=, $16, $pop0 ; NO-SIMD128-NEXT: i32.const $push39=, 255 -; NO-SIMD128-NEXT: i32.and $push13=, $32, $pop39 -; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13 -; NO-SIMD128-NEXT: i32.store16 0($pop17), $pop15 -; NO-SIMD128-NEXT: i32.const $push21=, 12 -; NO-SIMD128-NEXT: i32.add $push22=, $0, $pop21 +; NO-SIMD128-NEXT: i32.and $push1=, $32, $pop39 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store16 14($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push38=, 255 -; NO-SIMD128-NEXT: i32.and $push19=, $15, $pop38 +; NO-SIMD128-NEXT: i32.and $push5=, $15, $pop38 ; NO-SIMD128-NEXT: i32.const $push37=, 255 -; NO-SIMD128-NEXT: i32.and $push18=, $31, $pop37 -; NO-SIMD128-NEXT: i32.mul $push20=, $pop19, $pop18 -; NO-SIMD128-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-NEXT: i32.const $push26=, 10 -; NO-SIMD128-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-NEXT: i32.and $push4=, $31, $pop37 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store16 12($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push36=, 255 -; NO-SIMD128-NEXT: i32.and $push24=, $14, $pop36 +; NO-SIMD128-NEXT: i32.and $push8=, $14, $pop36 ; NO-SIMD128-NEXT: i32.const $push35=, 255 -; NO-SIMD128-NEXT: i32.and $push23=, $30, $pop35 -; NO-SIMD128-NEXT: i32.mul $push25=, $pop24, $pop23 -; NO-SIMD128-NEXT: i32.store16 0($pop27), $pop25 -; NO-SIMD128-NEXT: i32.const $push31=, 6 -; NO-SIMD128-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-NEXT: i32.and $push7=, $30, $pop35 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store16 10($0), $pop9 ; NO-SIMD128-NEXT: i32.const $push34=, 255 -; NO-SIMD128-NEXT: i32.and $push29=, $12, $pop34 +; NO-SIMD128-NEXT: i32.and $push11=, $13, $pop34 ; NO-SIMD128-NEXT: i32.const $push33=, 255 -; NO-SIMD128-NEXT: i32.and $push28=, $28, $pop33 -; NO-SIMD128-NEXT: i32.mul $push30=, $pop29, $pop28 -; NO-SIMD128-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-NEXT: i32.and $push10=, $29, $pop33 +; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-NEXT: i32.store16 8($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push32=, 255 +; NO-SIMD128-NEXT: i32.and $push14=, $12, $pop32 +; NO-SIMD128-NEXT: i32.const $push31=, 255 +; NO-SIMD128-NEXT: i32.and $push13=, $28, $pop31 +; NO-SIMD128-NEXT: i32.mul $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.store16 6($0), $pop15 +; NO-SIMD128-NEXT: i32.const $push30=, 255 +; NO-SIMD128-NEXT: i32.and $push17=, $11, $pop30 +; NO-SIMD128-NEXT: i32.const $push29=, 255 +; NO-SIMD128-NEXT: i32.and $push16=, $27, $pop29 +; NO-SIMD128-NEXT: i32.mul $push18=, $pop17, $pop16 +; NO-SIMD128-NEXT: i32.store16 4($0), $pop18 +; NO-SIMD128-NEXT: i32.const $push28=, 255 +; NO-SIMD128-NEXT: i32.and $push20=, $10, $pop28 +; 
NO-SIMD128-NEXT: i32.const $push27=, 255 +; NO-SIMD128-NEXT: i32.and $push19=, $26, $pop27 +; NO-SIMD128-NEXT: i32.mul $push21=, $pop20, $pop19 +; NO-SIMD128-NEXT: i32.store16 2($0), $pop21 +; NO-SIMD128-NEXT: i32.const $push26=, 255 +; NO-SIMD128-NEXT: i32.and $push23=, $9, $pop26 +; NO-SIMD128-NEXT: i32.const $push25=, 255 +; NO-SIMD128-NEXT: i32.and $push22=, $25, $pop25 +; NO-SIMD128-NEXT: i32.mul $push24=, $pop23, $pop22 +; NO-SIMD128-NEXT: i32.store16 0($0), $pop24 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_high_u_v8i16: @@ -9895,60 +8279,52 @@ define <8 x i16> @extmul_high_u_v8i16(<16 x i8> %v1, <16 x i8> %v2) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 255 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $9, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push47=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $25, $pop47 +; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $25, $pop39 ; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store16 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push46=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop46 -; NO-SIMD128-FAST-NEXT: i32.const $push45=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $26, $pop45 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push44=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $11, $pop44 -; NO-SIMD128-FAST-NEXT: i32.const $push43=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $27, $pop43 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 6 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 -; NO-SIMD128-FAST-NEXT: i32.const $push42=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $pop42 -; NO-SIMD128-FAST-NEXT: i32.const $push41=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $28, $pop41 -; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop14), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push40=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push16=, $13, $pop40 -; NO-SIMD128-FAST-NEXT: i32.const $push39=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push15=, $29, $pop39 -; NO-SIMD128-FAST-NEXT: i32.mul $push17=, $pop16, $pop15 -; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 10 -; NO-SIMD128-FAST-NEXT: i32.add $push22=, $0, $pop21 ; NO-SIMD128-FAST-NEXT: i32.const $push38=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push19=, $14, $pop38 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $10, $pop38 ; NO-SIMD128-FAST-NEXT: i32.const $push37=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push18=, $30, $pop37 -; NO-SIMD128-FAST-NEXT: i32.mul $push20=, $pop19, $pop18 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop22), $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push26=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push27=, $0, $pop26 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $26, $pop37 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store16 2($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push36=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push24=, $15, $pop36 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $11, $pop36 ; NO-SIMD128-FAST-NEXT: i32.const $push35=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push23=, $31, $pop35 -; NO-SIMD128-FAST-NEXT: i32.mul $push25=, $pop24, $pop23 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop27), $pop25 
-; NO-SIMD128-FAST-NEXT: i32.const $push31=, 14 -; NO-SIMD128-FAST-NEXT: i32.add $push32=, $0, $pop31 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $27, $pop35 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store16 4($0), $pop9 ; NO-SIMD128-FAST-NEXT: i32.const $push34=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push29=, $16, $pop34 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $12, $pop34 ; NO-SIMD128-FAST-NEXT: i32.const $push33=, 255 -; NO-SIMD128-FAST-NEXT: i32.and $push28=, $32, $pop33 -; NO-SIMD128-FAST-NEXT: i32.mul $push30=, $pop29, $pop28 -; NO-SIMD128-FAST-NEXT: i32.store16 0($pop32), $pop30 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $28, $pop33 +; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 +; NO-SIMD128-FAST-NEXT: i32.store16 6($0), $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push32=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push14=, $13, $pop32 +; NO-SIMD128-FAST-NEXT: i32.const $push31=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push13=, $29, $pop31 +; NO-SIMD128-FAST-NEXT: i32.mul $push15=, $pop14, $pop13 +; NO-SIMD128-FAST-NEXT: i32.store16 8($0), $pop15 +; NO-SIMD128-FAST-NEXT: i32.const $push30=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push17=, $14, $pop30 +; NO-SIMD128-FAST-NEXT: i32.const $push29=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push16=, $30, $pop29 +; NO-SIMD128-FAST-NEXT: i32.mul $push18=, $pop17, $pop16 +; NO-SIMD128-FAST-NEXT: i32.store16 10($0), $pop18 +; NO-SIMD128-FAST-NEXT: i32.const $push28=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push20=, $15, $pop28 +; NO-SIMD128-FAST-NEXT: i32.const $push27=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push19=, $31, $pop27 +; NO-SIMD128-FAST-NEXT: i32.mul $push21=, $pop20, $pop19 +; NO-SIMD128-FAST-NEXT: i32.store16 12($0), $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push26=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push23=, $16, $pop26 +; NO-SIMD128-FAST-NEXT: i32.const $push25=, 255 +; NO-SIMD128-FAST-NEXT: i32.and $push22=, $32, $pop25 +; NO-SIMD128-FAST-NEXT: i32.mul $push24=, $pop23, $pop22 +; NO-SIMD128-FAST-NEXT: i32.store16 14($0), $pop24 ; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <16 x i8> %v1, <16 x i8> undef, <8 x i32> @@ -9979,16 +8355,14 @@ define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: add_v4i32: ; NO-SIMD128: .functype add_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.add $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.add $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.add $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.add $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.add $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.add $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.add $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.add $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: add_v4i32: @@ -10000,10 +8374,8 @@ define <4 x i32> @add_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.add $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add 
$push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.add $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = add <4 x i32> %x, %y ret <4 x i32> %a @@ -10025,16 +8397,14 @@ define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: sub_v4i32: ; NO-SIMD128: .functype sub_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.sub $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.sub $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.sub $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.sub $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.sub $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.sub $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.sub $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.sub $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sub_v4i32: @@ -10046,10 +8416,8 @@ define <4 x i32> @sub_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.sub $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = sub <4 x i32> %x, %y ret <4 x i32> %a @@ -10071,16 +8439,14 @@ define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: mul_v4i32: ; NO-SIMD128: .functype mul_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.mul $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.mul $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.mul $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.mul $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.mul $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.mul $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.mul $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.mul $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: mul_v4i32: @@ -10092,10 +8458,8 @@ define <4 x i32> @mul_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.mul $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.mul $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 
12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = mul <4 x i32> %x, %y ret <4 x i32> %a @@ -10117,20 +8481,18 @@ define <4 x i32> @min_s_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: min_s_v4i32: ; NO-SIMD128: .functype min_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.lt_s $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.lt_s $push2=, $2, $6 -; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2 -; NO-SIMD128-NEXT: i32.store 4($0), $pop3 -; NO-SIMD128-NEXT: i32.lt_s $push4=, $1, $5 -; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4 -; NO-SIMD128-NEXT: i32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.lt_s $push6=, $4, $8 -; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.lt_s $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 +; NO-SIMD128-NEXT: i32.lt_s $push2=, $3, $7 +; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2 +; NO-SIMD128-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-NEXT: i32.lt_s $push4=, $2, $6 +; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4 +; NO-SIMD128-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-NEXT: i32.lt_s $push6=, $1, $5 +; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6 +; NO-SIMD128-NEXT: i32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_s_v4i32: @@ -10145,11 +8507,9 @@ define <4 x i32> @min_s_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.lt_s $push4=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: i32.lt_s $push6=, $4, $8 ; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = icmp slt <4 x i32> %x, %y %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y @@ -10172,20 +8532,18 @@ define <4 x i32> @min_u_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: min_u_v4i32: ; NO-SIMD128: .functype min_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.lt_u $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.lt_u $push2=, $2, $6 -; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2 -; NO-SIMD128-NEXT: i32.store 4($0), $pop3 -; NO-SIMD128-NEXT: i32.lt_u $push4=, $1, $5 -; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4 -; NO-SIMD128-NEXT: i32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.lt_u $push6=, $4, $8 -; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.lt_u $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 +; NO-SIMD128-NEXT: i32.lt_u $push2=, $3, $7 +; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2 +; NO-SIMD128-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-NEXT: i32.lt_u $push4=, $2, $6 +; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4 
+; NO-SIMD128-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-NEXT: i32.lt_u $push6=, $1, $5 +; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6 +; NO-SIMD128-NEXT: i32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_u_v4i32: @@ -10200,11 +8558,9 @@ define <4 x i32> @min_u_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.lt_u $push4=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: i32.lt_u $push6=, $4, $8 ; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = icmp ult <4 x i32> %x, %y %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y @@ -10227,20 +8583,18 @@ define <4 x i32> @max_s_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: max_s_v4i32: ; NO-SIMD128: .functype max_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.gt_s $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.gt_s $push2=, $2, $6 -; NO-SIMD128-NEXT: i32.select $push3=, $2, $6, $pop2 -; NO-SIMD128-NEXT: i32.store 4($0), $pop3 -; NO-SIMD128-NEXT: i32.gt_s $push4=, $1, $5 -; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4 -; NO-SIMD128-NEXT: i32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.gt_s $push6=, $4, $8 -; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.gt_s $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 +; NO-SIMD128-NEXT: i32.gt_s $push2=, $3, $7 +; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2 +; NO-SIMD128-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-NEXT: i32.gt_s $push4=, $2, $6 +; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4 +; NO-SIMD128-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-NEXT: i32.gt_s $push6=, $1, $5 +; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6 +; NO-SIMD128-NEXT: i32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_s_v4i32: @@ -10255,11 +8609,9 @@ define <4 x i32> @max_s_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.gt_s $push4=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: i32.gt_s $push6=, $4, $8 ; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = icmp sgt <4 x i32> %x, %y %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y @@ -10282,20 +8634,18 @@ define <4 x i32> @max_u_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: max_u_v4i32: ; NO-SIMD128: .functype max_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.gt_u $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.select $push1=, $3, $7, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.gt_u $push2=, $2, $6 -; NO-SIMD128-NEXT: i32.select 
$push3=, $2, $6, $pop2 -; NO-SIMD128-NEXT: i32.store 4($0), $pop3 -; NO-SIMD128-NEXT: i32.gt_u $push4=, $1, $5 -; NO-SIMD128-NEXT: i32.select $push5=, $1, $5, $pop4 -; NO-SIMD128-NEXT: i32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.gt_u $push6=, $4, $8 -; NO-SIMD128-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: i32.gt_u $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.select $push1=, $4, $8, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 +; NO-SIMD128-NEXT: i32.gt_u $push2=, $3, $7 +; NO-SIMD128-NEXT: i32.select $push3=, $3, $7, $pop2 +; NO-SIMD128-NEXT: i32.store 8($0), $pop3 +; NO-SIMD128-NEXT: i32.gt_u $push4=, $2, $6 +; NO-SIMD128-NEXT: i32.select $push5=, $2, $6, $pop4 +; NO-SIMD128-NEXT: i32.store 4($0), $pop5 +; NO-SIMD128-NEXT: i32.gt_u $push6=, $1, $5 +; NO-SIMD128-NEXT: i32.select $push7=, $1, $5, $pop6 +; NO-SIMD128-NEXT: i32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_u_v4i32: @@ -10310,11 +8660,9 @@ define <4 x i32> @max_u_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.gt_u $push4=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.select $push5=, $3, $7, $pop4 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: i32.gt_u $push6=, $4, $8 ; NO-SIMD128-FAST-NEXT: i32.select $push7=, $4, $8, $pop6 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = icmp ugt <4 x i32> %x, %y %a = select <4 x i1> %c, <4 x i32> %x, <4 x i32> %y @@ -10337,63 +8685,59 @@ define <4 x i32> @abs_v4i32(<4 x i32> %x) { ; NO-SIMD128-LABEL: abs_v4i32: ; NO-SIMD128: .functype abs_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 ; NO-SIMD128-NEXT: i32.const $push0=, 31 -; NO-SIMD128-NEXT: i32.shr_s $push21=, $4, $pop0 -; NO-SIMD128-NEXT: local.tee $push20=, $5=, $pop21 -; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop20 +; NO-SIMD128-NEXT: i32.shr_s $push19=, $4, $pop0 +; NO-SIMD128-NEXT: local.tee $push18=, $5=, $pop19 +; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop18 ; NO-SIMD128-NEXT: i32.sub $push2=, $pop1, $5 -; NO-SIMD128-NEXT: i32.store 0($pop4), $pop2 -; NO-SIMD128-NEXT: i32.const $push19=, 31 -; NO-SIMD128-NEXT: i32.shr_s $push18=, $3, $pop19 -; NO-SIMD128-NEXT: local.tee $push17=, $4=, $pop18 -; NO-SIMD128-NEXT: i32.xor $push5=, $3, $pop17 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push17=, 31 +; NO-SIMD128-NEXT: i32.shr_s $push16=, $3, $pop17 +; NO-SIMD128-NEXT: local.tee $push15=, $4=, $pop16 +; NO-SIMD128-NEXT: i32.xor $push3=, $3, $pop15 +; NO-SIMD128-NEXT: i32.sub $push4=, $pop3, $4 +; NO-SIMD128-NEXT: i32.store 8($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push14=, 31 +; NO-SIMD128-NEXT: i32.shr_s $push13=, $2, $pop14 +; NO-SIMD128-NEXT: local.tee $push12=, $4=, $pop13 +; NO-SIMD128-NEXT: i32.xor $push5=, $2, $pop12 ; NO-SIMD128-NEXT: i32.sub $push6=, $pop5, $4 -; NO-SIMD128-NEXT: i32.store 8($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push16=, 31 -; NO-SIMD128-NEXT: i32.shr_s $push15=, $2, $pop16 -; NO-SIMD128-NEXT: local.tee $push14=, $4=, $pop15 -; NO-SIMD128-NEXT: i32.xor $push7=, $2, $pop14 +; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push11=, 31 +; 
NO-SIMD128-NEXT: i32.shr_s $push10=, $1, $pop11 +; NO-SIMD128-NEXT: local.tee $push9=, $4=, $pop10 +; NO-SIMD128-NEXT: i32.xor $push7=, $1, $pop9 ; NO-SIMD128-NEXT: i32.sub $push8=, $pop7, $4 -; NO-SIMD128-NEXT: i32.store 4($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push13=, 31 -; NO-SIMD128-NEXT: i32.shr_s $push12=, $1, $pop13 -; NO-SIMD128-NEXT: local.tee $push11=, $4=, $pop12 -; NO-SIMD128-NEXT: i32.xor $push9=, $1, $pop11 -; NO-SIMD128-NEXT: i32.sub $push10=, $pop9, $4 -; NO-SIMD128-NEXT: i32.store 0($0), $pop10 +; NO-SIMD128-NEXT: i32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: abs_v4i32: ; NO-SIMD128-FAST: .functype abs_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 31 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push21=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: local.tee $push20=, $5=, $pop21 -; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop20 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push19=, $1, $pop0 +; NO-SIMD128-FAST-NEXT: local.tee $push18=, $5=, $pop19 +; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $1, $pop18 ; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop1, $5 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 31 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push18=, $2, $pop19 -; NO-SIMD128-FAST-NEXT: local.tee $push17=, $1=, $pop18 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $2, $pop17 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, 31 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push16=, $2, $pop17 +; NO-SIMD128-FAST-NEXT: local.tee $push15=, $1=, $pop16 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $2, $pop15 ; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop3, $1 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 31 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push15=, $3, $pop16 -; NO-SIMD128-FAST-NEXT: local.tee $push14=, $2=, $pop15 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $3, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 31 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push13=, $3, $pop14 +; NO-SIMD128-FAST-NEXT: local.tee $push12=, $2=, $pop13 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $3, $pop12 ; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop5, $2 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 31 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push12=, $4, $pop13 -; NO-SIMD128-FAST-NEXT: local.tee $push11=, $0=, $pop12 -; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop7, $0 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, 31 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push10=, $4, $pop11 +; NO-SIMD128-FAST-NEXT: local.tee $push9=, $3=, $pop10 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: i32.sub $push8=, $pop7, $3 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %a = sub <4 x i32> zeroinitializer, %x %b = icmp slt <4 x i32> %x, zeroinitializer @@ -10418,19 +8762,17 @@ define <4 x i32> @neg_v4i32(<4 x i32> %x) { ; NO-SIMD128: .functype neg_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 0 -; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $3 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push9=, 0 -; NO-SIMD128-NEXT: i32.sub $push2=, $pop9, $2 -; NO-SIMD128-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-NEXT: 
i32.const $push8=, 0 -; NO-SIMD128-NEXT: i32.sub $push3=, $pop8, $1 -; NO-SIMD128-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.sub $push1=, $pop0, $4 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 ; NO-SIMD128-NEXT: i32.const $push7=, 0 -; NO-SIMD128-NEXT: i32.sub $push4=, $pop7, $4 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.sub $push2=, $pop7, $3 +; NO-SIMD128-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push6=, 0 +; NO-SIMD128-NEXT: i32.sub $push3=, $pop6, $2 +; NO-SIMD128-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 0 +; NO-SIMD128-NEXT: i32.sub $push4=, $pop5, $1 +; NO-SIMD128-NEXT: i32.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: neg_v4i32: @@ -10439,17 +8781,15 @@ define <4 x i32> @neg_v4i32(<4 x i32> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 0 ; NO-SIMD128-FAST-NEXT: i32.sub $push1=, $pop0, $1 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop9, $2 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push2=, $pop7, $2 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop8, $3 +; NO-SIMD128-FAST-NEXT: i32.const $push6=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push3=, $pop6, $3 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 0 -; NO-SIMD128-FAST-NEXT: i32.sub $push6=, $pop7, $4 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push5=, 0 +; NO-SIMD128-FAST-NEXT: i32.sub $push4=, $pop5, $4 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %a = sub <4 x i32> , %x ret <4 x i32> %a @@ -10471,16 +8811,14 @@ define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-LABEL: shl_v4i32: ; NO-SIMD128: .functype shl_v4i32 (i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shl $push0=, $3, $5 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shl $push1=, $2, $5 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shl $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shl $push3=, $4, $5 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shl $push0=, $4, $5 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shl $push1=, $3, $5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $2, $5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shl $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_v4i32: @@ -10492,10 +8830,8 @@ define <4 x i32> @shl_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $5 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $4, $5 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $4, $5 +; 
NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, @@ -10523,19 +8859,17 @@ define <4 x i32> @shl_const_v4i32(<4 x i32> %v) { ; NO-SIMD128: .functype shl_const_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 5 -; NO-SIMD128-NEXT: i32.shl $push1=, $3, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push9=, 5 -; NO-SIMD128-NEXT: i32.shl $push2=, $2, $pop9 -; NO-SIMD128-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push8=, 5 -; NO-SIMD128-NEXT: i32.shl $push3=, $1, $pop8 -; NO-SIMD128-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.shl $push1=, $4, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 ; NO-SIMD128-NEXT: i32.const $push7=, 5 -; NO-SIMD128-NEXT: i32.shl $push4=, $4, $pop7 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.shl $push2=, $3, $pop7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push6=, 5 +; NO-SIMD128-NEXT: i32.shl $push3=, $2, $pop6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, 5 +; NO-SIMD128-NEXT: i32.shl $push4=, $1, $pop5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_const_v4i32: @@ -10544,17 +8878,15 @@ define <4 x i32> @shl_const_v4i32(<4 x i32> %v) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 5 ; NO-SIMD128-FAST-NEXT: i32.shl $push1=, $1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $2, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push6=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $3, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 5 -; NO-SIMD128-FAST-NEXT: i32.shl $push6=, $4, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push5=, 5 +; NO-SIMD128-FAST-NEXT: i32.shl $push4=, $4, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %a = shl <4 x i32> %v, ret <4 x i32> %a @@ -10606,16 +8938,14 @@ define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-LABEL: shl_vec_v4i32: ; NO-SIMD128: .functype shl_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shl $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shl $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shl $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shl $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shl $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shl $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shl $push2=, $2, $6 +; 
NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shl $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shl_vec_v4i32: @@ -10627,10 +8957,8 @@ define <4 x i32> @shl_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shl $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shl $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shl $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = shl <4 x i32> %v, %x ret <4 x i32> %a @@ -10652,16 +8980,14 @@ define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-LABEL: shr_s_v4i32: ; NO-SIMD128: .functype shr_s_v4i32 (i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shr_s $push0=, $3, $5 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shr_s $push1=, $2, $5 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shr_s $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shr_s $push3=, $4, $5 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shr_s $push0=, $4, $5 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_s $push1=, $3, $5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $2, $5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shr_s $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_v4i32: @@ -10673,10 +8999,8 @@ define <4 x i32> @shr_s_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $3, $5 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $4, $5 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $4, $5 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, @@ -10731,16 +9055,14 @@ define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-LABEL: shr_s_vec_v4i32: ; NO-SIMD128: .functype shr_s_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shr_s $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shr_s $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shr_s $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shr_s $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shr_s $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_s $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_s $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shr_s $push3=, $1, $5 +; 
NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_s_vec_v4i32: @@ -10752,10 +9074,8 @@ define <4 x i32> @shr_s_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shr_s $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shr_s $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shr_s $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = ashr <4 x i32> %v, %x ret <4 x i32> %a @@ -10777,16 +9097,14 @@ define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-LABEL: shr_u_v4i32: ; NO-SIMD128: .functype shr_u_v4i32 (i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shr_u $push0=, $3, $5 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shr_u $push1=, $2, $5 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shr_u $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shr_u $push3=, $4, $5 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shr_u $push0=, $4, $5 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_u $push1=, $3, $5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $2, $5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: shr_u_v4i32: @@ -10798,10 +9116,8 @@ define <4 x i32> @shr_u_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $3, $5 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $4, $5 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $4, $5 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %t = insertelement <4 x i32> undef, i32 %x, i32 0 %s = shufflevector <4 x i32> %t, <4 x i32> undef, @@ -10856,16 +9172,14 @@ define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-LABEL: shr_u_vec_v4i32: ; NO-SIMD128: .functype shr_u_vec_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.shr_u $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.shr_u $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.shr_u $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.shr_u $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.shr_u $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.shr_u $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.shr_u $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.shr_u $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; 
NO-SIMD128-FAST-LABEL: shr_u_vec_v4i32: @@ -10877,10 +9191,8 @@ define <4 x i32> @shr_u_vec_v4i32(<4 x i32> %v, <4 x i32> %x) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.shr_u $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.shr_u $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.shr_u $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = lshr <4 x i32> %v, %x ret <4 x i32> %a @@ -10902,16 +9214,14 @@ define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: and_v4i32: ; NO-SIMD128: .functype and_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.and $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.and $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.and $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.and $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.and $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.and $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.and $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.and $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: and_v4i32: @@ -10923,10 +9233,8 @@ define <4 x i32> @and_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.and $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = and <4 x i32> %x, %y ret <4 x i32> %a @@ -10948,16 +9256,14 @@ define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: or_v4i32: ; NO-SIMD128: .functype or_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.or $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.or $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.or $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.or $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.or $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.or $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.or $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.or $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: or_v4i32: @@ -10969,10 +9275,8 @@ define <4 x i32> @or_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.or 
$push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.or $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.or $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = or <4 x i32> %x, %y ret <4 x i32> %a @@ -10994,16 +9298,14 @@ define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: xor_v4i32: ; NO-SIMD128: .functype xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.xor $push0=, $3, $7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop0 -; NO-SIMD128-NEXT: i32.xor $push1=, $2, $6 -; NO-SIMD128-NEXT: i32.store 4($0), $pop1 -; NO-SIMD128-NEXT: i32.xor $push2=, $1, $5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.xor $push3=, $4, $8 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: i32.xor $push0=, $4, $8 +; NO-SIMD128-NEXT: i32.store 12($0), $pop0 +; NO-SIMD128-NEXT: i32.xor $push1=, $3, $7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop1 +; NO-SIMD128-NEXT: i32.xor $push2=, $2, $6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $1, $5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: xor_v4i32: @@ -11015,10 +9317,8 @@ define <4 x i32> @xor_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = xor <4 x i32> %x, %y ret <4 x i32> %a @@ -11041,19 +9341,17 @@ define <4 x i32> @not_v4i32(<4 x i32> %x) { ; NO-SIMD128: .functype not_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $3, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop1 -; NO-SIMD128-NEXT: i32.const $push9=, -1 -; NO-SIMD128-NEXT: i32.xor $push2=, $2, $pop9 -; NO-SIMD128-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push8=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $1, $pop8 -; NO-SIMD128-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 +; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop0 +; NO-SIMD128-NEXT: i32.store 12($0), $pop1 ; NO-SIMD128-NEXT: i32.const $push7=, -1 -; NO-SIMD128-NEXT: i32.xor $push4=, $4, $pop7 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 +; NO-SIMD128-NEXT: i32.xor $push2=, $3, $pop7 +; NO-SIMD128-NEXT: i32.store 8($0), $pop2 +; NO-SIMD128-NEXT: i32.const $push6=, -1 +; NO-SIMD128-NEXT: i32.xor $push3=, $2, $pop6 +; NO-SIMD128-NEXT: i32.store 4($0), $pop3 +; NO-SIMD128-NEXT: i32.const $push5=, -1 +; NO-SIMD128-NEXT: i32.xor $push4=, $1, $pop5 +; NO-SIMD128-NEXT: i32.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: not_v4i32: @@ -11062,17 +9360,15 @@ define <4 x i32> @not_v4i32(<4 x i32> %x) { ; NO-SIMD128-FAST-NEXT: i32.const $push0=, -1 ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, 
$1, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push7=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push2=, $2, $pop7 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop8 +; NO-SIMD128-FAST-NEXT: i32.const $push6=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $3, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $4, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: i32.const $push5=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $4, $pop5 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %a = xor <4 x i32> %x, ret <4 x i32> %a @@ -11096,23 +9392,21 @@ define <4 x i32> @andnot_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128: .functype andnot_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, -1 -; NO-SIMD128-NEXT: i32.xor $push1=, $7, $pop0 -; NO-SIMD128-NEXT: i32.and $push2=, $3, $pop1 -; NO-SIMD128-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push13=, -1 -; NO-SIMD128-NEXT: i32.xor $push3=, $6, $pop13 -; NO-SIMD128-NEXT: i32.and $push4=, $2, $pop3 -; NO-SIMD128-NEXT: i32.store 4($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push12=, -1 -; NO-SIMD128-NEXT: i32.xor $push5=, $5, $pop12 -; NO-SIMD128-NEXT: i32.and $push6=, $1, $pop5 -; NO-SIMD128-NEXT: i32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: i32.xor $push1=, $8, $pop0 +; NO-SIMD128-NEXT: i32.and $push2=, $4, $pop1 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 ; NO-SIMD128-NEXT: i32.const $push11=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $8, $pop11 -; NO-SIMD128-NEXT: i32.and $push8=, $4, $pop7 -; NO-SIMD128-NEXT: i32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: i32.xor $push3=, $7, $pop11 +; NO-SIMD128-NEXT: i32.and $push4=, $3, $pop3 +; NO-SIMD128-NEXT: i32.store 8($0), $pop4 +; NO-SIMD128-NEXT: i32.const $push10=, -1 +; NO-SIMD128-NEXT: i32.xor $push5=, $6, $pop10 +; NO-SIMD128-NEXT: i32.and $push6=, $2, $pop5 +; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.const $push9=, -1 +; NO-SIMD128-NEXT: i32.xor $push7=, $5, $pop9 +; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop7 +; NO-SIMD128-NEXT: i32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: andnot_v4i32: @@ -11122,20 +9416,18 @@ define <4 x i32> @andnot_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: i32.xor $push1=, $5, $pop0 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $6, $pop13 +; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push3=, $6, $pop11 ; NO-SIMD128-FAST-NEXT: i32.and $push4=, $2, $pop3 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push12=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $7, $pop12 +; NO-SIMD128-FAST-NEXT: i32.const $push10=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $7, $pop10 ; NO-SIMD128-FAST-NEXT: i32.and $push6=, $3, $pop5 ; NO-SIMD128-FAST-NEXT: 
i32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push7=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push8=, $0, $pop7 -; NO-SIMD128-FAST-NEXT: i32.const $push11=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $8, $pop11 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $4, $pop9 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop8), $pop10 +; NO-SIMD128-FAST-NEXT: i32.const $push9=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push7=, $8, $pop9 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $4, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %inv_y = xor <4 x i32> %y, %a = and <4 x i32> %x, %inv_y @@ -11161,32 +9453,30 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { ; NO-SIMD128-LABEL: bitselect_v4i32: ; NO-SIMD128: .functype bitselect_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.const $push1=, -1 ; NO-SIMD128-NEXT: i32.xor $push2=, $4, $pop1 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $12 ; NO-SIMD128-NEXT: i32.and $push0=, $4, $8 ; NO-SIMD128-NEXT: i32.or $push4=, $pop3, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.const $push21=, -1 -; NO-SIMD128-NEXT: i32.xor $push8=, $3, $pop21 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $11 -; NO-SIMD128-NEXT: i32.and $push7=, $3, $7 -; NO-SIMD128-NEXT: i32.or $push10=, $pop9, $pop7 -; NO-SIMD128-NEXT: i32.store 8($0), $pop10 -; NO-SIMD128-NEXT: i32.const $push20=, -1 -; NO-SIMD128-NEXT: i32.xor $push12=, $2, $pop20 -; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $10 -; NO-SIMD128-NEXT: i32.and $push11=, $2, $6 -; NO-SIMD128-NEXT: i32.or $push14=, $pop13, $pop11 -; NO-SIMD128-NEXT: i32.store 4($0), $pop14 +; NO-SIMD128-NEXT: i32.store 12($0), $pop4 ; NO-SIMD128-NEXT: i32.const $push19=, -1 -; NO-SIMD128-NEXT: i32.xor $push16=, $1, $pop19 -; NO-SIMD128-NEXT: i32.and $push17=, $pop16, $9 -; NO-SIMD128-NEXT: i32.and $push15=, $1, $5 -; NO-SIMD128-NEXT: i32.or $push18=, $pop17, $pop15 -; NO-SIMD128-NEXT: i32.store 0($0), $pop18 +; NO-SIMD128-NEXT: i32.xor $push6=, $3, $pop19 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $11 +; NO-SIMD128-NEXT: i32.and $push5=, $3, $7 +; NO-SIMD128-NEXT: i32.or $push8=, $pop7, $pop5 +; NO-SIMD128-NEXT: i32.store 8($0), $pop8 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push10=, $2, $pop18 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $10 +; NO-SIMD128-NEXT: i32.and $push9=, $2, $6 +; NO-SIMD128-NEXT: i32.or $push12=, $pop11, $pop9 +; NO-SIMD128-NEXT: i32.store 4($0), $pop12 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push14=, $1, $pop17 +; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $9 +; NO-SIMD128-NEXT: i32.and $push13=, $1, $5 +; NO-SIMD128-NEXT: i32.or $push16=, $pop15, $pop13 +; NO-SIMD128-NEXT: i32.store 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_v4i32: @@ -11198,26 +9488,24 @@ define <4 x i32> @bitselect_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2) { ; NO-SIMD128-FAST-NEXT: i32.and $push0=, $1, $5 ; NO-SIMD128-FAST-NEXT: i32.or $push4=, $pop3, $pop0 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $2, $pop19 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $10 ; NO-SIMD128-FAST-NEXT: i32.and 
$push5=, $2, $6 ; NO-SIMD128-FAST-NEXT: i32.or $push8=, $pop7, $pop5 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $3, $pop18 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $11 ; NO-SIMD128-FAST-NEXT: i32.and $push9=, $3, $7 ; NO-SIMD128-FAST-NEXT: i32.or $push12=, $pop11, $pop9 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $4, $pop17 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $12 ; NO-SIMD128-FAST-NEXT: i32.and $push13=, $4, $8 ; NO-SIMD128-FAST-NEXT: i32.or $push16=, $pop15, $pop13 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %masked_v1 = and <4 x i32> %c, %v1 %inv_mask = xor <4 x i32> , %c @@ -11244,24 +9532,22 @@ define <4 x i32> @bitselect_xor_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2 ; NO-SIMD128-LABEL: bitselect_xor_v4i32: ; NO-SIMD128: .functype bitselect_xor_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 ; NO-SIMD128-NEXT: i32.xor $push0=, $8, $12 ; NO-SIMD128-NEXT: i32.and $push1=, $pop0, $4 ; NO-SIMD128-NEXT: i32.xor $push2=, $pop1, $12 -; NO-SIMD128-NEXT: i32.store 0($pop4), $pop2 -; NO-SIMD128-NEXT: i32.xor $push5=, $7, $11 -; NO-SIMD128-NEXT: i32.and $push6=, $pop5, $3 -; NO-SIMD128-NEXT: i32.xor $push7=, $pop6, $11 -; NO-SIMD128-NEXT: i32.store 8($0), $pop7 -; NO-SIMD128-NEXT: i32.xor $push8=, $6, $10 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $2 -; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $10 -; NO-SIMD128-NEXT: i32.store 4($0), $pop10 -; NO-SIMD128-NEXT: i32.xor $push11=, $5, $9 -; NO-SIMD128-NEXT: i32.and $push12=, $pop11, $1 -; NO-SIMD128-NEXT: i32.xor $push13=, $pop12, $9 -; NO-SIMD128-NEXT: i32.store 0($0), $pop13 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 +; NO-SIMD128-NEXT: i32.xor $push3=, $7, $11 +; NO-SIMD128-NEXT: i32.and $push4=, $pop3, $3 +; NO-SIMD128-NEXT: i32.xor $push5=, $pop4, $11 +; NO-SIMD128-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-NEXT: i32.xor $push6=, $6, $10 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $2 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push9=, $5, $9 +; NO-SIMD128-NEXT: i32.and $push10=, $pop9, $1 +; NO-SIMD128-NEXT: i32.xor $push11=, $pop10, $9 +; NO-SIMD128-NEXT: i32.store 0($0), $pop11 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_v4i32: @@ -11279,12 +9565,10 @@ define <4 x i32> @bitselect_xor_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x i32> %v2 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $3 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $11 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $8, $12 -; NO-SIMD128-FAST-NEXT: i32.and $push12=, $pop11, $4 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $pop12, $12 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13 +; 
NO-SIMD128-FAST-NEXT: i32.xor $push9=, $8, $12 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $pop9, $4 +; NO-SIMD128-FAST-NEXT: i32.xor $push11=, $pop10, $12 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <4 x i32> %v1, %v2 %and = and <4 x i32> %xor1, %c @@ -11311,32 +9595,30 @@ define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x ; NO-SIMD128-LABEL: bitselect_xor_reversed_v4i32: ; NO-SIMD128: .functype bitselect_xor_reversed_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 ; NO-SIMD128-NEXT: i32.xor $push2=, $8, $12 ; NO-SIMD128-NEXT: i32.const $push0=, -1 ; NO-SIMD128-NEXT: i32.xor $push1=, $4, $pop0 ; NO-SIMD128-NEXT: i32.and $push3=, $pop2, $pop1 ; NO-SIMD128-NEXT: i32.xor $push4=, $pop3, $12 -; NO-SIMD128-NEXT: i32.store 0($pop6), $pop4 -; NO-SIMD128-NEXT: i32.xor $push8=, $7, $11 -; NO-SIMD128-NEXT: i32.const $push21=, -1 -; NO-SIMD128-NEXT: i32.xor $push7=, $3, $pop21 -; NO-SIMD128-NEXT: i32.and $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.xor $push10=, $pop9, $11 -; NO-SIMD128-NEXT: i32.store 8($0), $pop10 -; NO-SIMD128-NEXT: i32.xor $push12=, $6, $10 -; NO-SIMD128-NEXT: i32.const $push20=, -1 -; NO-SIMD128-NEXT: i32.xor $push11=, $2, $pop20 -; NO-SIMD128-NEXT: i32.and $push13=, $pop12, $pop11 -; NO-SIMD128-NEXT: i32.xor $push14=, $pop13, $10 -; NO-SIMD128-NEXT: i32.store 4($0), $pop14 -; NO-SIMD128-NEXT: i32.xor $push16=, $5, $9 +; NO-SIMD128-NEXT: i32.store 12($0), $pop4 +; NO-SIMD128-NEXT: i32.xor $push6=, $7, $11 ; NO-SIMD128-NEXT: i32.const $push19=, -1 -; NO-SIMD128-NEXT: i32.xor $push15=, $1, $pop19 -; NO-SIMD128-NEXT: i32.and $push17=, $pop16, $pop15 -; NO-SIMD128-NEXT: i32.xor $push18=, $pop17, $9 -; NO-SIMD128-NEXT: i32.store 0($0), $pop18 +; NO-SIMD128-NEXT: i32.xor $push5=, $3, $pop19 +; NO-SIMD128-NEXT: i32.and $push7=, $pop6, $pop5 +; NO-SIMD128-NEXT: i32.xor $push8=, $pop7, $11 +; NO-SIMD128-NEXT: i32.store 8($0), $pop8 +; NO-SIMD128-NEXT: i32.xor $push10=, $6, $10 +; NO-SIMD128-NEXT: i32.const $push18=, -1 +; NO-SIMD128-NEXT: i32.xor $push9=, $2, $pop18 +; NO-SIMD128-NEXT: i32.and $push11=, $pop10, $pop9 +; NO-SIMD128-NEXT: i32.xor $push12=, $pop11, $10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop12 +; NO-SIMD128-NEXT: i32.xor $push14=, $5, $9 +; NO-SIMD128-NEXT: i32.const $push17=, -1 +; NO-SIMD128-NEXT: i32.xor $push13=, $1, $pop17 +; NO-SIMD128-NEXT: i32.and $push15=, $pop14, $pop13 +; NO-SIMD128-NEXT: i32.xor $push16=, $pop15, $9 +; NO-SIMD128-NEXT: i32.store 0($0), $pop16 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: bitselect_xor_reversed_v4i32: @@ -11349,25 +9631,23 @@ define <4 x i32> @bitselect_xor_reversed_v4i32(<4 x i32> %c, <4 x i32> %v1, <4 x ; NO-SIMD128-FAST-NEXT: i32.xor $push4=, $pop3, $9 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop4 ; NO-SIMD128-FAST-NEXT: i32.xor $push6=, $6, $10 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push5=, $2, $pop19 ; NO-SIMD128-FAST-NEXT: i32.and $push7=, $pop6, $pop5 ; NO-SIMD128-FAST-NEXT: i32.xor $push8=, $pop7, $10 ; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop8 ; NO-SIMD128-FAST-NEXT: i32.xor $push10=, $7, $11 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop20 +; NO-SIMD128-FAST-NEXT: i32.const $push18=, -1 +; 
NO-SIMD128-FAST-NEXT: i32.xor $push9=, $3, $pop18 ; NO-SIMD128-FAST-NEXT: i32.and $push11=, $pop10, $pop9 ; NO-SIMD128-FAST-NEXT: i32.xor $push12=, $pop11, $11 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop12 -; NO-SIMD128-FAST-NEXT: i32.const $push17=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push18=, $0, $pop17 ; NO-SIMD128-FAST-NEXT: i32.xor $push14=, $8, $12 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, -1 -; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop19 +; NO-SIMD128-FAST-NEXT: i32.const $push17=, -1 +; NO-SIMD128-FAST-NEXT: i32.xor $push13=, $4, $pop17 ; NO-SIMD128-FAST-NEXT: i32.and $push15=, $pop14, $pop13 ; NO-SIMD128-FAST-NEXT: i32.xor $push16=, $pop15, $12 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop18), $pop16 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop16 ; NO-SIMD128-FAST-NEXT: return %xor1 = xor <4 x i32> %v1, %v2 %notc = xor <4 x i32> %c, @@ -11394,24 +9674,22 @@ define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-LABEL: extmul_low_s_v4i32: ; NO-SIMD128: .functype extmul_low_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend16_s $push1=, $3 -; NO-SIMD128-NEXT: i32.extend16_s $push0=, $11 +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $4 +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $12 ; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend16_s $push4=, $2 -; NO-SIMD128-NEXT: i32.extend16_s $push3=, $10 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $3 +; NO-SIMD128-NEXT: i32.extend16_s $push3=, $11 ; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; NO-SIMD128-NEXT: i32.store 4($0), $pop5 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $1 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $9 +; NO-SIMD128-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $2 +; NO-SIMD128-NEXT: i32.extend16_s $push6=, $10 ; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.store 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 12 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.extend16_s $push10=, $4 -; NO-SIMD128-NEXT: i32.extend16_s $push9=, $12 +; NO-SIMD128-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-NEXT: i32.extend16_s $push10=, $1 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $9 ; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 -; NO-SIMD128-NEXT: i32.store 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.store 0($0), $pop11 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_low_s_v4i32: @@ -11429,12 +9707,10 @@ define <4 x i32> @extmul_low_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push6=, $11 ; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $4 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $12 -; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $4 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $12 +; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11 ; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> @@ -11464,24 +9740,22 @@ define <4 x i32> 
@extmul_high_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-LABEL: extmul_high_s_v4i32: ; NO-SIMD128: .functype extmul_high_s_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.extend16_s $push1=, $7 -; NO-SIMD128-NEXT: i32.extend16_s $push0=, $15 +; NO-SIMD128-NEXT: i32.extend16_s $push1=, $8 +; NO-SIMD128-NEXT: i32.extend16_s $push0=, $16 ; NO-SIMD128-NEXT: i32.mul $push2=, $pop1, $pop0 -; NO-SIMD128-NEXT: i32.store 8($0), $pop2 -; NO-SIMD128-NEXT: i32.extend16_s $push4=, $6 -; NO-SIMD128-NEXT: i32.extend16_s $push3=, $14 +; NO-SIMD128-NEXT: i32.store 12($0), $pop2 +; NO-SIMD128-NEXT: i32.extend16_s $push4=, $7 +; NO-SIMD128-NEXT: i32.extend16_s $push3=, $15 ; NO-SIMD128-NEXT: i32.mul $push5=, $pop4, $pop3 -; NO-SIMD128-NEXT: i32.store 4($0), $pop5 -; NO-SIMD128-NEXT: i32.extend16_s $push7=, $5 -; NO-SIMD128-NEXT: i32.extend16_s $push6=, $13 +; NO-SIMD128-NEXT: i32.store 8($0), $pop5 +; NO-SIMD128-NEXT: i32.extend16_s $push7=, $6 +; NO-SIMD128-NEXT: i32.extend16_s $push6=, $14 ; NO-SIMD128-NEXT: i32.mul $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.store 0($0), $pop8 -; NO-SIMD128-NEXT: i32.const $push12=, 12 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.extend16_s $push10=, $8 -; NO-SIMD128-NEXT: i32.extend16_s $push9=, $16 +; NO-SIMD128-NEXT: i32.store 4($0), $pop8 +; NO-SIMD128-NEXT: i32.extend16_s $push10=, $5 +; NO-SIMD128-NEXT: i32.extend16_s $push9=, $13 ; NO-SIMD128-NEXT: i32.mul $push11=, $pop10, $pop9 -; NO-SIMD128-NEXT: i32.store 0($pop13), $pop11 +; NO-SIMD128-NEXT: i32.store 0($0), $pop11 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_high_s_v4i32: @@ -11499,12 +9773,10 @@ define <4 x i32> @extmul_high_s_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: i32.extend16_s $push6=, $15 ; NO-SIMD128-FAST-NEXT: i32.mul $push8=, $pop7, $pop6 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop8 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push12=, $8 -; NO-SIMD128-FAST-NEXT: i32.extend16_s $push11=, $16 -; NO-SIMD128-FAST-NEXT: i32.mul $push13=, $pop12, $pop11 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop10), $pop13 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push10=, $8 +; NO-SIMD128-FAST-NEXT: i32.extend16_s $push9=, $16 +; NO-SIMD128-FAST-NEXT: i32.mul $push11=, $pop10, $pop9 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop11 ; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> @@ -11535,31 +9807,29 @@ define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128: .functype extmul_low_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push2=, $3, $pop0 -; NO-SIMD128-NEXT: i32.const $push21=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $11, $pop21 -; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push20=, 65535 -; NO-SIMD128-NEXT: i32.and $push5=, $2, $pop20 +; NO-SIMD128-NEXT: i32.and $push2=, $4, $pop0 ; NO-SIMD128-NEXT: i32.const $push19=, 65535 -; NO-SIMD128-NEXT: i32.and $push4=, $10, $pop19 -; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.and $push1=, $12, $pop19 +; NO-SIMD128-NEXT: i32.mul 
$push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push18=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $1, $pop18 +; NO-SIMD128-NEXT: i32.and $push5=, $3, $pop18 ; NO-SIMD128-NEXT: i32.const $push17=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $9, $pop17 -; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store 0($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 12 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.and $push4=, $11, $pop17 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store 8($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push16=, 65535 -; NO-SIMD128-NEXT: i32.and $push11=, $4, $pop16 +; NO-SIMD128-NEXT: i32.and $push8=, $2, $pop16 ; NO-SIMD128-NEXT: i32.const $push15=, 65535 -; NO-SIMD128-NEXT: i32.and $push10=, $12, $pop15 +; NO-SIMD128-NEXT: i32.and $push7=, $10, $pop15 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store 4($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push14=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $1, $pop14 +; NO-SIMD128-NEXT: i32.const $push13=, 65535 +; NO-SIMD128-NEXT: i32.and $push10=, $9, $pop13 ; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.store 0($0), $pop12 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_low_u_v4i32: @@ -11567,30 +9837,28 @@ define <4 x i32> @extmul_low_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $1, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $9, $pop19 ; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop19 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop18 +; NO-SIMD128-FAST-NEXT: i32.and $push5=, $2, $pop18 ; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop17 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $10, $pop17 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push16=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $3, $pop16 ; NO-SIMD128-FAST-NEXT: i32.const $push15=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop15 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $11, $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $4, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $12, $pop13 ; NO-SIMD128-FAST-NEXT: i32.mul $push12=, 
$pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop12 ; NO-SIMD128-FAST-NEXT: return %low1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> @@ -11621,31 +9889,29 @@ define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128: .functype extmul_high_u_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: i32.const $push0=, 65535 -; NO-SIMD128-NEXT: i32.and $push2=, $7, $pop0 -; NO-SIMD128-NEXT: i32.const $push21=, 65535 -; NO-SIMD128-NEXT: i32.and $push1=, $15, $pop21 -; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 -; NO-SIMD128-NEXT: i32.store 8($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push20=, 65535 -; NO-SIMD128-NEXT: i32.and $push5=, $6, $pop20 +; NO-SIMD128-NEXT: i32.and $push2=, $8, $pop0 ; NO-SIMD128-NEXT: i32.const $push19=, 65535 -; NO-SIMD128-NEXT: i32.and $push4=, $14, $pop19 -; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-NEXT: i32.store 4($0), $pop6 +; NO-SIMD128-NEXT: i32.and $push1=, $16, $pop19 +; NO-SIMD128-NEXT: i32.mul $push3=, $pop2, $pop1 +; NO-SIMD128-NEXT: i32.store 12($0), $pop3 ; NO-SIMD128-NEXT: i32.const $push18=, 65535 -; NO-SIMD128-NEXT: i32.and $push8=, $5, $pop18 +; NO-SIMD128-NEXT: i32.and $push5=, $7, $pop18 ; NO-SIMD128-NEXT: i32.const $push17=, 65535 -; NO-SIMD128-NEXT: i32.and $push7=, $13, $pop17 -; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-NEXT: i32.store 0($0), $pop9 -; NO-SIMD128-NEXT: i32.const $push13=, 12 -; NO-SIMD128-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-NEXT: i32.and $push4=, $15, $pop17 +; NO-SIMD128-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.store 8($0), $pop6 ; NO-SIMD128-NEXT: i32.const $push16=, 65535 -; NO-SIMD128-NEXT: i32.and $push11=, $8, $pop16 +; NO-SIMD128-NEXT: i32.and $push8=, $6, $pop16 ; NO-SIMD128-NEXT: i32.const $push15=, 65535 -; NO-SIMD128-NEXT: i32.and $push10=, $16, $pop15 +; NO-SIMD128-NEXT: i32.and $push7=, $14, $pop15 +; NO-SIMD128-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-NEXT: i32.store 4($0), $pop9 +; NO-SIMD128-NEXT: i32.const $push14=, 65535 +; NO-SIMD128-NEXT: i32.and $push11=, $5, $pop14 +; NO-SIMD128-NEXT: i32.const $push13=, 65535 +; NO-SIMD128-NEXT: i32.and $push10=, $13, $pop13 ; NO-SIMD128-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-NEXT: i32.store 0($0), $pop12 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: extmul_high_u_v4i32: @@ -11653,30 +9919,28 @@ define <4 x i32> @extmul_high_u_v4i32(<8 x i16> %v1, <8 x i16> %v2) { ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: i32.const $push0=, 65535 ; NO-SIMD128-FAST-NEXT: i32.and $push2=, $5, $pop0 -; NO-SIMD128-FAST-NEXT: i32.const $push21=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push1=, $13, $pop21 +; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push1=, $13, $pop19 ; NO-SIMD128-FAST-NEXT: i32.mul $push3=, $pop2, $pop1 ; NO-SIMD128-FAST-NEXT: i32.store 0($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push20=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push5=, $6, $pop20 -; NO-SIMD128-FAST-NEXT: i32.const $push19=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push4=, $14, $pop19 -; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 -; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push18=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push8=, $7, $pop18 +; NO-SIMD128-FAST-NEXT: 
i32.and $push5=, $6, $pop18 ; NO-SIMD128-FAST-NEXT: i32.const $push17=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push7=, $15, $pop17 -; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 -; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 -; NO-SIMD128-FAST-NEXT: i32.const $push13=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push14=, $0, $pop13 +; NO-SIMD128-FAST-NEXT: i32.and $push4=, $14, $pop17 +; NO-SIMD128-FAST-NEXT: i32.mul $push6=, $pop5, $pop4 +; NO-SIMD128-FAST-NEXT: i32.store 4($0), $pop6 ; NO-SIMD128-FAST-NEXT: i32.const $push16=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push11=, $8, $pop16 +; NO-SIMD128-FAST-NEXT: i32.and $push8=, $7, $pop16 ; NO-SIMD128-FAST-NEXT: i32.const $push15=, 65535 -; NO-SIMD128-FAST-NEXT: i32.and $push10=, $16, $pop15 +; NO-SIMD128-FAST-NEXT: i32.and $push7=, $15, $pop15 +; NO-SIMD128-FAST-NEXT: i32.mul $push9=, $pop8, $pop7 +; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop9 +; NO-SIMD128-FAST-NEXT: i32.const $push14=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push11=, $8, $pop14 +; NO-SIMD128-FAST-NEXT: i32.const $push13=, 65535 +; NO-SIMD128-FAST-NEXT: i32.and $push10=, $16, $pop13 ; NO-SIMD128-FAST-NEXT: i32.mul $push12=, $pop11, $pop10 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop14), $pop12 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop12 ; NO-SIMD128-FAST-NEXT: return %high1 = shufflevector <8 x i16> %v1, <8 x i16> undef, <4 x i32> @@ -13061,16 +11325,14 @@ define <4 x float> @neg_v4f32(<4 x float> %x) { ; NO-SIMD128-LABEL: neg_v4f32: ; NO-SIMD128: .functype neg_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.neg $push0=, $3 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.neg $push1=, $2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.neg $push2=, $1 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: f32.neg $push5=, $4 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: f32.neg $push0=, $4 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.neg $push1=, $3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.neg $push2=, $2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.neg $push3=, $1 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: neg_v4f32: @@ -13082,10 +11344,8 @@ define <4 x float> @neg_v4f32(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.neg $push2=, $3 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.neg $push5=, $4 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.neg $push3=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fsub nsz <4 x float> , %x ret <4 x float> %a @@ -13108,16 +11368,14 @@ define <4 x float> @abs_v4f32(<4 x float> %x) { ; NO-SIMD128-LABEL: abs_v4f32: ; NO-SIMD128: .functype abs_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.abs $push0=, $3 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.abs $push1=, $2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.abs $push2=, $1 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: f32.abs $push5=, 
$4 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: f32.abs $push0=, $4 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.abs $push1=, $3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.abs $push2=, $2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.abs $push3=, $1 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: abs_v4f32: @@ -13129,10 +11387,8 @@ define <4 x float> @abs_v4f32(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.abs $push2=, $3 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.abs $push5=, $4 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.abs $push3=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x) ret <4 x float> %a @@ -13157,54 +11413,50 @@ define <4 x float> @min_unordered_v4f32(<4 x float> %x) { ; NO-SIMD128: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.gt $push1=, $3, $pop17 -; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 -; NO-SIMD128-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.gt $push3=, $2, $pop15 -; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.gt $push1=, $4, $pop15 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1 +; NO-SIMD128-NEXT: f32.store 12($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.gt $push5=, $1, $pop13 -; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.gt $push3=, $3, $pop13 +; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop4 ; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.gt $push7=, $4, $pop11 -; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: f32.gt $push5=, $2, $pop11 +; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5 +; NO-SIMD128-NEXT: f32.store 4($0), $pop6 +; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.gt $push7=, $1, $pop9 +; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7 +; NO-SIMD128-NEXT: f32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_unordered_v4f32: ; NO-SIMD128-FAST: .functype min_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.gt $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.gt $push1=, $1, $pop15 ; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 -; 
NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.gt $push3=, $2, $pop15 -; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.gt $push5=, $3, $pop13 -; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.gt $push3=, $2, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.gt $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: f32.gt $push5=, $3, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.gt $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ule <4 x float> %x, %a = select <4 x i1> %cmps, <4 x float> %x, @@ -13231,54 +11483,50 @@ define <4 x float> @max_unordered_v4f32(<4 x float> %x) { ; NO-SIMD128: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.lt $push1=, $3, $pop17 -; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 -; NO-SIMD128-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.lt $push3=, $2, $pop15 -; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.lt $push1=, $4, $pop15 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1 +; NO-SIMD128-NEXT: f32.store 12($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.lt $push5=, $1, $pop13 -; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.lt $push3=, $3, $pop13 +; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop4 ; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.lt $push7=, $4, $pop11 -; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: f32.lt $push5=, $2, $pop11 +; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5 +; NO-SIMD128-NEXT: f32.store 4($0), $pop6 +; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.lt $push7=, $1, $pop9 +; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7 +; NO-SIMD128-NEXT: f32.store 0($0), $pop8 ; 
NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_unordered_v4f32: ; NO-SIMD128-FAST: .functype max_unordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.lt $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.lt $push1=, $1, $pop15 ; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.lt $push3=, $2, $pop15 -; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.lt $push5=, $3, $pop13 -; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.lt $push3=, $2, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.lt $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: f32.lt $push5=, $3, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.lt $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp uge <4 x float> %x, %a = select <4 x i1> %cmps, <4 x float> %x, @@ -13305,54 +11553,50 @@ define <4 x float> @min_ordered_v4f32(<4 x float> %x) { ; NO-SIMD128: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.ge $push1=, $3, $pop17 -; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 -; NO-SIMD128-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.ge $push3=, $2, $pop15 -; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.ge $push1=, $4, $pop15 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1 +; NO-SIMD128-NEXT: f32.store 12($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.ge $push5=, $1, $pop13 -; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.ge $push3=, $3, $pop13 +; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop4 ; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 
-; NO-SIMD128-NEXT: f32.ge $push7=, $4, $pop11 -; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: f32.ge $push5=, $2, $pop11 +; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5 +; NO-SIMD128-NEXT: f32.store 4($0), $pop6 +; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.ge $push7=, $1, $pop9 +; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7 +; NO-SIMD128-NEXT: f32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_ordered_v4f32: ; NO-SIMD128-FAST: .functype min_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.ge $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.ge $push1=, $1, $pop15 ; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.ge $push3=, $2, $pop15 -; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.ge $push5=, $3, $pop13 -; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.ge $push3=, $2, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.ge $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: f32.ge $push5=, $3, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.ge $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp ole <4 x float> , %x %a = select <4 x i1> %cmps, @@ -13379,54 +11623,50 @@ define <4 x float> @max_ordered_v4f32(<4 x float> %x) { ; NO-SIMD128: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.le $push1=, $3, $pop17 -; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $3, $pop1 -; NO-SIMD128-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push16=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.le $push3=, $2, $pop15 -; NO-SIMD128-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.le $push1=, $4, $pop15 +; NO-SIMD128-NEXT: f32.select $push2=, $pop0, $4, $pop1 +; NO-SIMD128-NEXT: f32.store 12($0), $pop2 ; NO-SIMD128-NEXT: f32.const $push14=, 0x1.4p2 ; 
NO-SIMD128-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.le $push5=, $1, $pop13 -; NO-SIMD128-NEXT: f32.select $push6=, $pop14, $1, $pop5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop6 -; NO-SIMD128-NEXT: i32.const $push9=, 12 -; NO-SIMD128-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-NEXT: f32.le $push3=, $3, $pop13 +; NO-SIMD128-NEXT: f32.select $push4=, $pop14, $3, $pop3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop4 ; NO-SIMD128-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-NEXT: f32.le $push7=, $4, $pop11 -; NO-SIMD128-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-NEXT: f32.le $push5=, $2, $pop11 +; NO-SIMD128-NEXT: f32.select $push6=, $pop12, $2, $pop5 +; NO-SIMD128-NEXT: f32.store 4($0), $pop6 +; NO-SIMD128-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-NEXT: f32.le $push7=, $1, $pop9 +; NO-SIMD128-NEXT: f32.select $push8=, $pop10, $1, $pop7 +; NO-SIMD128-NEXT: f32.store 0($0), $pop8 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_ordered_v4f32: ; NO-SIMD128-FAST: .functype max_ordered_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-FAST-NEXT: # %bb.0: ; NO-SIMD128-FAST-NEXT: f32.const $push0=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push17=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.le $push1=, $1, $pop17 +; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.le $push1=, $1, $pop15 ; NO-SIMD128-FAST-NEXT: f32.select $push2=, $pop0, $1, $pop1 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-FAST-NEXT: f32.const $push16=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.const $push15=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.le $push3=, $2, $pop15 -; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop16, $2, $pop3 -; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push14=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push13=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.le $push5=, $3, $pop13 -; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop14, $3, $pop5 -; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 -; NO-SIMD128-FAST-NEXT: i32.const $push9=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push10=, $0, $pop9 +; NO-SIMD128-FAST-NEXT: f32.le $push3=, $2, $pop13 +; NO-SIMD128-FAST-NEXT: f32.select $push4=, $pop14, $2, $pop3 +; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop4 ; NO-SIMD128-FAST-NEXT: f32.const $push12=, 0x1.4p2 ; NO-SIMD128-FAST-NEXT: f32.const $push11=, 0x1.4p2 -; NO-SIMD128-FAST-NEXT: f32.le $push7=, $4, $pop11 -; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop12, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop10), $pop8 +; NO-SIMD128-FAST-NEXT: f32.le $push5=, $3, $pop11 +; NO-SIMD128-FAST-NEXT: f32.select $push6=, $pop12, $3, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push10=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.const $push9=, 0x1.4p2 +; NO-SIMD128-FAST-NEXT: f32.le $push7=, $4, $pop9 +; NO-SIMD128-FAST-NEXT: f32.select $push8=, $pop10, $4, $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop8 ; NO-SIMD128-FAST-NEXT: return %cmps = fcmp oge <4 x float> , %x %a = select <4 x i1> %cmps, @@ -13451,16 +11691,14 @@ define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: min_intrinsic_v4f32: ; NO-SIMD128: .functype min_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.min $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 
8($0), $pop0 -; NO-SIMD128-NEXT: f32.min $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.min $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.min $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.min $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.min $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.min $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.min $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: min_intrinsic_v4f32: @@ -13472,10 +11710,8 @@ define <4 x float> @min_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.min $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.min $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.min $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.minimum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13552,16 +11788,14 @@ define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: minnum_intrinsic_v4f32: ; NO-SIMD128: .functype minnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: call $push0=, fminf, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: call $push1=, fminf, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: call $push2=, fminf, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: call $push5=, fminf, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: call $push0=, fminf, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fminf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fminf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: call $push3=, fminf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: minnum_intrinsic_v4f32: @@ -13573,10 +11807,8 @@ define <4 x float> @minnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13598,16 +11830,14 @@ define <4 x float> @minnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: minnum_nsz_intrinsic_v4f32: ; NO-SIMD128: .functype minnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, 
f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: call $push0=, fminf, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: call $push1=, fminf, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: call $push2=, fminf, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: call $push5=, fminf, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: call $push0=, fminf, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fminf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fminf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: call $push3=, fminf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: minnum_nsz_intrinsic_v4f32: @@ -13619,10 +11849,8 @@ define <4 x float> @minnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan nsz <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13647,19 +11875,17 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128: .functype fminnumv432_non_zero_intrinsic (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 -; NO-SIMD128-NEXT: call $push1=, fminf, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: call $push2=, fminf, $2, $pop9 -; NO-SIMD128-NEXT: f32.store 4($0), $pop2 -; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-NEXT: call $push3=, fminf, $1, $pop8 -; NO-SIMD128-NEXT: f32.store 0($0), $pop3 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 +; NO-SIMD128-NEXT: call $push1=, fminf, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 ; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 -; NO-SIMD128-NEXT: call $push6=, fminf, $4, $pop7 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-NEXT: call $push2=, fminf, $3, $pop7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: call $push3=, fminf, $2, $pop6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop3 +; NO-SIMD128-NEXT: f32.const $push5=, -0x1p0 +; NO-SIMD128-NEXT: call $push4=, fminf, $1, $pop5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop4 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: fminnumv432_non_zero_intrinsic: @@ -13668,17 +11894,15 @@ define <4 x float> @fminnumv432_non_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.const $push0=, -0x1p0 ; NO-SIMD128-FAST-NEXT: call $push1=, fminf, $1, $pop0 ; NO-SIMD128-FAST-NEXT: f32.store 0($0), $pop1 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $2, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push2=, fminf, $2, $pop7 ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop2 -; 
NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $3, $pop8 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $3, $pop6 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop3 -; NO-SIMD128-FAST-NEXT: i32.const $push4=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push6=, fminf, $4, $pop7 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop5), $pop6 +; NO-SIMD128-FAST-NEXT: f32.const $push5=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $4, $pop5 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop4 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float>) ret <4 x float> %a @@ -13755,19 +11979,17 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128: .functype fminnumv432_one_zero_intrinsic (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 -; NO-SIMD128-NEXT: call $push1=, fminf, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push2=, 0x0p0 -; NO-SIMD128-NEXT: call $push3=, fminf, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: call $push4=, fminf, $1, $pop9 -; NO-SIMD128-NEXT: f32.store 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-NEXT: call $push7=, fminf, $4, $pop8 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-NEXT: call $push1=, fminf, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-NEXT: call $push2=, fminf, $3, $pop7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0 +; NO-SIMD128-NEXT: call $push4=, fminf, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: call $push5=, fminf, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: fminnumv432_one_zero_intrinsic: @@ -13779,14 +12001,12 @@ define <4 x float> @fminnumv432_one_zero_intrinsic(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0 ; NO-SIMD128-FAST-NEXT: call $push3=, fminf, $2, $pop2 ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $3, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push4=, fminf, $3, $pop7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push7=, fminf, $4, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push5=, fminf, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.minnum.v4f32(<4 x float> %x, <4 x float>) ret <4 x float> %a @@ -13809,16 +12029,14 @@ define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: max_intrinsic_v4f32: ; NO-SIMD128: .functype max_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; 
NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.max $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.max $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.max $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.max $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.max $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.max $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.max $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.max $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: max_intrinsic_v4f32: @@ -13830,10 +12048,8 @@ define <4 x float> @max_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.max $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.max $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.max $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.maximum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13910,16 +12126,14 @@ define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: maxnum_intrinsic_v4f32: ; NO-SIMD128: .functype maxnum_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: call $push0=, fmaxf, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: call $push1=, fmaxf, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: call $push2=, fmaxf, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: call $push5=, fmaxf, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: call $push0=, fmaxf, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: call $push3=, fmaxf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_intrinsic_v4f32: @@ -13931,10 +12145,8 @@ define <4 x float> @maxnum_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -13956,16 +12168,14 @@ define <4 x float> @maxnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: 
maxnum_nsz_intrinsic_v4f32: ; NO-SIMD128: .functype maxnum_nsz_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: call $push0=, fmaxf, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: call $push1=, fmaxf, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: call $push2=, fmaxf, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: call $push5=, fmaxf, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: call $push0=, fmaxf, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: call $push3=, fmaxf, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_nsz_intrinsic_v4f32: @@ -13977,10 +12187,8 @@ define <4 x float> @maxnum_nsz_intrinsic_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: call $push2=, fmaxf, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call nnan nsz <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float> %y) ret <4 x float> %a @@ -14057,19 +12265,17 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128: .functype maxnum_one_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 -; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push2=, 0x0p0 -; NO-SIMD128-NEXT: call $push3=, fmaxf, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: call $push4=, fmaxf, $1, $pop9 -; NO-SIMD128-NEXT: f32.store 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-NEXT: call $push7=, fmaxf, $4, $pop8 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $3, $pop7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push3=, 0x0p0 +; NO-SIMD128-NEXT: call $push4=, fmaxf, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: call $push5=, fmaxf, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_one_zero_intrinsic_v4f32: @@ -14081,14 +12287,12 @@ define <4 x float> @maxnum_one_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x0p0 ; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2 ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; 
NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push7=, fmaxf, $4, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float>) ret <4 x float> %a @@ -14113,19 +12317,17 @@ define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128: .functype maxnum_non_zero_intrinsic_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: ; NO-SIMD128-NEXT: f32.const $push0=, -0x1p0 -; NO-SIMD128-NEXT: call $push1=, fmaxf, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.const $push2=, 0x1p0 -; NO-SIMD128-NEXT: call $push3=, fmaxf, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-NEXT: call $push4=, fmaxf, $1, $pop9 -; NO-SIMD128-NEXT: f32.store 0($0), $pop4 -; NO-SIMD128-NEXT: i32.const $push5=, 12 -; NO-SIMD128-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-NEXT: call $push7=, fmaxf, $4, $pop8 -; NO-SIMD128-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-NEXT: call $push1=, fmaxf, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-NEXT: call $push2=, fmaxf, $3, $pop7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop2 +; NO-SIMD128-NEXT: f32.const $push3=, 0x1p0 +; NO-SIMD128-NEXT: call $push4=, fmaxf, $2, $pop3 +; NO-SIMD128-NEXT: f32.store 4($0), $pop4 +; NO-SIMD128-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-NEXT: call $push5=, fmaxf, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop5 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: maxnum_non_zero_intrinsic_v4f32: @@ -14137,14 +12339,12 @@ define <4 x float> @maxnum_non_zero_intrinsic_v4f32(<4 x float> %x, <4 x float> ; NO-SIMD128-FAST-NEXT: f32.const $push2=, 0x1p0 ; NO-SIMD128-FAST-NEXT: call $push3=, fmaxf, $2, $pop2 ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-FAST-NEXT: f32.const $push9=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop9 +; NO-SIMD128-FAST-NEXT: f32.const $push7=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push4=, fmaxf, $3, $pop7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop4 -; NO-SIMD128-FAST-NEXT: i32.const $push5=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push6=, $0, $pop5 -; NO-SIMD128-FAST-NEXT: f32.const $push8=, -0x1p0 -; NO-SIMD128-FAST-NEXT: call $push7=, fmaxf, $4, $pop8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop6), $pop7 +; NO-SIMD128-FAST-NEXT: f32.const $push6=, -0x1p0 +; NO-SIMD128-FAST-NEXT: call $push5=, fmaxf, $4, $pop6 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop5 ; NO-SIMD128-FAST-NEXT: return %a = call nnan <4 x float> @llvm.maxnum.v4f32(<4 x float> %x, <4 x float>) ret <4 x float> %a @@ -14240,20 +12440,18 @@ define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: pmin_v4f32: ; NO-SIMD128: .functype pmin_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: 
f32.lt $push0=, $7, $3 -; NO-SIMD128-NEXT: f32.select $push1=, $7, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.lt $push2=, $6, $2 -; NO-SIMD128-NEXT: f32.select $push3=, $6, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.lt $push4=, $5, $1 -; NO-SIMD128-NEXT: f32.select $push5=, $5, $1, $pop4 -; NO-SIMD128-NEXT: f32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: f32.lt $push6=, $8, $4 -; NO-SIMD128-NEXT: f32.select $push7=, $8, $4, $pop6 -; NO-SIMD128-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: f32.lt $push0=, $8, $4 +; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.lt $push2=, $7, $3 +; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2 +; NO-SIMD128-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-NEXT: f32.lt $push4=, $6, $2 +; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4 +; NO-SIMD128-NEXT: f32.store 4($0), $pop5 +; NO-SIMD128-NEXT: f32.lt $push6=, $5, $1 +; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: pmin_v4f32: @@ -14268,11 +12466,9 @@ define <4 x float> @pmin_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $7, $3 ; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $8, $4 ; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = fcmp olt <4 x float> %y, %x %a = select <4 x i1> %c, <4 x float> %y, <4 x float> %x @@ -14295,28 +12491,26 @@ define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: pmin_int_v4f32: ; NO-SIMD128: .functype pmin_int_v4f32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: f32.reinterpret_i32 $push1=, $8 ; NO-SIMD128-NEXT: f32.reinterpret_i32 $push0=, $4 ; NO-SIMD128-NEXT: f32.lt $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $8, $4, $pop2 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push7=, $7 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push6=, $3 -; NO-SIMD128-NEXT: f32.lt $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $7, $3, $pop8 -; NO-SIMD128-NEXT: i32.store 8($0), $pop9 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push11=, $6 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push10=, $2 -; NO-SIMD128-NEXT: f32.lt $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.select $push13=, $6, $2, $pop12 -; NO-SIMD128-NEXT: i32.store 4($0), $pop13 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push15=, $5 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push14=, $1 -; NO-SIMD128-NEXT: f32.lt $push16=, $pop15, $pop14 -; NO-SIMD128-NEXT: i32.select $push17=, $5, $1, $pop16 -; NO-SIMD128-NEXT: i32.store 0($0), $pop17 +; NO-SIMD128-NEXT: i32.store 12($0), $pop3 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push5=, $7 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push4=, $3 +; NO-SIMD128-NEXT: f32.lt $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $7, $3, $pop6 +; NO-SIMD128-NEXT: i32.store 
8($0), $pop7 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push9=, $6 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push8=, $2 +; NO-SIMD128-NEXT: f32.lt $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $6, $2, $pop10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop11 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push13=, $5 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push12=, $1 +; NO-SIMD128-NEXT: f32.lt $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.select $push15=, $5, $1, $pop14 +; NO-SIMD128-NEXT: i32.store 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: pmin_int_v4f32: @@ -14337,13 +12531,11 @@ define <4 x i32> @pmin_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: f32.lt $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $7, $3, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push13=, $8 ; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push12=, $4 ; NO-SIMD128-FAST-NEXT: f32.lt $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $8, $4, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop17), $pop15 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %fx = bitcast <4 x i32> %x to <4 x float> %fy = bitcast <4 x i32> %y to <4 x float> @@ -14368,20 +12560,18 @@ define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: pmax_v4f32: ; NO-SIMD128: .functype pmax_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.lt $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.select $push1=, $7, $3, $pop0 -; NO-SIMD128-NEXT: f32.store 8($0), $pop1 -; NO-SIMD128-NEXT: f32.lt $push2=, $2, $6 -; NO-SIMD128-NEXT: f32.select $push3=, $6, $2, $pop2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop3 -; NO-SIMD128-NEXT: f32.lt $push4=, $1, $5 -; NO-SIMD128-NEXT: f32.select $push5=, $5, $1, $pop4 -; NO-SIMD128-NEXT: f32.store 0($0), $pop5 -; NO-SIMD128-NEXT: i32.const $push8=, 12 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: f32.lt $push6=, $4, $8 -; NO-SIMD128-NEXT: f32.select $push7=, $8, $4, $pop6 -; NO-SIMD128-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-NEXT: f32.lt $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.select $push1=, $8, $4, $pop0 +; NO-SIMD128-NEXT: f32.store 12($0), $pop1 +; NO-SIMD128-NEXT: f32.lt $push2=, $3, $7 +; NO-SIMD128-NEXT: f32.select $push3=, $7, $3, $pop2 +; NO-SIMD128-NEXT: f32.store 8($0), $pop3 +; NO-SIMD128-NEXT: f32.lt $push4=, $2, $6 +; NO-SIMD128-NEXT: f32.select $push5=, $6, $2, $pop4 +; NO-SIMD128-NEXT: f32.store 4($0), $pop5 +; NO-SIMD128-NEXT: f32.lt $push6=, $1, $5 +; NO-SIMD128-NEXT: f32.select $push7=, $5, $1, $pop6 +; NO-SIMD128-NEXT: f32.store 0($0), $pop7 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: pmax_v4f32: @@ -14396,11 +12586,9 @@ define <4 x float> @pmax_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.lt $push4=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.select $push5=, $7, $3, $pop4 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop5 -; NO-SIMD128-FAST-NEXT: i32.const $push8=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push9=, $0, $pop8 ; NO-SIMD128-FAST-NEXT: f32.lt $push6=, $4, $8 ; NO-SIMD128-FAST-NEXT: f32.select $push7=, $8, $4, $pop6 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop9), $pop7 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop7 ; NO-SIMD128-FAST-NEXT: return %c = fcmp olt <4 x float> %x, %y %a = select <4 x i1> %c, <4 x 
float> %y, <4 x float> %x @@ -14423,28 +12611,26 @@ define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: pmax_int_v4f32: ; NO-SIMD128: .functype pmax_int_v4f32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 ; NO-SIMD128-NEXT: f32.reinterpret_i32 $push1=, $4 ; NO-SIMD128-NEXT: f32.reinterpret_i32 $push0=, $8 ; NO-SIMD128-NEXT: f32.lt $push2=, $pop1, $pop0 ; NO-SIMD128-NEXT: i32.select $push3=, $8, $4, $pop2 -; NO-SIMD128-NEXT: i32.store 0($pop5), $pop3 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push7=, $3 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push6=, $7 -; NO-SIMD128-NEXT: f32.lt $push8=, $pop7, $pop6 -; NO-SIMD128-NEXT: i32.select $push9=, $7, $3, $pop8 -; NO-SIMD128-NEXT: i32.store 8($0), $pop9 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push11=, $2 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push10=, $6 -; NO-SIMD128-NEXT: f32.lt $push12=, $pop11, $pop10 -; NO-SIMD128-NEXT: i32.select $push13=, $6, $2, $pop12 -; NO-SIMD128-NEXT: i32.store 4($0), $pop13 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push15=, $1 -; NO-SIMD128-NEXT: f32.reinterpret_i32 $push14=, $5 -; NO-SIMD128-NEXT: f32.lt $push16=, $pop15, $pop14 -; NO-SIMD128-NEXT: i32.select $push17=, $5, $1, $pop16 -; NO-SIMD128-NEXT: i32.store 0($0), $pop17 +; NO-SIMD128-NEXT: i32.store 12($0), $pop3 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push5=, $3 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push4=, $7 +; NO-SIMD128-NEXT: f32.lt $push6=, $pop5, $pop4 +; NO-SIMD128-NEXT: i32.select $push7=, $7, $3, $pop6 +; NO-SIMD128-NEXT: i32.store 8($0), $pop7 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push9=, $2 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push8=, $6 +; NO-SIMD128-NEXT: f32.lt $push10=, $pop9, $pop8 +; NO-SIMD128-NEXT: i32.select $push11=, $6, $2, $pop10 +; NO-SIMD128-NEXT: i32.store 4($0), $pop11 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push13=, $1 +; NO-SIMD128-NEXT: f32.reinterpret_i32 $push12=, $5 +; NO-SIMD128-NEXT: f32.lt $push14=, $pop13, $pop12 +; NO-SIMD128-NEXT: i32.select $push15=, $5, $1, $pop14 +; NO-SIMD128-NEXT: i32.store 0($0), $pop15 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: pmax_int_v4f32: @@ -14465,13 +12651,11 @@ define <4 x i32> @pmax_int_v4f32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-FAST-NEXT: f32.lt $push10=, $pop9, $pop8 ; NO-SIMD128-FAST-NEXT: i32.select $push11=, $7, $3, $pop10 ; NO-SIMD128-FAST-NEXT: i32.store 8($0), $pop11 -; NO-SIMD128-FAST-NEXT: i32.const $push16=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push17=, $0, $pop16 ; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push13=, $4 ; NO-SIMD128-FAST-NEXT: f32.reinterpret_i32 $push12=, $8 ; NO-SIMD128-FAST-NEXT: f32.lt $push14=, $pop13, $pop12 ; NO-SIMD128-FAST-NEXT: i32.select $push15=, $8, $4, $pop14 -; NO-SIMD128-FAST-NEXT: i32.store 0($pop17), $pop15 +; NO-SIMD128-FAST-NEXT: i32.store 12($0), $pop15 ; NO-SIMD128-FAST-NEXT: return %fx = bitcast <4 x i32> %x to <4 x float> %fy = bitcast <4 x i32> %y to <4 x float> @@ -14496,16 +12680,14 @@ define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: add_v4f32: ; NO-SIMD128: .functype add_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.add $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.add $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.add $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; 
NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.add $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.add $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.add $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.add $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.add $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: add_v4f32: @@ -14517,10 +12699,8 @@ define <4 x float> @add_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.add $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.add $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.add $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fadd <4 x float> %x, %y ret <4 x float> %a @@ -14542,16 +12722,14 @@ define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: sub_v4f32: ; NO-SIMD128: .functype sub_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.sub $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.sub $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.sub $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.sub $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.sub $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.sub $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.sub $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.sub $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sub_v4f32: @@ -14563,10 +12741,8 @@ define <4 x float> @sub_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.sub $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.sub $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.sub $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fsub <4 x float> %x, %y ret <4 x float> %a @@ -14588,16 +12764,14 @@ define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: div_v4f32: ; NO-SIMD128: .functype div_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.div $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.div $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.div $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.div $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 
0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.div $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.div $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.div $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.div $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: div_v4f32: @@ -14609,10 +12783,8 @@ define <4 x float> @div_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.div $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.div $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.div $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fdiv <4 x float> %x, %y ret <4 x float> %a @@ -14634,16 +12806,14 @@ define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: mul_v4f32: ; NO-SIMD128: .functype mul_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.mul $push0=, $3, $7 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.mul $push1=, $2, $6 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.mul $push2=, $1, $5 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push4=, 12 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: f32.mul $push3=, $4, $8 -; NO-SIMD128-NEXT: f32.store 0($pop5), $pop3 +; NO-SIMD128-NEXT: f32.mul $push0=, $4, $8 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.mul $push1=, $3, $7 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: f32.mul $push2=, $2, $6 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.mul $push3=, $1, $5 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: mul_v4f32: @@ -14655,10 +12825,8 @@ define <4 x float> @mul_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.mul $push2=, $3, $7 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.mul $push5=, $4, $8 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.mul $push3=, $4, $8 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = fmul <4 x float> %x, %y ret <4 x float> %a @@ -14681,16 +12849,14 @@ define <4 x float> @sqrt_v4f32(<4 x float> %x) { ; NO-SIMD128-LABEL: sqrt_v4f32: ; NO-SIMD128: .functype sqrt_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: -; NO-SIMD128-NEXT: f32.sqrt $push0=, $3 -; NO-SIMD128-NEXT: f32.store 8($0), $pop0 -; NO-SIMD128-NEXT: f32.sqrt $push1=, $2 -; NO-SIMD128-NEXT: f32.store 4($0), $pop1 -; NO-SIMD128-NEXT: f32.sqrt $push2=, $1 -; NO-SIMD128-NEXT: f32.store 0($0), $pop2 -; NO-SIMD128-NEXT: i32.const $push3=, 12 -; NO-SIMD128-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-NEXT: f32.sqrt $push5=, $4 -; NO-SIMD128-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-NEXT: f32.sqrt $push0=, $4 +; NO-SIMD128-NEXT: f32.store 12($0), $pop0 +; NO-SIMD128-NEXT: f32.sqrt $push1=, $3 +; NO-SIMD128-NEXT: f32.store 8($0), $pop1 +; NO-SIMD128-NEXT: 
f32.sqrt $push2=, $2 +; NO-SIMD128-NEXT: f32.store 4($0), $pop2 +; NO-SIMD128-NEXT: f32.sqrt $push3=, $1 +; NO-SIMD128-NEXT: f32.store 0($0), $pop3 ; NO-SIMD128-NEXT: return ; ; NO-SIMD128-FAST-LABEL: sqrt_v4f32: @@ -14702,10 +12868,8 @@ define <4 x float> @sqrt_v4f32(<4 x float> %x) { ; NO-SIMD128-FAST-NEXT: f32.store 4($0), $pop1 ; NO-SIMD128-FAST-NEXT: f32.sqrt $push2=, $3 ; NO-SIMD128-FAST-NEXT: f32.store 8($0), $pop2 -; NO-SIMD128-FAST-NEXT: i32.const $push3=, 12 -; NO-SIMD128-FAST-NEXT: i32.add $push4=, $0, $pop3 -; NO-SIMD128-FAST-NEXT: f32.sqrt $push5=, $4 -; NO-SIMD128-FAST-NEXT: f32.store 0($pop4), $pop5 +; NO-SIMD128-FAST-NEXT: f32.sqrt $push3=, $4 +; NO-SIMD128-FAST-NEXT: f32.store 12($0), $pop3 ; NO-SIMD128-FAST-NEXT: return %a = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x) ret <4 x float> %a diff --git a/llvm/test/CodeGen/WebAssembly/simd.ll b/llvm/test/CodeGen/WebAssembly/simd.ll index d2a38de..5ec9f6a 100644 --- a/llvm/test/CodeGen/WebAssembly/simd.ll +++ b/llvm/test/CodeGen/WebAssembly/simd.ll @@ -38,44 +38,22 @@ define <16 x i8> @splat_v16i8(i8 %x) { ; NO-SIMD128-LABEL: splat_v16i8: ; NO-SIMD128: .functype splat_v16i8 (i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $1 +; NO-SIMD128-NEXT: i32.store8 14($0), $1 +; NO-SIMD128-NEXT: i32.store8 13($0), $1 +; NO-SIMD128-NEXT: i32.store8 12($0), $1 +; NO-SIMD128-NEXT: i32.store8 11($0), $1 +; NO-SIMD128-NEXT: i32.store8 10($0), $1 +; NO-SIMD128-NEXT: i32.store8 9($0), $1 ; NO-SIMD128-NEXT: i32.store8 8($0), $1 +; NO-SIMD128-NEXT: i32.store8 7($0), $1 +; NO-SIMD128-NEXT: i32.store8 6($0), $1 +; NO-SIMD128-NEXT: i32.store8 5($0), $1 ; NO-SIMD128-NEXT: i32.store8 4($0), $1 +; NO-SIMD128-NEXT: i32.store8 3($0), $1 ; NO-SIMD128-NEXT: i32.store8 2($0), $1 ; NO-SIMD128-NEXT: i32.store8 1($0), $1 ; NO-SIMD128-NEXT: i32.store8 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $1 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $1 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $1 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $1 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $1 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $1 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $1 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $1 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $1 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $1 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $1 ; NO-SIMD128-NEXT: return %v = insertelement <16 x i8> undef, i8 %x, i32 0 %res = shufflevector <16 x i8> %v, <16 x i8> undef, @@ -356,44 +334,22 @@ define <16 x i8> @replace_v16i8(<16 x i8> %v, i8 %x) { ; 
NO-SIMD128-LABEL: replace_v16i8: ; NO-SIMD128: .functype replace_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $16 +; NO-SIMD128-NEXT: i32.store8 14($0), $15 +; NO-SIMD128-NEXT: i32.store8 13($0), $14 +; NO-SIMD128-NEXT: i32.store8 12($0), $13 +; NO-SIMD128-NEXT: i32.store8 11($0), $17 +; NO-SIMD128-NEXT: i32.store8 10($0), $11 +; NO-SIMD128-NEXT: i32.store8 9($0), $10 ; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 7($0), $8 +; NO-SIMD128-NEXT: i32.store8 6($0), $7 +; NO-SIMD128-NEXT: i32.store8 5($0), $6 ; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 3($0), $4 ; NO-SIMD128-NEXT: i32.store8 2($0), $3 ; NO-SIMD128-NEXT: i32.store8 1($0), $2 ; NO-SIMD128-NEXT: i32.store8 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $16 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $14 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $17 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $10 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $8 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $6 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $4 ; NO-SIMD128-NEXT: return %res = insertelement <16 x i8> %v, i8 %x, i32 11 ret <16 x i8> %res @@ -461,44 +417,22 @@ define <16 x i8> @replace_zero_v16i8(<16 x i8> %v, i8 %x) { ; NO-SIMD128-LABEL: replace_zero_v16i8: ; NO-SIMD128: .functype replace_zero_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $16 +; NO-SIMD128-NEXT: i32.store8 14($0), $15 +; NO-SIMD128-NEXT: i32.store8 13($0), $14 +; NO-SIMD128-NEXT: i32.store8 12($0), $13 +; NO-SIMD128-NEXT: i32.store8 11($0), $12 +; NO-SIMD128-NEXT: i32.store8 10($0), $11 +; NO-SIMD128-NEXT: i32.store8 9($0), $10 ; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 7($0), $8 +; NO-SIMD128-NEXT: i32.store8 6($0), $7 +; NO-SIMD128-NEXT: i32.store8 5($0), $6 ; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 3($0), $4 ; NO-SIMD128-NEXT: i32.store8 2($0), $3 ; NO-SIMD128-NEXT: i32.store8 1($0), $2 ; NO-SIMD128-NEXT: i32.store8 0($0), $17 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $16 -; NO-SIMD128-NEXT: 
i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $14 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $12 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $10 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $8 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $6 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $4 ; NO-SIMD128-NEXT: return %res = insertelement <16 x i8> %v, i8 %x, i32 0 ret <16 x i8> %res @@ -514,44 +448,22 @@ define <16 x i8> @shuffle_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: shuffle_v16i8: ; NO-SIMD128: .functype shuffle_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $32 +; NO-SIMD128-NEXT: i32.store8 14($0), $15 +; NO-SIMD128-NEXT: i32.store8 13($0), $30 +; NO-SIMD128-NEXT: i32.store8 12($0), $13 +; NO-SIMD128-NEXT: i32.store8 11($0), $28 +; NO-SIMD128-NEXT: i32.store8 10($0), $11 +; NO-SIMD128-NEXT: i32.store8 9($0), $26 ; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 7($0), $24 +; NO-SIMD128-NEXT: i32.store8 6($0), $7 +; NO-SIMD128-NEXT: i32.store8 5($0), $22 ; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 3($0), $20 ; NO-SIMD128-NEXT: i32.store8 2($0), $3 ; NO-SIMD128-NEXT: i32.store8 1($0), $18 ; NO-SIMD128-NEXT: i32.store8 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $32 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $30 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $28 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $26 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $24 -; NO-SIMD128-NEXT: 
i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $22 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $20 ; NO-SIMD128-NEXT: return %res = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> @shuffle_undef_v16i8(<16 x i8> %x, <16 x i8> %y) { ; NO-SIMD128-LABEL: shuffle_undef_v16i8: ; NO-SIMD128: .functype shuffle_undef_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $2 +; NO-SIMD128-NEXT: i32.store8 14($0), $2 +; NO-SIMD128-NEXT: i32.store8 13($0), $2 +; NO-SIMD128-NEXT: i32.store8 12($0), $2 +; NO-SIMD128-NEXT: i32.store8 11($0), $2 +; NO-SIMD128-NEXT: i32.store8 10($0), $2 +; NO-SIMD128-NEXT: i32.store8 9($0), $2 ; NO-SIMD128-NEXT: i32.store8 8($0), $2 +; NO-SIMD128-NEXT: i32.store8 7($0), $2 +; NO-SIMD128-NEXT: i32.store8 6($0), $2 +; NO-SIMD128-NEXT: i32.store8 5($0), $2 ; NO-SIMD128-NEXT: i32.store8 4($0), $2 +; NO-SIMD128-NEXT: i32.store8 3($0), $2 ; NO-SIMD128-NEXT: i32.store8 2($0), $2 ; NO-SIMD128-NEXT: i32.store8 1($0), $2 ; NO-SIMD128-NEXT: i32.store8 0($0), $2 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $2 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $2 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $2 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $2 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $2 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $2 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $2 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $2 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $2 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $2 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $2 ; NO-SIMD128-NEXT: return %res = shufflevector <16 x i8> %x, <16 x i8> %y, <16 x i32> @build_v16i8(i8 %x0, i8 %x1, i8 %x2, i8 %x3, ; NO-SIMD128-LABEL: build_v16i8: ; NO-SIMD128: .functype build_v16i8 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store8 15($0), $16 +; NO-SIMD128-NEXT: i32.store8 14($0), $15 +; NO-SIMD128-NEXT: i32.store8 13($0), $14 +; NO-SIMD128-NEXT: i32.store8 12($0), $13 +; NO-SIMD128-NEXT: i32.store8 11($0), $12 +; NO-SIMD128-NEXT: i32.store8 10($0), $11 +; NO-SIMD128-NEXT: 
i32.store8 9($0), $10 ; NO-SIMD128-NEXT: i32.store8 8($0), $9 +; NO-SIMD128-NEXT: i32.store8 7($0), $8 +; NO-SIMD128-NEXT: i32.store8 6($0), $7 +; NO-SIMD128-NEXT: i32.store8 5($0), $6 ; NO-SIMD128-NEXT: i32.store8 4($0), $5 +; NO-SIMD128-NEXT: i32.store8 3($0), $4 ; NO-SIMD128-NEXT: i32.store8 2($0), $3 ; NO-SIMD128-NEXT: i32.store8 1($0), $2 ; NO-SIMD128-NEXT: i32.store8 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 15 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store8 0($pop1), $16 -; NO-SIMD128-NEXT: i32.const $push2=, 14 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store8 0($pop3), $15 -; NO-SIMD128-NEXT: i32.const $push4=, 13 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store8 0($pop5), $14 -; NO-SIMD128-NEXT: i32.const $push6=, 12 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store8 0($pop7), $13 -; NO-SIMD128-NEXT: i32.const $push8=, 11 -; NO-SIMD128-NEXT: i32.add $push9=, $0, $pop8 -; NO-SIMD128-NEXT: i32.store8 0($pop9), $12 -; NO-SIMD128-NEXT: i32.const $push10=, 10 -; NO-SIMD128-NEXT: i32.add $push11=, $0, $pop10 -; NO-SIMD128-NEXT: i32.store8 0($pop11), $11 -; NO-SIMD128-NEXT: i32.const $push12=, 9 -; NO-SIMD128-NEXT: i32.add $push13=, $0, $pop12 -; NO-SIMD128-NEXT: i32.store8 0($pop13), $10 -; NO-SIMD128-NEXT: i32.const $push14=, 7 -; NO-SIMD128-NEXT: i32.add $push15=, $0, $pop14 -; NO-SIMD128-NEXT: i32.store8 0($pop15), $8 -; NO-SIMD128-NEXT: i32.const $push16=, 6 -; NO-SIMD128-NEXT: i32.add $push17=, $0, $pop16 -; NO-SIMD128-NEXT: i32.store8 0($pop17), $7 -; NO-SIMD128-NEXT: i32.const $push18=, 5 -; NO-SIMD128-NEXT: i32.add $push19=, $0, $pop18 -; NO-SIMD128-NEXT: i32.store8 0($pop19), $6 -; NO-SIMD128-NEXT: i32.const $push20=, 3 -; NO-SIMD128-NEXT: i32.add $push21=, $0, $pop20 -; NO-SIMD128-NEXT: i32.store8 0($pop21), $4 ; NO-SIMD128-NEXT: return i8 %x4, i8 %x5, i8 %x6, i8 %x7, i8 %x8, i8 %x9, i8 %x10, i8 %x11, @@ -734,22 +602,14 @@ define <8 x i16> @splat_v8i16(i16 %x) { ; NO-SIMD128-LABEL: splat_v8i16: ; NO-SIMD128: .functype splat_v8i16 (i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $1 +; NO-SIMD128-NEXT: i32.store16 12($0), $1 +; NO-SIMD128-NEXT: i32.store16 10($0), $1 ; NO-SIMD128-NEXT: i32.store16 8($0), $1 +; NO-SIMD128-NEXT: i32.store16 6($0), $1 ; NO-SIMD128-NEXT: i32.store16 4($0), $1 ; NO-SIMD128-NEXT: i32.store16 2($0), $1 ; NO-SIMD128-NEXT: i32.store16 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $1 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $1 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $1 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $1 ; NO-SIMD128-NEXT: return %v = insertelement <8 x i16> undef, i16 %x, i32 0 %res = shufflevector <8 x i16> %v, <8 x i16> undef, @@ -1016,22 +876,14 @@ define <8 x i16> @replace_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-LABEL: replace_v8i16: ; NO-SIMD128: .functype replace_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $9 +; NO-SIMD128-NEXT: i32.store16 12($0), $7 +; NO-SIMD128-NEXT: i32.store16 10($0), $6 ; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; 
NO-SIMD128-NEXT: i32.store16 6($0), $4 ; NO-SIMD128-NEXT: i32.store16 4($0), $3 ; NO-SIMD128-NEXT: i32.store16 2($0), $2 ; NO-SIMD128-NEXT: i32.store16 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $9 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $6 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $4 ; NO-SIMD128-NEXT: return %res = insertelement <8 x i16> %v, i16 %x, i32 7 ret <8 x i16> %res @@ -1095,22 +947,14 @@ define <8 x i16> @replace_zero_v8i16(<8 x i16> %v, i16 %x) { ; NO-SIMD128-LABEL: replace_zero_v8i16: ; NO-SIMD128: .functype replace_zero_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $8 +; NO-SIMD128-NEXT: i32.store16 12($0), $7 +; NO-SIMD128-NEXT: i32.store16 10($0), $6 ; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 6($0), $4 ; NO-SIMD128-NEXT: i32.store16 4($0), $3 ; NO-SIMD128-NEXT: i32.store16 2($0), $2 ; NO-SIMD128-NEXT: i32.store16 0($0), $9 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $8 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $6 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $4 ; NO-SIMD128-NEXT: return %res = insertelement <8 x i16> %v, i16 %x, i32 0 ret <8 x i16> %res @@ -1126,22 +970,14 @@ define <8 x i16> @shuffle_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: shuffle_v8i16: ; NO-SIMD128: .functype shuffle_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $16 +; NO-SIMD128-NEXT: i32.store16 12($0), $7 +; NO-SIMD128-NEXT: i32.store16 10($0), $14 ; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 6($0), $12 ; NO-SIMD128-NEXT: i32.store16 4($0), $3 ; NO-SIMD128-NEXT: i32.store16 2($0), $10 ; NO-SIMD128-NEXT: i32.store16 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $16 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $14 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $12 ; NO-SIMD128-NEXT: return %res = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> @@ -1158,22 +994,14 @@ define <8 x i16> @shuffle_undef_v8i16(<8 x i16> %x, <8 x i16> %y) { ; NO-SIMD128-LABEL: shuffle_undef_v8i16: ; NO-SIMD128: .functype shuffle_undef_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; 
NO-SIMD128-NEXT: i32.store16 14($0), $2 +; NO-SIMD128-NEXT: i32.store16 12($0), $2 +; NO-SIMD128-NEXT: i32.store16 10($0), $2 ; NO-SIMD128-NEXT: i32.store16 8($0), $2 +; NO-SIMD128-NEXT: i32.store16 6($0), $2 ; NO-SIMD128-NEXT: i32.store16 4($0), $2 ; NO-SIMD128-NEXT: i32.store16 2($0), $2 ; NO-SIMD128-NEXT: i32.store16 0($0), $2 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $2 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $2 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $2 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $2 ; NO-SIMD128-NEXT: return %res = shufflevector <8 x i16> %x, <8 x i16> %y, <8 x i32> @build_v8i16(i16 %x0, i16 %x1, i16 %x2, i16 %x3, ; NO-SIMD128-LABEL: build_v8i16: ; NO-SIMD128: .functype build_v8i16 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store16 14($0), $8 +; NO-SIMD128-NEXT: i32.store16 12($0), $7 +; NO-SIMD128-NEXT: i32.store16 10($0), $6 ; NO-SIMD128-NEXT: i32.store16 8($0), $5 +; NO-SIMD128-NEXT: i32.store16 6($0), $4 ; NO-SIMD128-NEXT: i32.store16 4($0), $3 ; NO-SIMD128-NEXT: i32.store16 2($0), $2 ; NO-SIMD128-NEXT: i32.store16 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 14 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store16 0($pop1), $8 -; NO-SIMD128-NEXT: i32.const $push2=, 12 -; NO-SIMD128-NEXT: i32.add $push3=, $0, $pop2 -; NO-SIMD128-NEXT: i32.store16 0($pop3), $7 -; NO-SIMD128-NEXT: i32.const $push4=, 10 -; NO-SIMD128-NEXT: i32.add $push5=, $0, $pop4 -; NO-SIMD128-NEXT: i32.store16 0($pop5), $6 -; NO-SIMD128-NEXT: i32.const $push6=, 6 -; NO-SIMD128-NEXT: i32.add $push7=, $0, $pop6 -; NO-SIMD128-NEXT: i32.store16 0($pop7), $4 ; NO-SIMD128-NEXT: return i16 %x4, i16 %x5, i16 %x6, i16 %x7) { %t0 = insertelement <8 x i16> undef, i16 %x0, i32 0 @@ -1258,12 +1078,10 @@ define <4 x i32> @splat_v4i32(i32 %x) { ; NO-SIMD128-LABEL: splat_v4i32: ; NO-SIMD128: .functype splat_v4i32 (i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $1 ; NO-SIMD128-NEXT: i32.store 8($0), $1 ; NO-SIMD128-NEXT: i32.store 4($0), $1 ; NO-SIMD128-NEXT: i32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $1 ; NO-SIMD128-NEXT: return %v = insertelement <4 x i32> undef, i32 %x, i32 0 %res = shufflevector <4 x i32> %v, <4 x i32> undef, @@ -1368,12 +1186,10 @@ define <4 x i32> @replace_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-LABEL: replace_v4i32: ; NO-SIMD128: .functype replace_v4i32 (i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $4 ; NO-SIMD128-NEXT: i32.store 8($0), $5 ; NO-SIMD128-NEXT: i32.store 4($0), $2 ; NO-SIMD128-NEXT: i32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %res = insertelement <4 x i32> %v, i32 %x, i32 2 ret <4 x i32> %res @@ -1433,12 +1249,10 @@ define <4 x i32> @replace_zero_v4i32(<4 x i32> %v, i32 %x) { ; NO-SIMD128-LABEL: replace_zero_v4i32: ; NO-SIMD128: .functype replace_zero_v4i32 (i32, i32, i32, i32, i32, i32) -> () ; 
NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $4 ; NO-SIMD128-NEXT: i32.store 8($0), $3 ; NO-SIMD128-NEXT: i32.store 4($0), $2 ; NO-SIMD128-NEXT: i32.store 0($0), $5 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %res = insertelement <4 x i32> %v, i32 %x, i32 0 ret <4 x i32> %res @@ -1454,12 +1268,10 @@ define <4 x i32> @shuffle_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: shuffle_v4i32: ; NO-SIMD128: .functype shuffle_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $8 ; NO-SIMD128-NEXT: i32.store 8($0), $3 ; NO-SIMD128-NEXT: i32.store 4($0), $6 ; NO-SIMD128-NEXT: i32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $8 ; NO-SIMD128-NEXT: return %res = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -1476,12 +1288,10 @@ define <4 x i32> @shuffle_undef_v4i32(<4 x i32> %x, <4 x i32> %y) { ; NO-SIMD128-LABEL: shuffle_undef_v4i32: ; NO-SIMD128: .functype shuffle_undef_v4i32 (i32, i32, i32, i32, i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $2 ; NO-SIMD128-NEXT: i32.store 8($0), $2 ; NO-SIMD128-NEXT: i32.store 4($0), $2 ; NO-SIMD128-NEXT: i32.store 0($0), $2 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $2 ; NO-SIMD128-NEXT: return %res = shufflevector <4 x i32> %x, <4 x i32> %y, <4 x i32> @@ -1501,12 +1311,10 @@ define <4 x i32> @build_v4i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) { ; NO-SIMD128-LABEL: build_v4i32: ; NO-SIMD128: .functype build_v4i32 (i32, i32, i32, i32, i32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: i32.store 12($0), $4 ; NO-SIMD128-NEXT: i32.store 8($0), $3 ; NO-SIMD128-NEXT: i32.store 4($0), $2 ; NO-SIMD128-NEXT: i32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: i32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %t0 = insertelement <4 x i32> undef, i32 %x0, i32 0 %t1 = insertelement <4 x i32> %t0, i32 %x1, i32 1 @@ -1801,12 +1609,10 @@ define <4 x float> @splat_v4f32(float %x) { ; NO-SIMD128-LABEL: splat_v4f32: ; NO-SIMD128: .functype splat_v4f32 (i32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $1 ; NO-SIMD128-NEXT: f32.store 8($0), $1 ; NO-SIMD128-NEXT: f32.store 4($0), $1 ; NO-SIMD128-NEXT: f32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $1 ; NO-SIMD128-NEXT: return %v = insertelement <4 x float> undef, float %x, i32 0 %res = shufflevector <4 x float> %v, <4 x float> undef, @@ -1911,12 +1717,10 @@ define <4 x float> @replace_v4f32(<4 x float> %v, float %x) { ; NO-SIMD128-LABEL: replace_v4f32: ; NO-SIMD128: .functype replace_v4f32 (i32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $4 ; NO-SIMD128-NEXT: f32.store 8($0), $5 ; NO-SIMD128-NEXT: f32.store 4($0), $2 ; NO-SIMD128-NEXT: f32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %res = insertelement <4 x float> %v, float %x, i32 2 ret <4 x float> %res @@ -1976,12 +1780,10 @@ define <4 x 
float> @replace_zero_v4f32(<4 x float> %v, float %x) { ; NO-SIMD128-LABEL: replace_zero_v4f32: ; NO-SIMD128: .functype replace_zero_v4f32 (i32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $4 ; NO-SIMD128-NEXT: f32.store 8($0), $3 ; NO-SIMD128-NEXT: f32.store 4($0), $2 ; NO-SIMD128-NEXT: f32.store 0($0), $5 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %res = insertelement <4 x float> %v, float %x, i32 0 ret <4 x float> %res @@ -1997,12 +1799,10 @@ define <4 x float> @shuffle_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: shuffle_v4f32: ; NO-SIMD128: .functype shuffle_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $8 ; NO-SIMD128-NEXT: f32.store 8($0), $3 ; NO-SIMD128-NEXT: f32.store 4($0), $6 ; NO-SIMD128-NEXT: f32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $8 ; NO-SIMD128-NEXT: return %res = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> @@ -2019,12 +1819,10 @@ define <4 x float> @shuffle_undef_v4f32(<4 x float> %x, <4 x float> %y) { ; NO-SIMD128-LABEL: shuffle_undef_v4f32: ; NO-SIMD128: .functype shuffle_undef_v4f32 (i32, f32, f32, f32, f32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $2 ; NO-SIMD128-NEXT: f32.store 8($0), $2 ; NO-SIMD128-NEXT: f32.store 4($0), $2 ; NO-SIMD128-NEXT: f32.store 0($0), $2 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $2 ; NO-SIMD128-NEXT: return %res = shufflevector <4 x float> %x, <4 x float> %y, <4 x i32> @@ -2044,12 +1842,10 @@ define <4 x float> @build_v4f32(float %x0, float %x1, float %x2, float %x3) { ; NO-SIMD128-LABEL: build_v4f32: ; NO-SIMD128: .functype build_v4f32 (i32, f32, f32, f32, f32) -> () ; NO-SIMD128-NEXT: # %bb.0: +; NO-SIMD128-NEXT: f32.store 12($0), $4 ; NO-SIMD128-NEXT: f32.store 8($0), $3 ; NO-SIMD128-NEXT: f32.store 4($0), $2 ; NO-SIMD128-NEXT: f32.store 0($0), $1 -; NO-SIMD128-NEXT: i32.const $push0=, 12 -; NO-SIMD128-NEXT: i32.add $push1=, $0, $pop0 -; NO-SIMD128-NEXT: f32.store 0($pop1), $4 ; NO-SIMD128-NEXT: return %t0 = insertelement <4 x float> undef, float %x0, i32 0 %t1 = insertelement <4 x float> %t0, float %x1, i32 1 -- cgit v1.1 From 51f1cb5355d296ccb7756944d0545d9c96066b78 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 4 Apr 2024 08:04:21 -0700 Subject: [X86] Add or_is_add patterns for INC. 
(#87584) Should fix the cases noted in #86857 --- llvm/lib/Target/X86/X86InstrCompiler.td | 5 +++++ llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll | 11 +++++++---- llvm/test/CodeGen/X86/load-local-v3i129.ll | 4 ++-- llvm/test/CodeGen/X86/pr23664.ll | 2 +- 4 files changed, 15 insertions(+), 7 deletions(-) diff --git a/llvm/lib/Target/X86/X86InstrCompiler.td b/llvm/lib/Target/X86/X86InstrCompiler.td index ce3b6af..270dd32 100644 --- a/llvm/lib/Target/X86/X86InstrCompiler.td +++ b/llvm/lib/Target/X86/X86InstrCompiler.td @@ -2161,6 +2161,11 @@ multiclass EFLAGSDefiningPats { def : Pat<(X86sub_flag_nocf GR16:$src, -1), (!cast(INC16r#suffix) GR16:$src)>; def : Pat<(X86sub_flag_nocf GR32:$src, -1), (!cast(INC32r#suffix) GR32:$src)>; def : Pat<(X86sub_flag_nocf GR64:$src, -1), (!cast(INC64r#suffix) GR64:$src)>; + + def : Pat<(or_is_add GR8:$src, 1), (!cast(INC8r#suffix) GR8:$src)>; + def : Pat<(or_is_add GR16:$src, 1), (!cast(INC16r#suffix) GR16:$src)>; + def : Pat<(or_is_add GR32:$src, 1), (!cast(INC32r#suffix) GR32:$src)>; + def : Pat<(or_is_add GR64:$src, 1), (!cast(INC64r#suffix) GR64:$src)>; } } diff --git a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll index 609be3b..50e736a 100644 --- a/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll +++ b/llvm/test/CodeGen/X86/2009-05-23-dagcombine-shifts.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4 ; RUN: llc < %s | FileCheck %s ; Check that the shr(shl X, 56), 48) is not mistakenly turned into @@ -16,11 +17,13 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3 target triple = "x86_64-unknown-linux-gnu" define i64 @foo(i64 %b) nounwind readnone { -entry: ; CHECK-LABEL: foo: -; CHECK: movsbq %dil, %rax -; CHECK: shlq $8, %rax -; CHECK: orq $1, %rax +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: movsbq %dil, %rax +; CHECK-NEXT: shlq $8, %rax +; CHECK-NEXT: incq %rax +; CHECK-NEXT: retq +entry: %shl = shl i64 %b, 56 ; [#uses=1] %shr = ashr i64 %shl, 48 ; [#uses=1] %add5 = or i64 %shr, 1 ; [#uses=1] diff --git a/llvm/test/CodeGen/X86/load-local-v3i129.ll b/llvm/test/CodeGen/X86/load-local-v3i129.ll index 8fa7ce0..eb5d172 100644 --- a/llvm/test/CodeGen/X86/load-local-v3i129.ll +++ b/llvm/test/CodeGen/X86/load-local-v3i129.ll @@ -12,7 +12,7 @@ define void @_start() nounwind { ; FAST-SHLD-NEXT: shrq $2, %rcx ; FAST-SHLD-NEXT: shldq $2, %rdx, %rcx ; FAST-SHLD-NEXT: andq $-4, %rax -; FAST-SHLD-NEXT: orq $1, %rax +; FAST-SHLD-NEXT: incq %rax ; FAST-SHLD-NEXT: movq %rax, -40(%rsp) ; FAST-SHLD-NEXT: movq %rcx, -32(%rsp) ; FAST-SHLD-NEXT: orq $-2, -56(%rsp) @@ -23,7 +23,7 @@ define void @_start() nounwind { ; SLOW-SHLD: # %bb.0: # %Entry ; SLOW-SHLD-NEXT: movq -40(%rsp), %rax ; SLOW-SHLD-NEXT: andq $-4, %rax -; SLOW-SHLD-NEXT: orq $1, %rax +; SLOW-SHLD-NEXT: incq %rax ; SLOW-SHLD-NEXT: movq %rax, -40(%rsp) ; SLOW-SHLD-NEXT: orq $-2, -56(%rsp) ; SLOW-SHLD-NEXT: movq $-1, -48(%rsp) diff --git a/llvm/test/CodeGen/X86/pr23664.ll b/llvm/test/CodeGen/X86/pr23664.ll index 453e5db..8179602 100644 --- a/llvm/test/CodeGen/X86/pr23664.ll +++ b/llvm/test/CodeGen/X86/pr23664.ll @@ -6,7 +6,7 @@ define i2 @f(i32 %arg) { ; CHECK: # %bb.0: ; CHECK-NEXT: # kill: def $edi killed $edi def $rdi ; CHECK-NEXT: leal (%rdi,%rdi), %eax -; CHECK-NEXT: orb $1, %al +; CHECK-NEXT: incb %al ; CHECK-NEXT: # kill: def $al killed $al killed $eax ; CHECK-NEXT: retq %trunc = trunc i32 %arg to i1 -- cgit v1.1 From 
1b761205f2686516cebadbcbc37f798197d9c482 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 4 Apr 2024 16:11:06 +0100 Subject: [APInt] Add a simpler overload of multiplicativeInverse (#87610) The current APInt::multiplicativeInverse takes a modulus which can be any value, but all in-tree callers use a power of two. Moreover, most callers want to use two to the power of the width of an existing APInt, which is awkward because 2^N is not representable as an N-bit APInt. Add a new overload of multiplicativeInverse which implicitly uses 2^BitWidth as the modulus. --- llvm/include/llvm/ADT/APInt.h | 3 +++ llvm/lib/Analysis/ScalarEvolution.cpp | 11 +++-------- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 5 +---- llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp | 23 +++++------------------ llvm/lib/Support/APInt.cpp | 13 +++++++++++++ llvm/unittests/ADT/APIntTest.cpp | 3 ++- 6 files changed, 27 insertions(+), 31 deletions(-) diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index b9b39f3..bd17162 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -1743,6 +1743,9 @@ public: /// \returns the multiplicative inverse for a given modulo. APInt multiplicativeInverse(const APInt &modulo) const; + /// \returns the multiplicative inverse of an odd APInt modulo 2^BitWidth. + APInt multiplicativeInverse() const; + /// @} /// \name Building-block Operations for APInt and APFloat /// @{ diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index 515b9d0..e030b9f 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -944,10 +944,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, // Calculate the multiplicative inverse of K! / 2^T; // this multiplication factor will perform the exact division by // K! / 2^T. - APInt Mod = APInt::getSignedMinValue(W+1); - APInt MultiplyFactor = OddFactorial.zext(W+1); - MultiplyFactor = MultiplyFactor.multiplicativeInverse(Mod); - MultiplyFactor = MultiplyFactor.trunc(W); + APInt MultiplyFactor = OddFactorial.multiplicativeInverse(); // Calculate the product, at width T+W IntegerType *CalculationTy = IntegerType::get(SE.getContext(), @@ -10086,10 +10083,8 @@ static const SCEV *SolveLinEquationWithOverflow(const APInt &A, const SCEV *B, // If D == 1, (N / D) == N == 2^BW, so we need one extra bit to represent // (N / D) in general. The inverse itself always fits into BW bits, though, // so we immediately truncate it. - APInt AD = A.lshr(Mult2).zext(BW + 1); // AD = A / D - APInt Mod(BW + 1, 0); - Mod.setBit(BW - Mult2); // Mod = N / D - APInt I = AD.multiplicativeInverse(Mod).trunc(BW); + APInt AD = A.lshr(Mult2).trunc(BW - Mult2); // AD = A / D + APInt I = AD.multiplicativeInverse().zext(BW); // 4. Compute the minimum unsigned root of the equation: // I * (B / D) mod (N / D) diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 062132c..719209e 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -5201,10 +5201,7 @@ MachineInstr *CombinerHelper::buildSDivUsingMul(MachineInstr &MI) { // Calculate the multiplicative inverse modulo BW. // 2^W requires W + 1 bits, so we have to extend and then truncate. 
- unsigned W = Divisor.getBitWidth(); - APInt Factor = Divisor.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); + APInt Factor = Divisor.multiplicativeInverse(); Shifts.push_back(MIB.buildConstant(ScalarShiftAmtTy, Shift).getReg(0)); Factors.push_back(MIB.buildConstant(ScalarTy, Factor).getReg(0)); return true; diff --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 5e053f9..409d66a 100644 --- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -6071,11 +6071,7 @@ static SDValue BuildExactSDIV(const TargetLowering &TLI, SDNode *N, Divisor.ashrInPlace(Shift); UseSRA = true; } - // Calculate the multiplicative inverse, using Newton's method. - APInt t; - APInt Factor = Divisor; - while ((t = Divisor * Factor) != 1) - Factor *= APInt(Divisor.getBitWidth(), 2) - t; + APInt Factor = Divisor.multiplicativeInverse(); Shifts.push_back(DAG.getConstant(Shift, dl, ShSVT)); Factors.push_back(DAG.getConstant(Factor, dl, SVT)); return true; @@ -6664,10 +6660,7 @@ TargetLowering::prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. unsigned W = D.getBitWidth(); - APInt P = D0.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); - assert(!P.isZero() && "No multiplicative inverse!"); // unreachable + APInt P = D0.multiplicativeInverse(); assert((D0 * P).isOne() && "Multiplicative inverse basic check failed."); // Q = floor((2^W - 1) u/ D) @@ -6922,10 +6915,7 @@ TargetLowering::prepareSREMEqFold(EVT SETCCVT, SDValue REMNode, // P = inv(D0, 2^W) // 2^W requires W + 1 bits, so we have to extend and then truncate. unsigned W = D.getBitWidth(); - APInt P = D0.zext(W + 1) - .multiplicativeInverse(APInt::getSignedMinValue(W + 1)) - .trunc(W); - assert(!P.isZero() && "No multiplicative inverse!"); // unreachable + APInt P = D0.multiplicativeInverse(); assert((D0 * P).isOne() && "Multiplicative inverse basic check failed."); // A = floor((2^(W - 1) - 1) / D0) & -2^K @@ -7651,7 +7641,7 @@ bool TargetLowering::expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, // // For division, we can compute the remainder using the algorithm described // above, subtract it from the dividend to get an exact multiple of Constant. -// Then multiply that extact multiply by the multiplicative inverse modulo +// Then multiply that exact multiply by the multiplicative inverse modulo // (1 << (BitWidth / 2)) to get the quotient. // If Constant is even, we can shift right the dividend and the divisor by the @@ -7786,10 +7776,7 @@ bool TargetLowering::expandDIVREMByConstant(SDNode *N, // Multiply by the multiplicative inverse of the divisor modulo // (1 << BitWidth). - APInt Mod = APInt::getSignedMinValue(BitWidth + 1); - APInt MulFactor = Divisor.zext(BitWidth + 1); - MulFactor = MulFactor.multiplicativeInverse(Mod); - MulFactor = MulFactor.trunc(BitWidth); + APInt MulFactor = Divisor.multiplicativeInverse(); SDValue Quotient = DAG.getNode(ISD::MUL, dl, VT, Dividend, DAG.getConstant(MulFactor, dl, VT)); diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index c206097..f8f699f 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -1289,6 +1289,19 @@ APInt APInt::multiplicativeInverse(const APInt& modulo) const { return std::move(t[i]); } +/// \returns the multiplicative inverse of an odd APInt modulo 2^BitWidth. 
+APInt APInt::multiplicativeInverse() const { + assert((*this)[0] && + "multiplicative inverse is only defined for odd numbers!"); + + // Use Newton's method. + APInt Factor = *this; + APInt T; + while (!(T = *this * Factor).isOne()) + Factor *= 2 - T; + return Factor; +} + /// Implementation of Knuth's Algorithm D (Division of nonnegative integers) /// from "Art of Computer Programming, Volume 2", section 4.3.1, p. 272. The /// variables here have the same names as in the algorithm. Comments explain diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp index d5ef63e..23f9ee2 100644 --- a/llvm/unittests/ADT/APIntTest.cpp +++ b/llvm/unittests/ADT/APIntTest.cpp @@ -3257,9 +3257,10 @@ TEST(APIntTest, MultiplicativeInverseExaustive) { .multiplicativeInverse(APInt::getSignedMinValue(BitWidth + 1)) .trunc(BitWidth); APInt One = V * MulInv; - if (!V.isZero() && V.countr_zero() == 0) { + if (V[0]) { // Multiplicative inverse exists for all odd numbers. EXPECT_TRUE(One.isOne()); + EXPECT_TRUE((V * V.multiplicativeInverse()).isOne()); } else { // Multiplicative inverse does not exist for even numbers (and 0). EXPECT_TRUE(MulInv.isZero()); -- cgit v1.1 From eb3819073ea7aa6ee06b698da5e8f8a9d08dff75 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Thu, 4 Apr 2024 17:25:44 +0200 Subject: [libc++][oss-fuzz] Updates C++ version used. (#87531) This version update allows testing of `std::format` and the chrono timezone parsing in oss-fuzz. --- libcxx/utils/ci/oss-fuzz.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libcxx/utils/ci/oss-fuzz.sh b/libcxx/utils/ci/oss-fuzz.sh index e572340..03b59b2 100755 --- a/libcxx/utils/ci/oss-fuzz.sh +++ b/libcxx/utils/ci/oss-fuzz.sh @@ -23,7 +23,7 @@ for test in libcxx/test/libcxx/fuzzing/*.pass.cpp; do exe="$(basename ${test})" exe="${exe%.pass.cpp}" ${CXX} ${CXXFLAGS} \ - -std=c++14 \ + -std=c++20 \ -DLIBCPP_OSS_FUZZ \ -D_LIBCPP_HAS_NO_VENDOR_AVAILABILITY_ANNOTATIONS \ -nostdinc++ -cxx-isystem ${INSTALL}/include/c++/v1 \ -- cgit v1.1 From 5fd9babbfcd02bae431d5b280da59adddc2824d3 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 16:54:21 +0100 Subject: [X86] Rename Zn3FPP# ports -> Zn3FP#. NFC Matches Zn4FP# (which is mostly a copy) and avoids an issue in llvm-exegesis which is terrible at choosing the right portname when they have aliases. 
--- llvm/lib/Target/X86/X86ScheduleZnver3.td | 86 ++++---- .../llvm-mca/X86/Znver3/dependency-breaking-gpr.s | 72 +++---- .../llvm-mca/X86/Znver3/mulx-hi-read-advance.s | 24 +-- .../tools/llvm-mca/X86/Znver3/mulx-lo-reg-use.s | 24 +-- .../tools/llvm-mca/X86/Znver3/mulx-read-advance.s | 24 +-- .../tools/llvm-mca/X86/Znver3/mulx-same-regs.s | 24 +-- .../tools/llvm-mca/X86/Znver3/one-idioms-avx-xmm.s | 48 ++--- .../tools/llvm-mca/X86/Znver3/one-idioms-avx-ymm.s | 48 ++--- .../tools/llvm-mca/X86/Znver3/one-idioms-mmx.s | 36 ++-- .../tools/llvm-mca/X86/Znver3/one-idioms-sse-xmm.s | 48 ++--- .../llvm-mca/X86/Znver3/partial-reg-update-3.s | 12 +- .../llvm-mca/X86/Znver3/partial-reg-update-4.s | 12 +- .../llvm-mca/X86/Znver3/partial-reg-update-5.s | 12 +- .../llvm-mca/X86/Znver3/partial-reg-update-6.s | 12 +- .../X86/Znver3/reg-move-elimination-avx-xmm.s | 72 +++---- .../X86/Znver3/reg-move-elimination-avx-ymm.s | 72 +++---- .../llvm-mca/X86/Znver3/reg-move-elimination-gpr.s | 48 ++--- .../llvm-mca/X86/Znver3/reg-move-elimination-mmx.s | 12 +- .../X86/Znver3/reg-move-elimination-sse-xmm.s | 72 +++---- .../llvm-mca/X86/Znver3/reg-move-elimination-x87.s | 12 +- .../test/tools/llvm-mca/X86/Znver3/resources-adx.s | 12 +- .../test/tools/llvm-mca/X86/Znver3/resources-aes.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-avx1.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-avx2.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-bmi1.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-bmi2.s | 12 +- .../llvm-mca/X86/Znver3/resources-clflushopt.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-clzero.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-cmov.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-cmpxchg.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-f16c.s | 12 +- .../test/tools/llvm-mca/X86/Znver3/resources-fma.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-fsgsbase.s | 12 +- .../test/tools/llvm-mca/X86/Znver3/resources-lea.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-lzcnt.s | 12 +- .../test/tools/llvm-mca/X86/Znver3/resources-mmx.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-movbe.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-mwaitx.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-pclmul.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-popcnt.s | 12 +- .../llvm-mca/X86/Znver3/resources-prefetchw.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-rdrand.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-rdseed.s | 12 +- .../test/tools/llvm-mca/X86/Znver3/resources-sha.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-sse1.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-sse2.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-sse3.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-sse41.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-sse42.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-sse4a.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-ssse3.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-vaes.s | 12 +- .../llvm-mca/X86/Znver3/resources-vpclmulqdq.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-x86_32.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-x86_64.s | 12 +- .../test/tools/llvm-mca/X86/Znver3/resources-x87.s | 12 +- .../tools/llvm-mca/X86/Znver3/resources-xsave.s | 12 +- .../llvm-mca/X86/Znver3/zero-idioms-avx-xmm.s | 216 +++++++++---------- .../llvm-mca/X86/Znver3/zero-idioms-avx-ymm.s | 240 ++++++++++----------- .../tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s | 48 ++--- .../llvm-mca/X86/Znver3/zero-idioms-sse-xmm.s | 216 +++++++++---------- 61 files 
changed, 967 insertions(+), 967 deletions(-) diff --git a/llvm/lib/Target/X86/X86ScheduleZnver3.td b/llvm/lib/Target/X86/X86ScheduleZnver3.td index d90c8bd..2e87d52 100644 --- a/llvm/lib/Target/X86/X86ScheduleZnver3.td +++ b/llvm/lib/Target/X86/X86ScheduleZnver3.td @@ -52,7 +52,7 @@ def Znver3Model : SchedMachineModel { int VecLoadLatency = 7; // Latency of a simple store operation. int StoreLatency = 1; - // FIXME + // FIXME: let HighLatency = 25; // FIXME: any better choice? // AMD SOG 19h, 2.8 Optimizing Branching // The branch misprediction penalty is in the range from 11 to 18 cycles, @@ -193,11 +193,11 @@ def Zn3Int : ProcResGroup<[Zn3ALU0, Zn3AGU0, Zn3BRU0, // scheduler 0 // <...>, and six FPU pipes. // Agner, 22.10 Floating point execution pipes // There are six floating point/vector execution pipes, -def Zn3FPP0 : ProcResource<1>; -def Zn3FPP1 : ProcResource<1>; -def Zn3FPP2 : ProcResource<1>; -def Zn3FPP3 : ProcResource<1>; -def Zn3FPP45 : ProcResource<2>; +def Zn3FP0 : ProcResource<1>; +def Zn3FP1 : ProcResource<1>; +def Zn3FP2 : ProcResource<1>; +def Zn3FP3 : ProcResource<1>; +def Zn3FP45 : ProcResource<2>; // // Execution Units @@ -205,63 +205,63 @@ def Zn3FPP45 : ProcResource<2>; // AMD SOG 19h, 2.11.1 Floating Point Execution Resources // (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ) -defvar Zn3FPFMul0 = Zn3FPP0; -defvar Zn3FPFMul1 = Zn3FPP1; +defvar Zn3FPFMul0 = Zn3FP0; +defvar Zn3FPFMul1 = Zn3FP1; // (v)FADD* -defvar Zn3FPFAdd0 = Zn3FPP2; -defvar Zn3FPFAdd1 = Zn3FPP3; +defvar Zn3FPFAdd0 = Zn3FP2; +defvar Zn3FPFAdd1 = Zn3FP3; // All convert operations except pack/unpack -defvar Zn3FPFCvt0 = Zn3FPP2; -defvar Zn3FPFCvt1 = Zn3FPP3; +defvar Zn3FPFCvt0 = Zn3FP2; +defvar Zn3FPFCvt1 = Zn3FP3; // All Divide and Square Root except Reciprocal Approximation // AMD SOG 19h, 2.11.1 Floating Point Execution Resources // FDIV unit can support 2 simultaneous operations in flight // even though it occupies a single pipe. // FIXME: BufferSize=2 ? -defvar Zn3FPFDiv = Zn3FPP1; +defvar Zn3FPFDiv = Zn3FP1; // Moves and Logical operations on Floating Point Data Types -defvar Zn3FPFMisc0 = Zn3FPP0; -defvar Zn3FPFMisc1 = Zn3FPP1; -defvar Zn3FPFMisc2 = Zn3FPP2; -defvar Zn3FPFMisc3 = Zn3FPP3; +defvar Zn3FPFMisc0 = Zn3FP0; +defvar Zn3FPFMisc1 = Zn3FP1; +defvar Zn3FPFMisc2 = Zn3FP2; +defvar Zn3FPFMisc3 = Zn3FP3; // Integer Adds, Subtracts, and Compares // Some complex VADD operations are not available in all pipes. -defvar Zn3FPVAdd0 = Zn3FPP0; -defvar Zn3FPVAdd1 = Zn3FPP1; -defvar Zn3FPVAdd2 = Zn3FPP2; -defvar Zn3FPVAdd3 = Zn3FPP3; +defvar Zn3FPVAdd0 = Zn3FP0; +defvar Zn3FPVAdd1 = Zn3FP1; +defvar Zn3FPVAdd2 = Zn3FP2; +defvar Zn3FPVAdd3 = Zn3FP3; // Integer Multiplies, SAD, Blendvb -defvar Zn3FPVMul0 = Zn3FPP0; -defvar Zn3FPVMul1 = Zn3FPP3; +defvar Zn3FPVMul0 = Zn3FP0; +defvar Zn3FPVMul1 = Zn3FP3; // Data Shuffles, Packs, Unpacks, Permute // Some complex shuffle operations are only available in pipe1. 
-defvar Zn3FPVShuf = Zn3FPP1; -defvar Zn3FPVShufAux = Zn3FPP2; +defvar Zn3FPVShuf = Zn3FP1; +defvar Zn3FPVShufAux = Zn3FP2; // Bit Shift Left/Right operations -defvar Zn3FPVShift0 = Zn3FPP1; -defvar Zn3FPVShift1 = Zn3FPP2; +defvar Zn3FPVShift0 = Zn3FP1; +defvar Zn3FPVShift1 = Zn3FP2; // Moves and Logical operations on Packed Integer Data Types -defvar Zn3FPVMisc0 = Zn3FPP0; -defvar Zn3FPVMisc1 = Zn3FPP1; -defvar Zn3FPVMisc2 = Zn3FPP2; -defvar Zn3FPVMisc3 = Zn3FPP3; +defvar Zn3FPVMisc0 = Zn3FP0; +defvar Zn3FPVMisc1 = Zn3FP1; +defvar Zn3FPVMisc2 = Zn3FP2; +defvar Zn3FPVMisc3 = Zn3FP3; // *AES* -defvar Zn3FPAES0 = Zn3FPP0; -defvar Zn3FPAES1 = Zn3FPP1; +defvar Zn3FPAES0 = Zn3FP0; +defvar Zn3FPAES1 = Zn3FP1; // *CLM* -defvar Zn3FPCLM0 = Zn3FPP0; -defvar Zn3FPCLM1 = Zn3FPP1; +defvar Zn3FPCLM0 = Zn3FP0; +defvar Zn3FPCLM1 = Zn3FP1; // Execution pipeline grouping //===----------------------------------------------------------------------===// @@ -269,7 +269,7 @@ defvar Zn3FPCLM1 = Zn3FPP1; // AMD SOG 19h, 2.11 Floating-Point Unit // Stores and floating point to general purpose register transfer // have 2 dedicated pipelines (pipe 5 and 6). -def Zn3FPU0123 : ProcResGroup<[Zn3FPP0, Zn3FPP1, Zn3FPP2, Zn3FPP3]>; +def Zn3FPU0123 : ProcResGroup<[Zn3FP0, Zn3FP1, Zn3FP2, Zn3FP3]>; // (v)FMUL*, (v)FMA*, Floating Point Compares, Blendv(DQ) def Zn3FPFMul01 : ProcResGroup<[Zn3FPFMul0, Zn3FPFMul1]>; @@ -293,12 +293,12 @@ def Zn3FPFMisc12 : ProcResGroup<[Zn3FPFMisc1, Zn3FPFMisc2]>; // AMD SOG 19h, 2.11 Floating-Point Unit // Stores and floating point to general purpose register transfer // have 2 dedicated pipelines (pipe 5 and 6). -defvar Zn3FPLd01 = Zn3FPP45; +defvar Zn3FPLd01 = Zn3FP45; // AMD SOG 19h, 2.11 Floating-Point Unit // Note that FP stores are supported on two pipelines, // but throughput is limited to one per cycle. -let Super = Zn3FPP45 in +let Super = Zn3FP45 in def Zn3FPSt : ProcResource<1>; // Integer Adds, Subtracts, and Compares @@ -345,8 +345,8 @@ def Zn3FpPRF : RegisterFile<160, [VR64, VR128, VR256], [1, 1, 1], [0, 1, 1], // AMD SOG 19h, 2.11 Floating-Point Unit // <...> the scheduler can issue 1 micro op per cycle for each pipe. // FIXME: those are two separate schedulers, not a single big one. -def Zn3FP : ProcResGroup<[Zn3FPP0, Zn3FPP2, /*Zn3FPP4,*/ // scheduler 0 - Zn3FPP1, Zn3FPP3, Zn3FPP45 /*Zn3FPP5*/ // scheduler 1 +def Zn3FP : ProcResGroup<[Zn3FP0, Zn3FP2, /*Zn3FP4,*/ // scheduler 0 + Zn3FP1, Zn3FP3, Zn3FP45 /*Zn3FP5*/ // scheduler 1 ]> { let BufferSize = !mul(2, 32); } @@ -838,9 +838,9 @@ defm : Zn3WriteResInt; defm : Zn3WriteResIntPair; // FIXME: not from llvm-exegesis // Floating point. This covers both scalar and vector operations. 
-defm : Zn3WriteResInt; -defm : Zn3WriteResInt; -defm : Zn3WriteResInt; +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; +defm : Zn3WriteResInt; defm : Zn3WriteResXMM; defm : Zn3WriteResXMM; defm : Zn3WriteResYMM; diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s b/llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s index 4654ce1..349abec 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/dependency-breaking-gpr.s @@ -68,12 +68,12 @@ cmovael %eax, %ecx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -145,12 +145,12 @@ cmovael %eax, %ecx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -223,12 +223,12 @@ cmovael %eax, %ecx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -306,12 +306,12 @@ cmovael %eax, %ecx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -389,12 +389,12 @@ cmovael %eax, %ecx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -472,12 +472,12 @@ cmovael %eax, %ecx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# 
CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s index 12d6f39..0fcd6f5 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-hi-read-advance.s @@ -46,12 +46,12 @@ add %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -122,12 +122,12 @@ add %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-lo-reg-use.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-lo-reg-use.s index 93f8d76..cd427bb 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-lo-reg-use.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-lo-reg-use.s @@ -41,12 +41,12 @@ mulxq %rax, %rax, %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -112,12 +112,12 @@ mulxq %rax, %rax, %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s index 13ef5bc..bf82486 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s +++ 
b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-read-advance.s @@ -43,12 +43,12 @@ mulxq (%rdi), %rax, %rdx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -115,12 +115,12 @@ mulxq (%rdi), %rax, %rdx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-same-regs.s b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-same-regs.s index bfe8be8..8a5a014 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/mulx-same-regs.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/mulx-same-regs.s @@ -44,12 +44,12 @@ mulxq %rax, %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -116,12 +116,12 @@ mulxq %rax, %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-xmm.s b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-xmm.s index 1431875..f0e16a8 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-xmm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-xmm.s @@ -68,12 +68,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -159,12 +159,12 @@ vpaddq %xmm0, 
%xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -250,12 +250,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -341,12 +341,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-ymm.s b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-ymm.s index eb2bb97..97f6a34 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-ymm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-avx-ymm.s @@ -68,12 +68,12 @@ vpaddq %ymm0, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -159,12 +159,12 @@ vpaddq %ymm0, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -250,12 +250,12 @@ vpaddq %ymm0, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: 
[10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -341,12 +341,12 @@ vpaddq %ymm0, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-mmx.s b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-mmx.s index 5909af8..c733f63 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-mmx.s @@ -63,12 +63,12 @@ paddd %mm0, %mm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -154,12 +154,12 @@ paddd %mm0, %mm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -245,12 +245,12 @@ paddd %mm0, %mm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-sse-xmm.s b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-sse-xmm.s index 5a05487..63df99e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-sse-xmm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/one-idioms-sse-xmm.s @@ -68,12 +68,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: 
[13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -159,12 +159,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -250,12 +250,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -341,12 +341,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-3.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-3.s index 7ac674c..66c1322 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-3.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-3.s @@ -40,12 +40,12 @@ xor %bx, %dx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-4.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-4.s index 582da14..4ed529e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-4.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-4.s @@ -40,12 +40,12 @@ add %cx, %bx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git 
a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-5.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-5.s index dda87e9..5894111 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-5.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-5.s @@ -33,12 +33,12 @@ lzcnt %ax, %bx ## partial register stall. # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-6.s b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-6.s index 71520ea..fdbf4d9 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-6.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/partial-reg-update-6.s @@ -42,12 +42,12 @@ lzcnt 2(%rsp), %cx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-xmm.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-xmm.s index 7afa80c..f3e515c 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-xmm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-xmm.s @@ -180,12 +180,12 @@ vmovdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -474,12 +474,12 @@ vmovdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -768,12 +768,12 @@ vmovdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# 
CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1062,12 +1062,12 @@ vmovdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1356,12 +1356,12 @@ vmovdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1650,12 +1650,12 @@ vmovdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-ymm.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-ymm.s index 8b81d55..a484a75 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-ymm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-avx-ymm.s @@ -180,12 +180,12 @@ vmovdqu %ymm15, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -474,12 +474,12 @@ vmovdqu %ymm15, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -768,12 +768,12 @@ vmovdqu %ymm15, %ymm0 # CHECK-NEXT: [5] - 
Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1062,12 +1062,12 @@ vmovdqu %ymm15, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1356,12 +1356,12 @@ vmovdqu %ymm15, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1650,12 +1650,12 @@ vmovdqu %ymm15, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-gpr.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-gpr.s index f359048..eb20d13 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-gpr.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-gpr.s @@ -134,12 +134,12 @@ xchgq %r15, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -402,12 +402,12 @@ xchgq %r15, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - 
Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -670,12 +670,12 @@ xchgq %r15, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -938,12 +938,12 @@ xchgq %r15, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-mmx.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-mmx.s index b556fd6..e17d671 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-mmx.s @@ -61,12 +61,12 @@ movq %mm7, %mm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-sse-xmm.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-sse-xmm.s index 147cb0f..b45fd17 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-sse-xmm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-sse-xmm.s @@ -180,12 +180,12 @@ movdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -474,12 +474,12 @@ movdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# 
CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -768,12 +768,12 @@ movdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1062,12 +1062,12 @@ movdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1356,12 +1356,12 @@ movdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1650,12 +1650,12 @@ movdqu %xmm15, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-x87.s b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-x87.s index de59edf..0465d41 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/reg-move-elimination-x87.s @@ -67,12 +67,12 @@ fxch %st(0) # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-adx.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-adx.s index 4e024e5..9c5a19b 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-adx.s +++ 
b/llvm/test/tools/llvm-mca/X86/Znver3/resources-adx.s @@ -38,12 +38,12 @@ adox (%rbx), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-aes.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-aes.s index 5abf3cc..d108696 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-aes.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-aes.s @@ -50,12 +50,12 @@ aeskeygenassist $22, (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s index 146b3ce..4f0b484 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx1.s @@ -1731,12 +1731,12 @@ vzeroupper # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s index 3c6b31a..1a8b9e2 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-avx2.s @@ -771,12 +771,12 @@ vpxor (%rax), %ymm1, %ymm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi1.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi1.s index 8c0e841..2600237 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi1.s @@ -85,12 +85,12 @@ tzcnt (%rax), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: 
[7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s index 8d00c99..0664c1d 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-bmi2.s @@ -100,12 +100,12 @@ shrx %rax, (%rbx), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-clflushopt.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-clflushopt.s index 3e7219c..b40d155 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-clflushopt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-clflushopt.s @@ -23,12 +23,12 @@ clflushopt (%rax) # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-clzero.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-clzero.s index 0dc89fa..0f9935c 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-clzero.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-clzero.s @@ -23,12 +23,12 @@ clzero # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmov.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmov.s index e0e46af..8118e40 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmov.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmov.s @@ -218,12 +218,12 @@ cmovgq (%rax), %rdi # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] 
- Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmpxchg.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmpxchg.s index 03763e5..9ab8776 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmpxchg.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-cmpxchg.s @@ -25,12 +25,12 @@ cmpxchg16b (%rax) # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-f16c.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-f16c.s index bb995d5..345ae02 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-f16c.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-f16c.s @@ -40,12 +40,12 @@ vcvtps2ph $0, %ymm0, (%rax) # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-fma.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-fma.s index 9af180d..af207f0 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-fma.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-fma.s @@ -500,12 +500,12 @@ vfnmsub231ss (%rax), %xmm1, %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-fsgsbase.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-fsgsbase.s index 142508c..3e65183 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-fsgsbase.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-fsgsbase.s @@ -40,12 +40,12 @@ wrgsbase %rdi # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# 
CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-lea.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-lea.s index 1545a22..0257202 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-lea.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-lea.s @@ -293,12 +293,12 @@ lea 1024(%rax, %rbx, 2), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-lzcnt.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-lzcnt.s index ffbe414..735287a 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-lzcnt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-lzcnt.s @@ -35,12 +35,12 @@ lzcntq (%rax), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-mmx.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-mmx.s index 75dbf95..2bc6177 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-mmx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-mmx.s @@ -279,12 +279,12 @@ pxor (%rax), %mm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-movbe.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-movbe.s index 144e97f..6eeabbd 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-movbe.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-movbe.s @@ -35,12 +35,12 @@ movbe (%rax), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: 
[14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-mwaitx.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-mwaitx.s index 3b343d7..103fd3eb 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-mwaitx.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-mwaitx.s @@ -25,12 +25,12 @@ mwaitx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-pclmul.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-pclmul.s index 2d9f0e9..893f476 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-pclmul.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-pclmul.s @@ -25,12 +25,12 @@ pclmulqdq $11, (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-popcnt.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-popcnt.s index cce078f..29bcc5c 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-popcnt.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-popcnt.s @@ -35,12 +35,12 @@ popcntq (%rax), %rcx # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-prefetchw.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-prefetchw.s index 5423b6b..b80e8f7 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-prefetchw.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-prefetchw.s @@ -25,12 +25,12 @@ prefetchw (%rax) # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdrand.s 
b/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdrand.s index fb09253..649eb10 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdrand.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdrand.s @@ -27,12 +27,12 @@ rdrand %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdseed.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdseed.s index f10a90f..44e0eeb 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdseed.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-rdseed.s @@ -27,12 +27,12 @@ rdseed %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s index 360a667..e6d5ab9 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sha.s @@ -55,12 +55,12 @@ sha256rnds2 (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse1.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse1.s index 9816b87..4c7a3f0 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse1.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse1.s @@ -328,12 +328,12 @@ xorps (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse2.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse2.s index f69c535..d24aebf 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse2.s +++ 
b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse2.s @@ -684,12 +684,12 @@ xorpd (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse3.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse3.s index 8110390..51bb95f 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse3.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse3.s @@ -74,12 +74,12 @@ mwait # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse41.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse41.s index 0cc6c6a..e952a16 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse41.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse41.s @@ -261,12 +261,12 @@ roundss $1, (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse42.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse42.s index 873e4f4..8afcd80 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse42.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse42.s @@ -70,12 +70,12 @@ pcmpgtq (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse4a.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse4a.s index 1c1b0b2..6606a3e 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse4a.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-sse4a.s @@ -35,12 +35,12 @@ movntss %xmm0, (%rax) # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: 
[7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-ssse3.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-ssse3.s index aeec493..6668870 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-ssse3.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-ssse3.s @@ -180,12 +180,12 @@ psignw (%rax), %xmm2 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-vaes.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-vaes.s index 076094f..81afc7d 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-vaes.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-vaes.s @@ -40,12 +40,12 @@ vaesenclast (%rax), %ymm1, %ymm3 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-vpclmulqdq.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-vpclmulqdq.s index 31680d5..10440e9 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-vpclmulqdq.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-vpclmulqdq.s @@ -25,12 +25,12 @@ vpclmulqdq $11, (%rax), %ymm1, %ymm3 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_32.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_32.s index fb09b65..8f627ca 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_32.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_32.s @@ -56,12 +56,12 @@ salc # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - 
Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s index fedb3d2..41ec631 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x86_64.s @@ -1957,12 +1957,12 @@ xorq (%rax), %rdi # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x87.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x87.s index 9a92bd0..cd8a06a 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-x87.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-x87.s @@ -364,12 +364,12 @@ fyl2xp1 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/resources-xsave.s b/llvm/test/tools/llvm-mca/X86/Znver3/resources-xsave.s index 819361c..f348ff8 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/resources-xsave.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/resources-xsave.s @@ -35,12 +35,12 @@ xsetbv # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-xmm.s b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-xmm.s index 33657e6..ed4e8f9 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-xmm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-xmm.s @@ -138,12 +138,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# 
CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -229,12 +229,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -320,12 +320,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -411,12 +411,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -502,12 +502,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -593,12 +593,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -684,12 +684,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - 
Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -775,12 +775,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -866,12 +866,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -957,12 +957,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1048,12 +1048,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1139,12 +1139,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1230,12 +1230,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # 
CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1321,12 +1321,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1412,12 +1412,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1503,12 +1503,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1594,12 +1594,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1685,12 +1685,12 @@ vpaddq %xmm0, %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-ymm.s b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-ymm.s index ba7f51e..2404336 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-ymm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-avx-ymm.s @@ -148,12 +148,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - 
Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -239,12 +239,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -330,12 +330,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -421,12 +421,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -512,12 +512,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -603,12 +603,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -694,12 +694,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: 
[12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -785,12 +785,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -876,12 +876,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -967,12 +967,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1058,12 +1058,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1149,12 +1149,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1240,12 +1240,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# 
CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1331,12 +1331,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1422,12 +1422,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1513,12 +1513,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1604,12 +1604,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1695,12 +1695,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1786,12 +1786,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] 
- Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1878,12 +1878,12 @@ vpxor %ymm1, %ymm0, %ymm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s index 018adc2..4d648f7 100644 --- a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-gpr.s @@ -68,12 +68,12 @@ addq %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -175,12 +175,12 @@ addq %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -282,12 +282,12 @@ addq %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -389,12 +389,12 @@ addq %rax, %rax # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU diff --git a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-sse-xmm.s b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-sse-xmm.s index 935881a..aca39c5 100644 --- 
a/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-sse-xmm.s +++ b/llvm/test/tools/llvm-mca/X86/Znver3/zero-idioms-sse-xmm.s @@ -138,12 +138,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -229,12 +229,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -320,12 +320,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -411,12 +411,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -502,12 +502,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -593,12 +593,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # 
CHECK-NEXT: [14.1] - Zn3LSU @@ -684,12 +684,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -775,12 +775,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -866,12 +866,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -957,12 +957,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1048,12 +1048,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1139,12 +1139,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1230,12 +1230,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - 
Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1321,12 +1321,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1412,12 +1412,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1503,12 +1503,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1594,12 +1594,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU @@ -1685,12 +1685,12 @@ paddq %xmm0, %xmm0 # CHECK-NEXT: [5] - Zn3ALU2 # CHECK-NEXT: [6] - Zn3ALU3 # CHECK-NEXT: [7] - Zn3BRU1 -# CHECK-NEXT: [8] - Zn3FPP0 -# CHECK-NEXT: [9] - Zn3FPP1 -# CHECK-NEXT: [10] - Zn3FPP2 -# CHECK-NEXT: [11] - Zn3FPP3 -# CHECK-NEXT: [12.0] - Zn3FPP45 -# CHECK-NEXT: [12.1] - Zn3FPP45 +# CHECK-NEXT: [8] - Zn3FP0 +# CHECK-NEXT: [9] - Zn3FP1 +# CHECK-NEXT: [10] - Zn3FP2 +# CHECK-NEXT: [11] - Zn3FP3 +# CHECK-NEXT: [12.0] - Zn3FP45 +# CHECK-NEXT: [12.1] - Zn3FP45 # CHECK-NEXT: [13] - Zn3FPSt # CHECK-NEXT: [14.0] - Zn3LSU # CHECK-NEXT: [14.1] - Zn3LSU -- cgit v1.1 From d3fe2b538d53373f03ae096ccc05d07cb4d3ff3a Mon Sep 17 00:00:00 2001 From: Andrzej Warzynski Date: Thu, 4 Apr 2024 17:12:37 +0100 
Subject: Revert "[mlir][test] Make SME e2e tests require an emulator (#86489)" This reverts commit 7b5255297dca377a37c8df066e9d9749ab96cfad. Broken bot: * https://lab.llvm.org/buildbot/#/builders/179/builds/9794 --- mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake | 101 --------------------- mlir/docs/Dialects/ArmSME.md | 10 +- mlir/test/CMakeLists.txt | 6 -- 3 files changed, 1 insertion(+), 116 deletions(-) delete mode 100644 mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake diff --git a/mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake b/mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake deleted file mode 100644 index fff0424..0000000 --- a/mlir/cmake/modules/MLIRCheckHardwareFeatures.cmake +++ /dev/null @@ -1,101 +0,0 @@ -# A collection of helper CMake functions to detect hardware capabilities. At -# the moment these are used when configuring MLIR integration tests. - -# Checks whether the specified hardware capability is supported by the host -# Linux system. This is implemented by checking auxiliary vector feature -# provided by the Linux kernel. -# -# check_hwcap( -# hwcap_spec -# output_var -# ) -# -# hwcap_spec - HWCAP value to check - these are defined in hwcap.h in the Linux -# kernel. -# -# output_var - Output variable to use to save the results (TRUE for supported, -# FALSE for not supported). -# -# EXAMPLES: -# -# check_hwcap("HWCAP2_SME" SME_EMULATOR_REQUIRED) -# -function(check_hwcap hwcap_spec output) - set(hwcap_test_src - [====[ - #include - #include - - int main(void) - { - long hwcaps = getauxval(AT_); - return (hwcaps & ) != 0; - } - ]====] - ) - - # Extract from $hwcap_spec whether this is AT_HWCAP or AT_HWCAP2 - string(FIND ${hwcap_spec} "_" wsloc) - string(SUBSTRING ${hwcap_spec} 0 ${wsloc} hwcap_vec) - - string(REPLACE "" ${hwcap_vec} hwcap_test_src "${hwcap_test_src}") - string(REPLACE "" ${hwcap_spec} hwcap_test_src "${hwcap_test_src}") - - set(hwcap_test_file ${CMAKE_BINARY_DIR}/${CMAKE_FILES_DIRECTORY}/hwcap_check.c) - file(WRITE ${hwcap_test_file} "${hwcap_test_src}") - - # Compile _and_ run - try_run( - test_run_result test_compile_result - ${CMAKE_BINARY_DIR} - ${hwcap_test_file} - ) - # Compilation will fail if hwcap_spec is not defined - this usually means - # that your Linux kernel is too old. - if(${test_compile_result} AND (DEFINED test_run_result)) - message(${test_run_result}) - message(STATUS "Checking whether ${hwcap_spec} is supported by the host system: ${test_run_result}") - set(${output} ${test_run_result} PARENT_SCOPE) - else() - message(STATUS "Checking whether ${hwcap_spec} is supported by the host system: FALSE") - endif() -endfunction(check_hwcap) - -# For the given group of e2e tests (defined by the `mlir_e2e_tests` flag), -# checks whether an emulator is required. If yes, verifies that the -# corresponding CMake var pointing to an emulator (`emulator_exec`) has been -# set. -# -# check_emulator( -# mlir_e2e_tests -# hwcap_spec -# emulator_exec -# ) -# -# mlir_e2e_tests - MLIR CMake variables corresponding to the group of e2e tests -# to check -# hwcap_spec - HWCAP value to check. This should correspond to the hardware -# capabilities required by the tests to be checked. Possible -# values are defined in hwcap.h in the Linux kernel. -# emulator_exec - variable the defines the emulator (ought to be set if -# required, can be empty otherwise). 
-# -# EXAMPLES: -# -# check_emulator(MLIR_RUN_ARM_SVE_TESTS "HWCAP_SVE" ARM_EMULATOR_EXECUTABLE) -# -function(check_emulator mlir_e2e_tests hwcap_spec emulator_exec) - if (NOT ${mlir_e2e_tests}) - return() - endif() - - check_hwcap(${hwcap_spec} emulator_not_required) - if (${emulator_not_required}) - return() - endif() - - if (${emulator_exec} STREQUAL "") - message(FATAL_ERROR "${mlir_e2e_tests} requires an emulator, but ${emulator_exec} is not set") - endif() - -endfunction() diff --git a/mlir/docs/Dialects/ArmSME.md b/mlir/docs/Dialects/ArmSME.md index ce0a76e..7326150 100644 --- a/mlir/docs/Dialects/ArmSME.md +++ b/mlir/docs/Dialects/ArmSME.md @@ -6,7 +6,7 @@ This dialect defines custom and LLVM IR intrinsic operations that are used to target Arm Scalable Matrix Extension. Through the available conversion and ArmSME passes you can, for example, lower a [linalg.matmul](https://mlir.llvm.org/docs/Dialects/Linalg/#linalgmatmul-linalgmatmulop) -operation to Arm SME +opereation to Arm SME [FMOPA](https://developer.arm.com/documentation/ddi0602/2023-03/SME-Instructions/FMOPA--widening---Half-precision-floating-point-sum-of-outer-products-and-accumulate-) (floating-point outer product) operations. See one of the in-tree end-to-end integration tests for reference: @@ -14,14 +14,6 @@ integration tests for reference: * [Linalg/CPU/ArmSME/matmul.mlir](https://github.com/llvm/llvm-project/blob/main/mlir/test/Integration/Dialect/Linalg/CPU/ArmSME/matmul.mlir) * [Vector/CPU/ArmSME/test-outerproduct-f64.mlir](https://github.com/llvm/llvm-project/blob/main/mlir/test/Integration/Dialect/Vector/CPU/ArmSME/test-outerproduct-f64.mlir) -In order to run ArmSME integration tests, include these flags in the CMake -invocation when configuring LLVM and MLIR: -```bash - -DMLIR_INCLUDE_INTEGRATION_TESTS=On - -DMLIR_RUN_ARM_SME_TESTS=On - -DARM_EMULATOR_EXECUTABLE= -``` - These tests are run "post-commit" by the [clang-aarch64-sve-vla](https://lab.llvm.org/buildbot/#/builders/197) LLVM BuildBot worker. diff --git a/mlir/test/CMakeLists.txt b/mlir/test/CMakeLists.txt index 5319a9c..baf07ea 100644 --- a/mlir/test/CMakeLists.txt +++ b/mlir/test/CMakeLists.txt @@ -1,5 +1,3 @@ -include(MLIRCheckHardwareFeatures) - add_subdirectory(CAPI) add_subdirectory(lib) @@ -41,10 +39,6 @@ if (MLIR_INCLUDE_INTEGRATION_TESTS) option(MLIR_RUN_ARM_SVE_TESTS "Run Arm SVE tests.") option(MLIR_RUN_ARM_SME_TESTS "Run Arm SME tests.") - # Check whether an emulator is required - if yes then make sure that it's - # been set. - check_emulator(MLIR_RUN_ARM_SVE_TESTS "HWCAP_SVE" ARM_EMULATOR_EXECUTABLE) - check_emulator(MLIR_RUN_ARM_SME_TESTS "HWCAP2_SME" ARM_EMULATOR_EXECUTABLE) # The native target may not be enabled when cross compiling, raise an error. if(NOT MLIR_ENABLE_EXECUTION_ENGINE) -- cgit v1.1 From 62740d87bcb4a43569bc88fa76c24d27a064dcf9 Mon Sep 17 00:00:00 2001 From: lntue <35648136+lntue@users.noreply.github.com> Date: Thu, 4 Apr 2024 12:15:32 -0400 Subject: [libc][bazel][math] Add float128 math functions and their smoke tests to bazel layout. 
(#87645) --- utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 79 +++++------ .../libc/test/src/math/smoke/BUILD.bazel | 147 +++++++++++++++++++++ 2 files changed, 183 insertions(+), 43 deletions(-) create mode 100644 utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index 9dfe4c4..d8375de 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -1696,16 +1696,14 @@ libc_math_function( ) libc_math_function(name = "fabs") - libc_math_function(name = "fabsf") - libc_math_function(name = "fabsl") +libc_math_function(name = "fabsf128") libc_math_function(name = "fdim") - libc_math_function(name = "fdimf") - libc_math_function(name = "fdiml") +libc_math_function(name = "fdimf128") libc_math_function( name = "ceil", @@ -1730,6 +1728,9 @@ libc_math_function( ], ) +libc_math_function(name = "ceilf128") + + libc_math_function( name = "floor", specializations = [ @@ -1747,12 +1748,12 @@ libc_math_function( ) libc_math_function(name = "floorl") +libc_math_function(name = "floorf128") libc_math_function(name = "ldexp") - libc_math_function(name = "ldexpf") - libc_math_function(name = "ldexpl") +libc_math_function(name = "ldexpf128") libc_math_function( name = "trunc", @@ -1771,6 +1772,7 @@ libc_math_function( ) libc_math_function(name = "truncl") +libc_math_function(name = "truncf128") libc_math_function( name = "round", @@ -1789,6 +1791,7 @@ libc_math_function( ) libc_math_function(name = "roundl") +libc_math_function(name = "roundf128") libc_math_function( name = "fmod", @@ -1805,10 +1808,9 @@ libc_math_function( ) libc_math_function(name = "frexp") - libc_math_function(name = "frexpf") - libc_math_function(name = "frexpl") +libc_math_function(name = "frexpf128") libc_math_function(name = "hypot") @@ -1820,40 +1822,32 @@ libc_math_function( ) libc_math_function(name = "logb") - libc_math_function(name = "logbf") - libc_math_function(name = "logbl") +libc_math_function(name = "logbf128") libc_math_function(name = "modf") - libc_math_function(name = "modff") - libc_math_function(name = "modfl") +libc_math_function(name = "modff128") libc_math_function(name = "remquo") - libc_math_function(name = "remquof") - libc_math_function(name = "remquol") libc_math_function(name = "remainder") - libc_math_function(name = "remainderf") - libc_math_function(name = "remainderl") libc_math_function(name = "fmin") - libc_math_function(name = "fminf") - libc_math_function(name = "fminl") +libc_math_function(name = "fminf128") libc_math_function(name = "fmax") - libc_math_function(name = "fmaxf") - libc_math_function(name = "fmaxl") +libc_math_function(name = "fmaxf128") libc_math_function( name = "cosf", @@ -1927,49 +1921,47 @@ libc_math_function( ], ) -libc_math_function(name = "copysign") +libc_math_function( + name = "sqrtf128", + additional_deps = [ + ":__support_fputil_sqrt", + ], +) +libc_math_function(name = "copysign") libc_math_function(name = "copysignf") - libc_math_function(name = "copysignl") - libc_math_function(name = "copysignf128") libc_math_function(name = "ilogb") - libc_math_function(name = "ilogbf") - libc_math_function(name = "ilogbl") +libc_math_function(name = "ilogbf128") libc_math_function(name = "rint") - libc_math_function(name = "rintf") - libc_math_function(name = "rintl") +libc_math_function(name = "rintf128") libc_math_function(name = "lrint") - libc_math_function(name = "lrintf") - 
libc_math_function(name = "lrintl") +libc_math_function(name = "lrintf128") libc_math_function(name = "llrint") - libc_math_function(name = "llrintf") - libc_math_function(name = "llrintl") +libc_math_function(name = "llrintf128") libc_math_function(name = "lround") - libc_math_function(name = "lroundf") - libc_math_function(name = "lroundl") +libc_math_function(name = "lroundf128") libc_math_function(name = "llround") - libc_math_function(name = "llroundf") - libc_math_function(name = "llroundl") +libc_math_function(name = "llroundf128") libc_math_function( name = "nan", @@ -1995,28 +1987,29 @@ libc_math_function( ], ) -libc_math_function(name = "nearbyint") +libc_math_function( + name = "nanf128", + additional_deps = [ + ":__support_str_to_float", + ":errno", + ], +) +libc_math_function(name = "nearbyint") libc_math_function(name = "nearbyintf") - libc_math_function(name = "nearbyintl") libc_math_function(name = "nextafter") - libc_math_function(name = "nextafterf") - libc_math_function(name = "nextafterl") +libc_math_function(name = "nextafterf128") libc_math_function(name = "nexttoward") - libc_math_function(name = "nexttowardf") - libc_math_function(name = "nexttowardl") libc_math_function(name = "scalbn") - libc_math_function(name = "scalbnf") - libc_math_function(name = "scalbnl") ############################## inttypes targets ############################## diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel new file mode 100644 index 0000000..0d69a48 --- /dev/null +++ b/utils/bazel/llvm-project-overlay/libc/test/src/math/smoke/BUILD.bazel @@ -0,0 +1,147 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +# Smoke tests for LLVM libc math.h functions. 
+ +load("//libc:libc_build_rules.bzl", "libc_support_library") +load("//libc/test/src/math:libc_math_test_rules.bzl", "math_test") + +package(default_visibility = ["//visibility:public"]) + +licenses(["notice"]) + +math_test( + name = "fabsf128", + hdrs = ["FAbsTest.h"], +) + +math_test( + name = "ceilf128", + hdrs = ["CeilTest.h"], +) + +math_test( + name = "floorf128", + hdrs = ["FloorTest.h"], +) + +math_test( + name = "truncf128", + hdrs = ["TruncTest.h"], +) + +math_test( + name = "roundf128", + hdrs = ["RoundTest.h"], +) + +math_test( + name = "frexpf128", + hdrs = ["FrexpTest.h"], +) + +math_test( + name = "logbf128", + hdrs = ["LogbTest.h"], +) + +math_test( + name = "modff128", + hdrs = ["ModfTest.h"], +) + +math_test( + name = "fminf128", + hdrs = ["FMinTest.h"], +) + +math_test( + name = "fmaxf128", + hdrs = ["FMaxTest.h"], +) + +math_test( + name = "sqrtf128", + hdrs = ["SqrtTest.h"], + deps = ["//libc:__support_cpp_bit"], +) + +math_test( + name = "copysignf128", + hdrs = ["CopySignTest.h"], +) + +math_test( + name = "ilogbf128", + hdrs = ["ILogbTest.h"], + deps = ["//libc:__support_cpp_limits"], +) + +math_test( + name = "fdimf128", + hdrs = ["FDimTest.h"], +) + +libc_support_library( + name = "ldexp_test_template", + hdrs = ["LdExpTest.h"], + deps = [ + "//libc:__support_cpp_limits", + "//libc:__support_fputil_fp_bits", + "//libc:__support_fputil_normal_float", + "//libc:llvm_libc_macros_math_macros", + "//libc/test/UnitTest:LibcUnitTest", + "//libc/test/UnitTest:fp_test_helpers", + ], +) + +math_test( + name = "ldexpf128", + hdrs = ["LdExpTest.h"], + deps = ["//libc:__support_cpp_limits"], +) + +math_test( + name = "rintf128", + hdrs = ["RIntTest.h"], +) + +math_test( + name = "lrintf128", + hdrs = ["RoundToIntegerTest.h"], +) + +math_test( + name = "llrintf128", + hdrs = ["RoundToIntegerTest.h"], +) +math_test( + name = "lroundf128", + hdrs = ["RoundToIntegerTest.h"], +) + +math_test( + name = "llroundf128", + hdrs = ["RoundToIntegerTest.h"], +) + +libc_support_library( + name = "nextafter_test_template", + hdrs = ["NextAfterTest.h"], + deps = [ + "//libc:__support_cpp_array", + "//libc:__support_cpp_bit", + "//libc:__support_cpp_type_traits", + "//libc:__support_fputil_basic_operations", + "//libc:__support_fputil_fp_bits", + "//libc:llvm_libc_macros_math_macros", + "//libc/test/UnitTest:LibcUnitTest", + "//libc/test/UnitTest:fp_test_helpers", + ], +) + +math_test( + name = "nextafterf128", + deps = [":nextafter_test_template"], +) -- cgit v1.1 From ed412494988411fc1aae2f1014c4ecad56d8085f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 17:17:47 +0100 Subject: [CostModel][X86] Update AVX1 sext v4i1 -> v4i64 cost based off worst case llvm-mca numbers We were using raw instruction count which overestimated the costs for #67803 --- llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 2 +- llvm/test/Analysis/CostModel/X86/cast.ll | 2 +- llvm/test/Analysis/CostModel/X86/extend.ll | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp index 2092675..cd61029 100644 --- a/llvm/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/llvm/lib/Target/X86/X86TargetTransformInfo.cpp @@ -2664,7 +2664,7 @@ InstructionCost X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, }; static const TypeConversionCostTblEntry AVXConversionTbl[] = { - { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 6 }, + { ISD::SIGN_EXTEND, MVT::v4i64, MVT::v4i1, 4 }, { ISD::ZERO_EXTEND, 
MVT::v4i64, MVT::v4i1, 4 }, { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i1, 4 }, diff --git a/llvm/test/Analysis/CostModel/X86/cast.ll b/llvm/test/Analysis/CostModel/X86/cast.ll index d80cb09..47487d6 100644 --- a/llvm/test/Analysis/CostModel/X86/cast.ll +++ b/llvm/test/Analysis/CostModel/X86/cast.ll @@ -374,7 +374,7 @@ define i32 @masks4(<4 x i1> %in) { ; ; AVX1-LABEL: 'masks4' ; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %Z = zext <4 x i1> %in to <4 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %S = sext <4 x i1> %in to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %S = sext <4 x i1> %in to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret i32 undef ; ; AVX2-LABEL: 'masks4' diff --git a/llvm/test/Analysis/CostModel/X86/extend.ll b/llvm/test/Analysis/CostModel/X86/extend.ll index 34fa3c4..4a2585a 100644 --- a/llvm/test/Analysis/CostModel/X86/extend.ll +++ b/llvm/test/Analysis/CostModel/X86/extend.ll @@ -1962,7 +1962,7 @@ define i32 @sext_vXi1() "min-legal-vector-width"="256" { ; AVX1-LABEL: 'sext_vXi1' ; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64 ; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64> -; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i64 = sext <16 x i1> undef to <16 x i64> ; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i64 = sext <32 x i1> undef to <32 x i64> @@ -2242,7 +2242,7 @@ define i32 @sext_vXi1() "min-legal-vector-width"="256" { ; BTVER2-LABEL: 'sext_vXi1' ; BTVER2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %I64 = sext i1 undef to i64 ; BTVER2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2i64 = sext <2 x i1> undef to <2 x i64> -; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64> +; BTVER2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V4i64 = sext <4 x i1> undef to <4 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V8i64 = sext <8 x i1> undef to <8 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V16i64 = sext <16 x i1> undef to <16 x i64> ; BTVER2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V32i64 = sext <32 x i1> undef to <32 x i64> -- cgit v1.1 From 0b293e8c36d97bbd7f85ed5b67ce510ff7fd86ee Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 4 Apr 2024 17:24:16 +0100 Subject: [APInt] Remove multiplicativeInverse with explicit modulus (#87644) All callers have been changed to use the new simpler overload with an implicit modulus of 2^BitWidth. The old form was never used or tested with non-power-of-two modulus anyway. 
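For illustration only (not part of this patch): a minimal standalone sketch of the surviving overload, assuming an LLVM tree that already has the no-argument multiplicativeInverse(). The modulus is implicitly 2^BitWidth, so an inverse exists exactly for odd values, which is what the updated unit test further down checks.

// Sketch: verify x * inverse(x) == 1 (mod 2^8) for every odd 8-bit value.
#include "llvm/ADT/APInt.h"
#include <cassert>

int main() {
  for (unsigned V = 1; V < 256; V += 2) {        // only odd values are invertible
    llvm::APInt X(/*numBits=*/8, V);
    llvm::APInt Inv = X.multiplicativeInverse();  // implicit modulus 2^8
    assert(X * Inv == 1);                         // product wraps around to 1
  }
  return 0;
}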
--- llvm/include/llvm/ADT/APInt.h | 3 --- llvm/lib/Support/APInt.cpp | 49 ---------------------------------------- llvm/unittests/ADT/APIntTest.cpp | 19 ++++------------ 3 files changed, 4 insertions(+), 67 deletions(-) diff --git a/llvm/include/llvm/ADT/APInt.h b/llvm/include/llvm/ADT/APInt.h index bd17162..8d3c029 100644 --- a/llvm/include/llvm/ADT/APInt.h +++ b/llvm/include/llvm/ADT/APInt.h @@ -1740,9 +1740,6 @@ public: return *this; } - /// \returns the multiplicative inverse for a given modulo. - APInt multiplicativeInverse(const APInt &modulo) const; - /// \returns the multiplicative inverse of an odd APInt modulo 2^BitWidth. APInt multiplicativeInverse() const; diff --git a/llvm/lib/Support/APInt.cpp b/llvm/lib/Support/APInt.cpp index f8f699f..224ea09 100644 --- a/llvm/lib/Support/APInt.cpp +++ b/llvm/lib/Support/APInt.cpp @@ -1240,55 +1240,6 @@ APInt APInt::sqrt() const { return x_old + 1; } -/// Computes the multiplicative inverse of this APInt for a given modulo. The -/// iterative extended Euclidean algorithm is used to solve for this value, -/// however we simplify it to speed up calculating only the inverse, and take -/// advantage of div+rem calculations. We also use some tricks to avoid copying -/// (potentially large) APInts around. -/// WARNING: a value of '0' may be returned, -/// signifying that no multiplicative inverse exists! -APInt APInt::multiplicativeInverse(const APInt& modulo) const { - assert(ult(modulo) && "This APInt must be smaller than the modulo"); - - // Using the properties listed at the following web page (accessed 06/21/08): - // http://www.numbertheory.org/php/euclid.html - // (especially the properties numbered 3, 4 and 9) it can be proved that - // BitWidth bits suffice for all the computations in the algorithm implemented - // below. More precisely, this number of bits suffice if the multiplicative - // inverse exists, but may not suffice for the general extended Euclidean - // algorithm. - - APInt r[2] = { modulo, *this }; - APInt t[2] = { APInt(BitWidth, 0), APInt(BitWidth, 1) }; - APInt q(BitWidth, 0); - - unsigned i; - for (i = 0; r[i^1] != 0; i ^= 1) { - // An overview of the math without the confusing bit-flipping: - // q = r[i-2] / r[i-1] - // r[i] = r[i-2] % r[i-1] - // t[i] = t[i-2] - t[i-1] * q - udivrem(r[i], r[i^1], q, r[i]); - t[i] -= t[i^1] * q; - } - - // If this APInt and the modulo are not coprime, there is no multiplicative - // inverse, so return 0. We check this by looking at the next-to-last - // remainder, which is the gcd(*this,modulo) as calculated by the Euclidean - // algorithm. - if (r[i] != 1) - return APInt(BitWidth, 0); - - // The next-to-last t is the multiplicative inverse. However, we are - // interested in a positive inverse. Calculate a positive one from a negative - // one if necessary. A simple addition of the modulo suffices because - // abs(t[i]) is known to be less than *this/2 (see the link above). - if (t[i].isNegative()) - t[i] += modulo; - - return std::move(t[i]); -} - /// \returns the multiplicative inverse of an odd APInt modulo 2^BitWidth. 
APInt APInt::multiplicativeInverse() const { assert((*this)[0] && diff --git a/llvm/unittests/ADT/APIntTest.cpp b/llvm/unittests/ADT/APIntTest.cpp index 23f9ee2..76fc264 100644 --- a/llvm/unittests/ADT/APIntTest.cpp +++ b/llvm/unittests/ADT/APIntTest.cpp @@ -3249,22 +3249,11 @@ TEST(APIntTest, SolveQuadraticEquationWrap) { } TEST(APIntTest, MultiplicativeInverseExaustive) { - for (unsigned BitWidth = 1; BitWidth <= 16; ++BitWidth) { - for (unsigned Value = 0; Value < (1u << BitWidth); ++Value) { + for (unsigned BitWidth = 1; BitWidth <= 8; ++BitWidth) { + for (unsigned Value = 1; Value < (1u << BitWidth); Value += 2) { + // Multiplicative inverse exists for all odd numbers. APInt V = APInt(BitWidth, Value); - APInt MulInv = - V.zext(BitWidth + 1) - .multiplicativeInverse(APInt::getSignedMinValue(BitWidth + 1)) - .trunc(BitWidth); - APInt One = V * MulInv; - if (V[0]) { - // Multiplicative inverse exists for all odd numbers. - EXPECT_TRUE(One.isOne()); - EXPECT_TRUE((V * V.multiplicativeInverse()).isOne()); - } else { - // Multiplicative inverse does not exist for even numbers (and 0). - EXPECT_TRUE(MulInv.isZero()); - } + EXPECT_EQ(V * V.multiplicativeInverse(), 1); } } } -- cgit v1.1 From 9e3b64b9f95aadf57568576712902a272fe66503 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 4 Apr 2024 09:33:18 -0700 Subject: [llvm-objcopy] Add --compress-sections --compress-sections is similar to --compress-debug-sections but applies to arbitrary sections. * `--compress-sections
<section-glob>=none`: decompress sections * `--compress-sections <section-glob>
=[zlib|zstd]`: compress sections with zlib/zstd Like `--remove-section`, the pattern is by default a glob, but a regex when --regex is specified. For `--remove-section` like options, `!` prevents matches and is not dependent on ordering (see `ELF/wildcard-syntax.test`). Since `--compress-sections a=zlib --compress-sections a=none` naturally allows overriding, having an order-independent `!` would be confusing. Therefore, `!` is disallowed. Sections within a segment are effectively immutable. Report an error for an attempt to (de)compress them. `SHF_ALLOC` sections in a relocatable file can be compressed, but linkers usually reject them. Link: https://discourse.llvm.org/t/rfc-compress-arbitrary-sections-with-ld-lld-compress-sections/71674 Pull Request: https://github.com/llvm/llvm-project/pull/85036 --- llvm/docs/CommandGuide/llvm-objcopy.rst | 8 ++ llvm/docs/ReleaseNotes.rst | 4 + llvm/include/llvm/ObjCopy/CommonConfig.h | 3 + llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp | 34 ++++-- .../ELF/compress-sections-within-segment.s | 38 ++++++ .../tools/llvm-objcopy/ELF/compress-sections.s | 128 +++++++++++++++++++++ .../llvm-objcopy/ELF/decompress-sections.test | 29 +++++ llvm/tools/llvm-objcopy/ObjcopyOptions.cpp | 36 ++++++ llvm/tools/llvm-objcopy/ObjcopyOpts.td | 6 + 9 files changed, 278 insertions(+), 8 deletions(-) create mode 100644 llvm/test/tools/llvm-objcopy/ELF/compress-sections-within-segment.s create mode 100644 llvm/test/tools/llvm-objcopy/ELF/compress-sections.s diff --git a/llvm/docs/CommandGuide/llvm-objcopy.rst b/llvm/docs/CommandGuide/llvm-objcopy.rst index 985d16e..57d6280 100644 --- a/llvm/docs/CommandGuide/llvm-objcopy.rst +++ b/llvm/docs/CommandGuide/llvm-objcopy.rst @@ -309,6 +309,14 @@ them. Compress DWARF debug sections in the output, using the specified format. Supported formats are ``zlib`` and ``zstd``. Use ``zlib`` if ```` is omitted. +.. option:: --compress-sections
<section-glob>=<format> + + Compress or decompress sections matched by ``<section-glob>
`` using the specified + format. Supported formats are ``zlib`` and ``zstd``. Specify ``none`` for + decompression. When a section is matched by multiple options, the last one + wins. A wildcard ``<section-glob>
`` starting with '!' is disallowed. + Sections within a segment cannot be (de)compressed. + .. option:: --decompress-debug-sections Decompress any compressed DWARF debug sections in the output. diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst index 7588048..ff7fed9 100644 --- a/llvm/docs/ReleaseNotes.rst +++ b/llvm/docs/ReleaseNotes.rst @@ -182,6 +182,10 @@ Changes to the LLVM tools for ELF input to skip the specified symbols when executing other options that can change a symbol's name, binding or visibility. +* llvm-objcopy now supports ``--compress-sections`` to compress or decompress + arbitrary sections not within a segment. + (`#85036 `_.) + * llvm-profgen now supports COFF+DWARF binaries. This enables Sample-based PGO on Windows using Intel VTune's SEP. For details on usage, see the `end-user documentation for SPGO diff --git a/llvm/include/llvm/ObjCopy/CommonConfig.h b/llvm/include/llvm/ObjCopy/CommonConfig.h index 9d6d5fb..ae08d40 100644 --- a/llvm/include/llvm/ObjCopy/CommonConfig.h +++ b/llvm/include/llvm/ObjCopy/CommonConfig.h @@ -262,6 +262,9 @@ struct CommonConfig { bool DecompressDebugSections = false; DebugCompressionType CompressionType = DebugCompressionType::None; + + SmallVector, 0> + compressSections; }; } // namespace objcopy diff --git a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp index 205bc1e..f343d14 100644 --- a/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp +++ b/llvm/lib/ObjCopy/ELF/ELFObjcopy.cpp @@ -215,23 +215,41 @@ static Error dumpSectionToFile(StringRef SecName, StringRef Filename, } Error Object::compressOrDecompressSections(const CommonConfig &Config) { - // Build a list of the debug sections we are going to replace. - // We can't call `AddSection` while iterating over sections, + // Build a list of sections we are going to replace. + // We can't call `addSection` while iterating over sections, // because it would mutate the sections array. SmallVector>, 0> ToReplace; for (SectionBase &Sec : sections()) { - if ((Sec.Flags & SHF_ALLOC) || !StringRef(Sec.Name).starts_with(".debug")) + std::optional CType; + for (auto &[Matcher, T] : Config.compressSections) + if (Matcher.matches(Sec.Name)) + CType = T; + // Handle --compress-debug-sections and --decompress-debug-sections, which + // apply to non-ALLOC debug sections. 
+ if (!(Sec.Flags & SHF_ALLOC) && StringRef(Sec.Name).starts_with(".debug")) { + if (Config.CompressionType != DebugCompressionType::None) + CType = Config.CompressionType; + else if (Config.DecompressDebugSections) + CType = DebugCompressionType::None; + } + if (!CType) continue; + + if (Sec.ParentSegment) + return createStringError( + errc::invalid_argument, + "section '" + Sec.Name + + "' within a segment cannot be (de)compressed"); + if (auto *CS = dyn_cast(&Sec)) { - if (Config.DecompressDebugSections) { + if (*CType == DebugCompressionType::None) ToReplace.emplace_back( &Sec, [=] { return &addSection(*CS); }); - } - } else if (Config.CompressionType != DebugCompressionType::None) { - ToReplace.emplace_back(&Sec, [&, S = &Sec] { + } else if (*CType != DebugCompressionType::None) { + ToReplace.emplace_back(&Sec, [=, S = &Sec] { return &addSection( - CompressedSection(*S, Config.CompressionType, Is64Bits)); + CompressedSection(*S, *CType, Is64Bits)); }); } } diff --git a/llvm/test/tools/llvm-objcopy/ELF/compress-sections-within-segment.s b/llvm/test/tools/llvm-objcopy/ELF/compress-sections-within-segment.s new file mode 100644 index 0000000..064ffca --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/ELF/compress-sections-within-segment.s @@ -0,0 +1,38 @@ +## Disallow (de)compression for sections within a segment as they are +## effectively immutable. +# RUN: rm -rf %t && mkdir %t && cd %t +# RUN: yaml2obj %s -o a +# RUN: not llvm-objcopy a /dev/null --compress-sections .text=zlib 2>&1 | FileCheck %s --implicit-check-not=error: + +# CHECK: error: 'a': section '.text' within a segment cannot be (de)compressed + +# RUN: not llvm-objcopy a /dev/null --compress-sections foo=none 2>&1 | FileCheck %s --check-prefix=CHECK2 --implicit-check-not=error: + +# CHECK2: error: 'a': section 'foo' within a segment cannot be (de)compressed + +## There is an error even if 'foo' is already compressed with zlib. +# RUN: not llvm-objcopy a /dev/null --compress-sections foo=zlib 2>&1 | FileCheck %s --check-prefix=CHECK3 --implicit-check-not=error: + +# CHECK3: error: 'a': section 'foo' within a segment cannot be (de)compressed + +--- !ELF +FileHeader: + Class: ELFCLASS64 + Data: ELFDATA2LSB + Type: ET_EXEC + Machine: EM_X86_64 +ProgramHeaders: + - Type: PT_LOAD + FirstSec: .text + LastSec: foo + Align: 0x1000 + Offset: 0x1000 +Sections: + - Name: .text + Type: SHT_PROGBITS + Offset: 0x1000 + Content: C3 + - Name: foo + Type: SHT_PROGBITS + Flags: [ SHF_COMPRESSED ] + Content: 010000000000000040000000000000000100000000000000789cd36280002d3269002f800151 diff --git a/llvm/test/tools/llvm-objcopy/ELF/compress-sections.s b/llvm/test/tools/llvm-objcopy/ELF/compress-sections.s new file mode 100644 index 0000000..e6fa860 --- /dev/null +++ b/llvm/test/tools/llvm-objcopy/ELF/compress-sections.s @@ -0,0 +1,128 @@ +# REQUIRES: x86-registered-target, zlib, zstd + +# RUN: rm -rf %t && mkdir %t && cd %t +# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o a.o +## '*0=none' wins because it is the last. '*0' sections are decompressed (if originally compressed) or kept unchanged (if uncompressed). +## No section is named 'nomatch'. The third option is a no-op. 
+# RUN: llvm-objcopy a.o out --compress-sections='*0=zlib' --compress-sections '*0=none' --compress-sections 'nomatch=none' 2>&1 | count 0 +# RUN: llvm-readelf -S out | FileCheck %s --check-prefix=CHECK1 + +# CHECK1: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK1: .text PROGBITS [[#%x,TEXT:]] [[#%x,]] [[#%x,]] 00 AX 0 0 4 +# CHECK1: foo0 PROGBITS [[#%x,FOO0:]] [[#%x,]] [[#%x,]] 00 A 0 0 8 +# CHECK1-NEXT: .relafoo0 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 I 11 3 8 +# CHECK1-NEXT: foo1 PROGBITS [[#%x,FOO1:]] [[#%x,]] [[#%x,]] 00 A 0 0 8 +# CHECK1-NEXT: .relafoo1 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 I 11 5 8 +# CHECK1: nonalloc0 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 8 +# CHECK1-NEXT: .relanonalloc0 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 I 11 7 8 +# CHECK1-NEXT: nonalloc1 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 8 +# CHECK1-NEXT: .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MS 0 0 1 + +## Mixing zlib and zstd. +# RUN: llvm-objcopy a.o out2 --compress-sections '*c0=zlib' --compress-sections .debug_str=zstd +# RUN: llvm-readelf -Sr -x nonalloc0 -x .debug_str out2 2>&1 | FileCheck %s --check-prefix=CHECK2 +# RUN: llvm-readelf -z -x nonalloc0 -x .debug_str out2 | FileCheck %s --check-prefix=CHECK2DE + +# CHECK2: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK2: .text PROGBITS [[#%x,TEXT:]] [[#%x,]] [[#%x,]] 00 AX 0 0 4 +# CHECK2: foo0 PROGBITS [[#%x,FOO0:]] [[#%x,]] [[#%x,]] 00 A 0 0 8 +# CHECK2-NEXT: .relafoo0 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 I 11 3 8 +# CHECK2-NEXT: foo1 PROGBITS [[#%x,FOO1:]] [[#%x,]] [[#%x,]] 00 A 0 0 8 +# CHECK2-NEXT: .relafoo1 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 I 11 5 8 +# CHECK2: nonalloc0 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 C 0 0 8 +# CHECK2-NEXT: .relanonalloc0 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 IC 11 7 8 +# CHECK2-NEXT: nonalloc1 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 8 +# CHECK2-NEXT: .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MSC 0 0 8 + +## llvm-readelf -r doesn't support SHF_COMPRESSED SHT_RELA. +# CHECK2: warning: {{.*}}: unable to read relocations from SHT_RELA section with index 8: section [index 8] has an invalid sh_size ([[#]]) which is not a multiple of its sh_entsize (24) + +# CHECK2: Hex dump of section 'nonalloc0': +## zlib with ch_size=0x10 +# CHECK2-NEXT: 01000000 00000000 10000000 00000000 +# CHECK2-NEXT: 08000000 00000000 {{.*}} +# CHECK2: Hex dump of section '.debug_str': +## zstd with ch_size=0x38 +# CHECK2-NEXT: 02000000 00000000 38000000 00000000 +# CHECK2-NEXT: 01000000 00000000 {{.*}} + +# CHECK2DE: Hex dump of section 'nonalloc0': +# CHECK2DE-NEXT: 0x00000000 00000000 00000000 00000000 00000000 ................ +# CHECK2DE-EMPTY: +# CHECK2DE-NEXT: Hex dump of section '.debug_str': +# CHECK2DE-NEXT: 0x00000000 41414141 41414141 41414141 41414141 AAAAAAAAAAAAAAAA + +## --decompress-debug-sections takes precedence, even if it is before --compress-sections. 
+# RUN: llvm-objcopy a.o out3 --decompress-debug-sections --compress-sections .debug_str=zstd +# RUN: llvm-readelf -S out3 | FileCheck %s --check-prefix=CHECK3 + +# CHECK3: .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MS 0 0 1 + +# RUN: llvm-objcopy a.o out4 --compress-sections '*0=zlib' +# RUN: llvm-readelf -S out4 | FileCheck %s --check-prefix=CHECK4 + +# CHECK4: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK4: .text PROGBITS [[#%x,TEXT:]] [[#%x,]] [[#%x,]] 00 AX 0 0 4 +# CHECK4: foo0 PROGBITS [[#%x,FOO0:]] [[#%x,]] [[#%x,]] 00 AC 0 0 8 +# CHECK4-NEXT: .relafoo0 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 IC 11 3 8 +# CHECK4-NEXT: foo1 PROGBITS [[#%x,FOO1:]] [[#%x,]] [[#%x,]] 00 A 0 0 8 +# CHECK4-NEXT: .relafoo1 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 I 11 5 8 +# CHECK4: nonalloc0 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 C 0 0 8 +# CHECK4-NEXT: .relanonalloc0 RELA [[#%x,]] [[#%x,]] [[#%x,]] 18 IC 11 7 8 +# CHECK4-NEXT: nonalloc1 PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 8 +# CHECK4-NEXT: .debug_str PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 01 MS 0 0 1 + +## If a section is already compressed, compression request for another format is ignored. +# RUN: llvm-objcopy a.o out5 --compress-sections 'nonalloc0=zlib' +# RUN: llvm-readelf -x nonalloc0 out5 | FileCheck %s --check-prefix=CHECK5 +# RUN: llvm-objcopy out5 out5a --compress-sections 'nonalloc0=zstd' +# RUN: cmp out5 out5a + +# CHECK5: Hex dump of section 'nonalloc0': +## zlib with ch_size=0x10 +# CHECK5-NEXT: 01000000 00000000 10000000 00000000 +# CHECK5-NEXT: 08000000 00000000 {{.*}} + +# RUN: not llvm-objcopy --compress-sections=foo a.o out 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR1 --implicit-check-not=error: +# ERR1: error: --compress-sections: parse error, not 'section-glob=[none|zlib|zstd]' + +# RUN: llvm-objcopy --compress-sections 'a[=zlib' a.o out 2>&1 | \ +# RUN: FileCheck %s --check-prefix=ERR2 --implicit-check-not=error: +# ERR2: warning: invalid glob pattern, unmatched '[' + +# RUN: not llvm-objcopy a.o out --compress-sections='.debug*=zlib-gabi' --compress-sections='.debug*=' 2>&1 | \ +# RUN: FileCheck -check-prefix=ERR3 %s +# ERR3: error: invalid or unsupported --compress-sections format: .debug*=zlib-gabi + +# RUN: not llvm-objcopy a.o out --compress-sections='!.debug*=zlib' 2>&1 | \ +# RUN: FileCheck -check-prefix=ERR4 %s +# ERR4: error: --compress-sections: negative pattern is unsupported + +.globl _start +_start: + ret + +.section foo0,"a" +.balign 8 +.quad .text-. +.quad .text-. +.section foo1,"a" +.balign 8 +.quad .text-. +.quad .text-. 
+.section nonalloc0,"" +.balign 8 +.quad .text+1 +.quad .text+2 +sym0: +.section nonalloc1,"" +.balign 8 +.quad 42 +sym1: + +.section .debug_str,"MS",@progbits,1 +.Linfo_string0: + .asciz "AAAAAAAAAAAAAAAAAAAAAAAAAAA" +.Linfo_string1: + .asciz "BBBBBBBBBBBBBBBBBBBBBBBBBBB" diff --git a/llvm/test/tools/llvm-objcopy/ELF/decompress-sections.test b/llvm/test/tools/llvm-objcopy/ELF/decompress-sections.test index 4258ddb..d9f4f38 100644 --- a/llvm/test/tools/llvm-objcopy/ELF/decompress-sections.test +++ b/llvm/test/tools/llvm-objcopy/ELF/decompress-sections.test @@ -4,6 +4,8 @@ # RUN: yaml2obj %s -o %t # RUN: llvm-objcopy --decompress-debug-sections %t %t.de # RUN: llvm-readelf -S %t.de | FileCheck %s +# RUN: llvm-objcopy --compress-sections '*nonalloc=none' --compress-sections .debugx=none %t %t.1.de +# RUN: cmp %t.de %t.1.de # CHECK: Name Type Address Off Size ES Flg Lk Inf Al # CHECK: .debug_alloc PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 AC 0 0 0 @@ -11,6 +13,33 @@ # CHECK-NEXT: .debugx PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 1 # CHECK-NEXT: nodebug PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 C 0 0 0 +# RUN: llvm-objcopy --compress-sections '.debug*=none' %t %t2.de +# RUN: llvm-readelf -S -x .debug_alloc -x .debug_nonalloc -x .debugx %t2.de | FileCheck %s --check-prefix=CHECK2 + +# CHECK2: Name Type Address Off Size ES Flg Lk Inf Al +# CHECK2: .debug_alloc PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 A 0 0 1 +# CHECK2-NEXT: .debug_nonalloc PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 1 +# CHECK2-NEXT: .debugx PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 0 0 1 +# CHECK2-NEXT: nodebug PROGBITS 0000000000000000 [[#%x,]] [[#%x,]] 00 C 0 0 0 + +# CHECK2: Hex dump of section '.debug_alloc': +# CHECK2-NEXT: 0x00000000 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000010 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000020 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000030 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-EMPTY: +# CHECK2: Hex dump of section '.debug_nonalloc': +# CHECK2-NEXT: 0x00000000 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000010 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000020 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000030 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-EMPTY: +# CHECK2-NEXT: Hex dump of section '.debugx': +# CHECK2-NEXT: 0x00000000 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000010 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000020 2a000000 00000000 2a000000 00000000 *.......*....... +# CHECK2-NEXT: 0x00000030 2a000000 00000000 2a000000 00000000 *.......*....... 
+ --- !ELF FileHeader: Class: ELFCLASS64 diff --git a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp index 7269c51..70e8546 100644 --- a/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp +++ b/llvm/tools/llvm-objcopy/ObjcopyOptions.cpp @@ -736,6 +736,42 @@ objcopy::parseObjcopyOptions(ArrayRef RawArgsArr, return createStringError(errc::invalid_argument, Reason); } + for (const auto *A : InputArgs.filtered(OBJCOPY_compress_sections)) { + SmallVector Fields; + StringRef(A->getValue()).split(Fields, '='); + if (Fields.size() != 2 || Fields[1].empty()) { + return createStringError( + errc::invalid_argument, + A->getSpelling() + + ": parse error, not 'section-glob=[none|zlib|zstd]'"); + } + + auto Type = StringSwitch(Fields[1]) + .Case("zlib", DebugCompressionType::Zlib) + .Case("zstd", DebugCompressionType::Zstd) + .Default(DebugCompressionType::None); + if (Type == DebugCompressionType::None && Fields[1] != "none") { + return createStringError( + errc::invalid_argument, + "invalid or unsupported --compress-sections format: %s", + A->getValue()); + } + + auto &P = Config.compressSections.emplace_back(); + P.second = Type; + auto Matcher = + NameOrPattern::create(Fields[0], SectionMatchStyle, ErrorCallback); + // =none allows overriding a previous =zlib or =zstd. Reject negative + // patterns, which would be confusing. + if (Matcher && !Matcher->isPositiveMatch()) { + return createStringError( + errc::invalid_argument, + "--compress-sections: negative pattern is unsupported"); + } + if (Error E = P.first.addMatcher(std::move(Matcher))) + return std::move(E); + } + Config.AddGnuDebugLink = InputArgs.getLastArgValue(OBJCOPY_add_gnu_debuglink); // The gnu_debuglink's target is expected to not change or else its CRC would // become invalidated and get rejected. We can avoid recalculating the diff --git a/llvm/tools/llvm-objcopy/ObjcopyOpts.td b/llvm/tools/llvm-objcopy/ObjcopyOpts.td index be02616..4bc80eb 100644 --- a/llvm/tools/llvm-objcopy/ObjcopyOpts.td +++ b/llvm/tools/llvm-objcopy/ObjcopyOpts.td @@ -35,6 +35,12 @@ def : Flag<["--"], "compress-debug-sections">, Alias, AliasArgs<["zlib"]>; def decompress_debug_sections : Flag<["--"], "decompress-debug-sections">, HelpText<"Decompress DWARF debug sections">; +defm compress_sections + : Eq<"compress-sections", + "Compress or decompress sections using specified format. Supported " + "formats: zlib, zstd. Specify 'none' for decompression">, + MetaVarName<"=">; + defm split_dwo : Eq<"split-dwo", "Equivalent to --extract-dwo and as the output file and no other options, " "and then --strip-dwo on the input file">, -- cgit v1.1 From dcc45faa30041a3378bcde4857df205382f1996a Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 4 Apr 2024 09:38:01 -0700 Subject: [ELF] PROVIDE: fix spurious "symbol not found" When archive member extraction involving ENTRY happens after `addScriptReferencedSymbolsToSymTable`, `addScriptReferencedSymbolsToSymTable` may fail to define some PROVIDE symbols used by ENTRY. This is an edge case that regressed after #84512. (The interaction with PROVIDE and ENTRY-in-archive was not considered before). While here, also ensure that --undefined-glob extracted object files are parsed before `addScriptReferencedSymbolsToSymTable`. 
Fixes: ebb326a51fec37b5a47e5702e8ea157cd4f835cd Pull Request: https://github.com/llvm/llvm-project/pull/87530 --- lld/ELF/Driver.cpp | 17 ++++++++++------- lld/test/ELF/linkerscript/symbolreferenced.s | 25 +++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 7 deletions(-) diff --git a/lld/ELF/Driver.cpp b/lld/ELF/Driver.cpp index 8dbff7f..86cc096 100644 --- a/lld/ELF/Driver.cpp +++ b/lld/ELF/Driver.cpp @@ -2767,13 +2767,6 @@ template void LinkerDriver::link(opt::InputArgList &args) { // Create dynamic sections for dynamic linking and static PIE. config->hasDynSymTab = !ctx.sharedFiles.empty() || config->isPic; - script->addScriptReferencedSymbolsToSymTable(); - - // Prevent LTO from removing any definition referenced by -u. - for (StringRef name : config->undefined) - if (Defined *sym = dyn_cast_or_null(symtab.find(name))) - sym->isUsedInRegularObj = true; - // If an entry symbol is in a static archive, pull out that file now. if (Symbol *sym = symtab.find(config->entry)) handleUndefined(sym, "--entry"); @@ -2782,6 +2775,16 @@ template void LinkerDriver::link(opt::InputArgList &args) { for (StringRef pat : args::getStrings(args, OPT_undefined_glob)) handleUndefinedGlob(pat); + // After potential archive member extraction involving ENTRY and + // -u/--undefined-glob, check whether PROVIDE symbols should be defined (the + // RHS may refer to definitions in just extracted object files). + script->addScriptReferencedSymbolsToSymTable(); + + // Prevent LTO from removing any definition referenced by -u. + for (StringRef name : config->undefined) + if (Defined *sym = dyn_cast_or_null(symtab.find(name))) + sym->isUsedInRegularObj = true; + // Mark -init and -fini symbols so that the LTO doesn't eliminate them. if (Symbol *sym = dyn_cast_or_null(symtab.find(config->init))) sym->isUsedInRegularObj = true; diff --git a/lld/test/ELF/linkerscript/symbolreferenced.s b/lld/test/ELF/linkerscript/symbolreferenced.s index 6f583d2..6848082 100644 --- a/lld/test/ELF/linkerscript/symbolreferenced.s +++ b/lld/test/ELF/linkerscript/symbolreferenced.s @@ -50,6 +50,21 @@ # RUN: not ld.lld -T chain2.t a.o 2>&1 | FileCheck %s --check-prefix=ERR --implicit-check-not=error: # ERR-COUNT-3: error: chain2.t:1: symbol not found: undef +## _start in a lazy object file references PROVIDE symbols. We extract _start +## earlier to avoid spurious "symbol not found" errors. +# RUN: llvm-mc -filetype=obj -triple=x86_64 undef.s -o undef.o +# RUN: llvm-mc -filetype=obj -triple=x86_64 start.s -o start.o +# RUN: ld.lld -T chain2.t undef.o --start-lib start.o --end-lib -o lazy +# RUN: llvm-nm lazy | FileCheck %s --check-prefix=LAZY +# RUN: ld.lld -e 0 -T chain2.t --undefined-glob '_start*' undef.o --start-lib start.o --end-lib -o lazy +# RUN: llvm-nm lazy | FileCheck %s --check-prefix=LAZY + +# LAZY: T _start +# LAZY-NEXT: t f1 +# LAZY-NEXT: T f2 +# LAZY-NEXT: T newsym +# LAZY-NEXT: T unde + #--- a.s .global _start _start: @@ -89,3 +104,13 @@ PROVIDE(newsym = f1); PROVIDE(f2 = undef); PROVIDE_HIDDEN(f1 = f2); PROVIDE(newsym = f1); + +#--- undef.s +.globl undef +undef: ret + +#--- start.s +.globl _start +_start: ret +.data +.quad newsym -- cgit v1.1 From be8fd86f6a57da79a4dbc8d1f4dca2e7adb1192a Mon Sep 17 00:00:00 2001 From: Gulfem Savrun Yeniceri Date: Thu, 4 Apr 2024 16:36:43 +0000 Subject: Revert "[GlobalISel] Fix the infinite loop issue in `commute_int_constant_to_rhs`" This reverts commit 1f01c580444ea2daef67f95ffc5fde2de5a37cec because combine-commute-int-const-lhs.mir test failed in multiple builders. 
https://lab.llvm.org/buildbot/#/builders/124/builds/10375 https://luci-milo.appspot.com/ui/p/fuchsia/builders/prod/clang-linux-x64/b8751607530180046481/overview --- llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp | 17 +++++++------ .../GlobalISel/combine-commute-int-const-lhs.mir | 28 ---------------------- 2 files changed, 8 insertions(+), 37 deletions(-) delete mode 100644 llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp index 719209e..e53e35d 100644 --- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp +++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp @@ -6273,15 +6273,14 @@ bool CombinerHelper::matchShiftsTooBig(MachineInstr &MI) { bool CombinerHelper::matchCommuteConstantToRHS(MachineInstr &MI) { Register LHS = MI.getOperand(1).getReg(); Register RHS = MI.getOperand(2).getReg(); - if (!getIConstantVRegVal(LHS, MRI)) { - // Skip commuting if LHS is not a constant. But, LHS may be a - // G_CONSTANT_FOLD_BARRIER. If so we commute as long as we don't already - // have a constant on the RHS. - if (MRI.getVRegDef(LHS)->getOpcode() != - TargetOpcode::G_CONSTANT_FOLD_BARRIER) - return false; - } - // Commute as long as RHS is not a constant or G_CONSTANT_FOLD_BARRIER. + auto *LHSDef = MRI.getVRegDef(LHS); + if (getIConstantVRegVal(LHS, MRI).has_value()) + return true; + + // LHS may be a G_CONSTANT_FOLD_BARRIER. If so we commute + // as long as we don't already have a constant on the RHS. + if (LHSDef->getOpcode() != TargetOpcode::G_CONSTANT_FOLD_BARRIER) + return false; return MRI.getVRegDef(RHS)->getOpcode() != TargetOpcode::G_CONSTANT_FOLD_BARRIER && !getIConstantVRegVal(RHS, MRI); diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir deleted file mode 100644 index b145a6d..0000000 --- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-commute-int-const-lhs.mir +++ /dev/null @@ -1,28 +0,0 @@ -# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4 -# RUN: llc -mtriple aarch64 -run-pass=aarch64-prelegalizer-combiner %s -o - \ -# RUN: --aarch64prelegalizercombiner-disable-rule=constant_fold_binop | FileCheck %s - -# `constant_fold_binop` is disabled to trigger the infinite loop in `commute_int_constant_to_rhs`. - ---- -name: add -tracksRegLiveness: true -body: | - bb.0: - liveins: $s0 - - ; CHECK-LABEL: name: add - ; CHECK: liveins: $s0 - ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: %c0:_(s32) = G_CONSTANT i32 1 - ; CHECK-NEXT: %c1:_(s32) = G_CONSTANT i32 2 - ; CHECK-NEXT: %add:_(s32) = G_ADD %c0, %c1 - ; CHECK-NEXT: $s0 = COPY %add(s32) - ; CHECK-NEXT: RET_ReallyLR - %c0:_(s32) = G_CONSTANT i32 1 - %c1:_(s32) = G_CONSTANT i32 2 - %add:_(s32) = G_ADD %c0, %c1 - $s0 = COPY %add - RET_ReallyLR - -... -- cgit v1.1 From dcab42a0f9aab1d06aadc821e64af381da744819 Mon Sep 17 00:00:00 2001 From: Julian Nagele Date: Thu, 4 Apr 2024 17:44:50 +0100 Subject: [TBAA] Test for tbaa.struct metadata with bitfields in big endian layout (#87617) This test exposes what I think is invalid tbaa.struct metadata currently generated for bitfields when using big endian layout. The regions given by `!{i64 2, i64 4, [[META3:![0-9]+]], i64 4, i64 4 ...` are overlapping. This issue was originally observed in https://github.com/llvm/llvm-project/pull/86709. 
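To make the overlap concrete, a worked check of the field descriptors in the CHECK lines of the new test: in the big-endian output the first descriptor (offset 2, size 4) covers bytes [2, 6) and the second (offset 4, size 4) covers bytes [4, 8), so bytes 4 and 5 are described twice; in the little-endian output the descriptors start at offsets 0, 4 and 8 and give the disjoint ranges [0, 4), [4, 8) and [8, 16).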
--- .../CodeGen/tbaa-struct-bitfield-endianness.cpp | 39 ++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp diff --git a/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp b/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp new file mode 100644 index 0000000..80884b4 --- /dev/null +++ b/clang/test/CodeGen/tbaa-struct-bitfield-endianness.cpp @@ -0,0 +1,39 @@ +// RUN: %clang_cc1 -triple aarch64_be-apple-darwin -emit-llvm -o - -O1 %s | \ +// RUN: FileCheck -check-prefixes=CHECK,CHECK-BE %s +// RUN: %clang_cc1 -triple aarch64-apple-darwin -emit-llvm -o - -O1 %s | \ +// RUN: FileCheck -check-prefixes=CHECK,CHECK-LE %s +// +// Check that TBAA metadata for structs containing bitfields is +// consistent between big and little endian layouts. +// +// FIXME: The metadata below is invalid for the big endian layout: the +// start offset of 2 is incorrect. + +struct NamedBitfields { + int f1 : 8; + int f2 : 8; + unsigned f3 : 1; + unsigned f4 : 15; + int f5; + double f6; +}; + +// CHECK-LABEL: _Z4copyP14NamedBitfieldsS0_ +// CHECK-SAME: ptr nocapture noundef writeonly [[A1:%.*]], ptr nocapture noundef readonly [[A2:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: tail call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) [[A1]], ptr noundef nonnull align 8 dereferenceable(16) [[A2]], i64 16, i1 false), !tbaa.struct [[TBAA_STRUCT2:![0-9]+]] +// CHECK-NEXT: ret void +// +void copy(NamedBitfields *a1, NamedBitfields *a2) { + *a1 = *a2; +} + +// CHECK-BE: [[TBAA_STRUCT2]] = !{i64 2, i64 4, [[META3:![0-9]+]], i64 4, i64 4, [[META6:![0-9]+]], i64 8, i64 8, [[META8:![0-9]+]]} +// CHECK-LE: [[TBAA_STRUCT2]] = !{i64 0, i64 4, [[META3:![0-9]+]], i64 4, i64 4, [[META6:![0-9]+]], i64 8, i64 8, [[META8:![0-9]+]]} +// CHECK: [[META3]] = !{[[META4:![0-9]+]], [[META4]], i64 0} +// CHECK: [[META4]] = !{!"omnipotent char", [[META5:![0-9]+]], i64 0} +// CHECK: [[META5]] = !{!"Simple C++ TBAA"} +// CHECK: [[META6]] = !{[[META7:![0-9]+]], [[META7]], i64 0} +// CHECK: [[META7]] = !{!"int", [[META4]], i64 0} +// CHECK: [[META8]] = !{[[META9:![0-9]+]], [[META9]], i64 0} +// CHECK: [[META9]] = !{!"double", [[META4]], i64 0} -- cgit v1.1 From fb2a380b5d4e483602f8cf8f36ca1df322a14a77 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 4 Apr 2024 16:46:53 +0000 Subject: [gn build] Manually port 1679b27 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 8a2ab18..6c09b36 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -778,12 +778,12 @@ if (current_toolchain == default_toolchain) { "__tree", "__tuple/find_index.h", "__tuple/make_tuple_types.h", - "__tuple/pair_like.h", "__tuple/sfinae_helpers.h", "__tuple/tuple_element.h", "__tuple/tuple_indices.h", "__tuple/tuple_like.h", "__tuple/tuple_like_ext.h", + "__tuple/tuple_like_no_subrange.h", "__tuple/tuple_size.h", "__tuple/tuple_types.h", "__type_traits/add_const.h", -- cgit v1.1 From 338ecfbac351e4b211836ad73d75bbdf729e9134 Mon Sep 17 00:00:00 2001 From: Fraser Cormack Date: Thu, 4 Apr 2024 17:54:54 +0100 Subject: [libclc] Use VERSION_GREATER_EQUAL where appropriate. NFC This was added in CMake 3.7, which might explain why it wasn't used before. Also reformat a couple of comments. 
--- libclc/CMakeLists.txt | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index 2d000cf..21e5cac 100644 --- a/libclc/CMakeLists.txt +++ b/libclc/CMakeLists.txt @@ -20,7 +20,7 @@ set_property(DIRECTORY APPEND PROPERTY CMAKE_CONFIGURE_DEPENDS spirv64/lib/SOURCES ) -set( LIBCLC_MIN_LLVM "3.9.0" ) +set( LIBCLC_MIN_LLVM 3.9.0 ) set( LIBCLC_TARGETS_TO_BUILD "all" CACHE STRING "Semicolon-separated list of targets to build, or 'all'." ) @@ -32,7 +32,7 @@ include(AddLLVM) message( STATUS "libclc LLVM version: ${LLVM_PACKAGE_VERSION}" ) -if( ${LLVM_PACKAGE_VERSION} VERSION_LESS ${LIBCLC_MIN_LLVM} ) +if( LLVM_PACKAGE_VERSION VERSION_LESS LIBCLC_MIN_LLVM ) message( FATAL_ERROR "libclc needs at least LLVM ${LIBCLC_MIN_LLVM}" ) endif() @@ -66,7 +66,7 @@ set( LIBCLC_TARGETS_ALL ) # mesa3d environment is only available since LLVM 4.0 -if( ${LLVM_PACKAGE_VERSION} VERSION_GREATER "3.9.0" ) +if( LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 4.0.0 ) list( APPEND LIBCLC_TARGETS_ALL amdgcn-mesa-mesa3d ) endif() @@ -102,7 +102,7 @@ set( LLVM_VERSION_DEFINE "-DHAVE_LLVM=0x${LLVM_VERSION_MAJOR}0${LLVM_VERSION_MIN # LLVM 13 enables standard includes by default -if( ${LLVM_PACKAGE_VERSION} VERSION_GREATER "12.99.99" ) +if( LLVM_PACKAGE_VERSION VERSION_GREATER_EQUAL 13.0.0 ) set( CMAKE_LLAsm_FLAGS "${CMAKE_LLAsm_FLAGS} -cl-no-stdinc" ) set( CMAKE_CLC_FLAGS "${CMAKE_CLC_FLAGS} -cl-no-stdinc" ) endif() @@ -205,7 +205,7 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) list( APPEND dirs amdgpu ) endif() - #nvptx is special + # nvptx is special if( ${ARCH} STREQUAL nvptx OR ${ARCH} STREQUAL nvptx64 ) set( DARCH ptx ) else() @@ -226,8 +226,8 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) endforeach() endforeach() - # Add the generated convert.cl here to prevent adding - # the one listed in SOURCES + # Add the generated convert.cl here to prevent adding the one listed in + # SOURCES if( NOT ${ARCH} STREQUAL "spirv" AND NOT ${ARCH} STREQUAL "spirv64" ) if( NOT ENABLE_RUNTIME_SUBNORMAL AND NOT ${ARCH} STREQUAL "clspv" AND NOT ${ARCH} STREQUAL "clspv64" ) -- cgit v1.1 From 4167fec40768fb05b411bcefa186fe2b106ca7e4 Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Thu, 4 Apr 2024 19:03:01 +0200 Subject: [libc++][chrono] Completes the tzdb class. (#82157) It adds the missing member functions of the tzdb class and adds the free functions that use these member functions. 
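As a rough usage sketch of the two free functions added here (assuming a C++20 library built with libc++'s still-experimental tzdb support and a host time-zone database; the output depends on the machine's configuration):

  #include <chrono>
  #include <iostream>

  int main() {
    // The zone currently configured on the host (TZ or /etc/localtime on POSIX).
    const std::chrono::time_zone* tz = std::chrono::current_zone();
    std::cout << "current zone: " << tz->name() << '\n';

    // locate_zone follows links, so the returned name may differ from the
    // query (e.g. "UTC" may resolve to "Etc/UTC"); an unknown name throws
    // std::runtime_error.
    const std::chrono::time_zone* utc = std::chrono::locate_zone("UTC");
    std::cout << "\"UTC\" resolves to: " << utc->name() << '\n';
  }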
Implements parts of: - P0355 Extending to Calendars and Time Zones --- libcxx/include/__chrono/tzdb.h | 35 +++++++++ libcxx/include/__chrono/tzdb_list.h | 10 +++ libcxx/include/chrono | 5 ++ libcxx/modules/std/chrono.inc | 4 +- libcxx/src/tzdb.cpp | 59 +++++++++++++++ .../chrono.nodiscard_extensions.compile.pass.cpp | 8 +++ .../chrono.nodiscard_extensions.verify.cpp | 8 +++ .../time.zone.db.tzdb/locate_zone.pass.cpp | 84 ++++++++++++++++++++++ .../time.zone.db.access/current_zone.pass.cpp | 77 ++++++++++++++++++++ .../time.zone.db.access/locate_zone.pass.cpp | 62 ++++++++++++++++ .../time.zone.db.tzdb/current_zone.pass.cpp | 79 ++++++++++++++++++++ .../time.zone.db.tzdb/locate_zone.pass.cpp | 64 +++++++++++++++++ 12 files changed, 493 insertions(+), 2 deletions(-) create mode 100644 libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp create mode 100644 libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp create mode 100644 libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp create mode 100644 libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp create mode 100644 libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp diff --git a/libcxx/include/__chrono/tzdb.h b/libcxx/include/__chrono/tzdb.h index 45c20f2..e0bfedf 100644 --- a/libcxx/include/__chrono/tzdb.h +++ b/libcxx/include/__chrono/tzdb.h @@ -16,6 +16,7 @@ // Enable the contents of the header only when libc++ was built with experimental features enabled. #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_TZDB) +# include <__algorithm/ranges_lower_bound.h> # include <__chrono/leap_second.h> # include <__chrono/time_zone.h> # include <__chrono/time_zone_link.h> @@ -43,6 +44,40 @@ struct tzdb { vector links; vector leap_seconds; + + [[nodiscard]] _LIBCPP_HIDE_FROM_ABI const time_zone* __locate_zone(string_view __name) const { + if (const time_zone* __result = __find_in_zone(__name)) + return __result; + + if (auto __it = ranges::lower_bound(links, __name, {}, &time_zone_link::name); + __it != links.end() && __it->name() == __name) + if (const time_zone* __result = __find_in_zone(__it->target())) + return __result; + + return nullptr; + } + + _LIBCPP_NODISCARD_EXT _LIBCPP_HIDE_FROM_ABI const time_zone* locate_zone(string_view __name) const { + if (const time_zone* __result = __locate_zone(__name)) + return __result; + + std::__throw_runtime_error("tzdb: requested time zone not found"); + } + + _LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI const time_zone* current_zone() const { + return __current_zone(); + } + +private: + _LIBCPP_HIDE_FROM_ABI const time_zone* __find_in_zone(string_view __name) const noexcept { + if (auto __it = ranges::lower_bound(zones, __name, {}, &time_zone::name); + __it != zones.end() && __it->name() == __name) + return std::addressof(*__it); + + return nullptr; + } + + [[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const time_zone* __current_zone() const; }; } // namespace chrono diff --git a/libcxx/include/__chrono/tzdb_list.h b/libcxx/include/__chrono/tzdb_list.h index e8aaf31..693899d 100644 --- a/libcxx/include/__chrono/tzdb_list.h +++ b/libcxx/include/__chrono/tzdb_list.h @@ -17,6 +17,7 @@ #if !defined(_LIBCPP_HAS_NO_INCOMPLETE_TZDB) # include <__availability> +# include <__chrono/time_zone.h> # include <__chrono/tzdb.h> # include <__config> # include <__fwd/string.h> @@ -84,6 +85,15 @@ _LIBCPP_NODISCARD_EXT 
_LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI inline con return get_tzdb_list().front(); } +_LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI inline const time_zone* +locate_zone(string_view __name) { + return get_tzdb().locate_zone(__name); +} + +_LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_HIDE_FROM_ABI inline const time_zone* current_zone() { + return get_tzdb().current_zone(); +} + _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const tzdb& reload_tzdb(); _LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI string remote_version(); diff --git a/libcxx/include/chrono b/libcxx/include/chrono index 4dd4313..8fdc30a 100644 --- a/libcxx/include/chrono +++ b/libcxx/include/chrono @@ -689,6 +689,9 @@ struct tzdb { vector zones; vector links; vector leap_seconds; + + const time_zone* locate_zone(string_view tz_name) const; + const time_zone* current_zone() const; }; class tzdb_list { // C++20 @@ -714,6 +717,8 @@ public: // [time.zone.db.access], time zone database access const tzdb& get_tzdb(); // C++20 tzdb_list& get_tzdb_list(); // C++20 +const time_zone* locate_zone(string_view tz_name); // C++20 +const time_zone* current_zone() // C++20 // [time.zone.db.remote], remote time zone database support const tzdb& reload_tzdb(); // C++20 diff --git a/libcxx/modules/std/chrono.inc b/libcxx/modules/std/chrono.inc index 2c0bd3f..e142280 100644 --- a/libcxx/modules/std/chrono.inc +++ b/libcxx/modules/std/chrono.inc @@ -199,10 +199,10 @@ export namespace std { using std::chrono::tzdb_list; // [time.zone.db.access], time zone database access - // using std::chrono::current_zone; + using std::chrono::current_zone; using std::chrono::get_tzdb; using std::chrono::get_tzdb_list; - // using std::chrono::locate_zone; + using std::chrono::locate_zone; // [time.zone.db.remote], remote time zone database support using std::chrono::reload_tzdb; diff --git a/libcxx/src/tzdb.cpp b/libcxx/src/tzdb.cpp index 7ba5ceb..2c82a4a 100644 --- a/libcxx/src/tzdb.cpp +++ b/libcxx/src/tzdb.cpp @@ -675,6 +675,57 @@ void __init_tzdb(tzdb& __tzdb, __tz::__rules_storage_type& __rules) { std::ranges::sort(__tzdb.leap_seconds); } +#ifdef _WIN32 +[[nodiscard]] static const time_zone* __current_zone_windows(const tzdb& tzdb) { + // TODO TZDB Implement this on Windows. + std::__throw_runtime_error("unknown time zone"); +} +#else // ifdef _WIN32 +[[nodiscard]] static const time_zone* __current_zone_posix(const tzdb& tzdb) { + // On POSIX systems there are several ways to configure the time zone. + // In order of priority they are: + // - TZ environment variable + // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap08.html#tag_08 + // The documentation is unclear whether or not it's allowed to + // change time zone information. For example the TZ string + // MST7MDT + // this is an entry in tzdata.zi. The value + // MST + // is also an entry. Is it allowed to use the following? + // MST-3 + // Even when this is valid there is no time_zone record in the + // database. Since the library would need to return a valid pointer, + // this means the library needs to allocate and leak a pointer. + // + // - The time zone name is the target of the symlink /etc/localtime + // relative to /usr/share/zoneinfo/ + + // The algorithm is like this: + // - If the environment variable TZ is set and points to a valid + // record use this value. + // - Else use the name based on the `/etc/localtime` symlink. 
+ + if (const char* __tz = getenv("TZ")) + if (const time_zone* __result = tzdb.__locate_zone(__tz)) + return __result; + + filesystem::path __path = "/etc/localtime"; + if (!std::filesystem::exists(__path)) + std::__throw_runtime_error("tzdb: the symlink '/etc/localtime' does not exist"); + + if (!std::filesystem::is_symlink(__path)) + std::__throw_runtime_error("tzdb: the path '/etc/localtime' is not a symlink"); + + filesystem::path __tz = filesystem::read_symlink(__path); + string __name = filesystem::relative(__tz, "/usr/share/zoneinfo/"); + + if (const time_zone* __result = tzdb.__locate_zone(__name)) + return __result; + + std::__throw_runtime_error(("tzdb: the time zone '" + __name + "' is not found in the database").c_str()); +} +#endif // ifdef _WIN32 + //===----------------------------------------------------------------------===// // Public API //===----------------------------------------------------------------------===// @@ -684,6 +735,14 @@ _LIBCPP_NODISCARD_EXT _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI tzdb_l return __result; } +[[nodiscard]] _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const time_zone* tzdb::__current_zone() const { +#ifdef _WIN32 + return chrono::__current_zone_windows(*this); +#else + return chrono::__current_zone_posix(*this); +#endif +} + _LIBCPP_AVAILABILITY_TZDB _LIBCPP_EXPORTED_FROM_ABI const tzdb& reload_tzdb() { if (chrono::remote_version() == chrono::get_tzdb().version) return chrono::get_tzdb(); diff --git a/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.compile.pass.cpp b/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.compile.pass.cpp index c868832..9acb57f 100644 --- a/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.compile.pass.cpp +++ b/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.compile.pass.cpp @@ -38,9 +38,17 @@ void test() { std::chrono::get_tzdb_list(); std::chrono::get_tzdb(); + std::chrono::locate_zone("name"); + std::chrono::current_zone(); std::chrono::remote_version(); { + const std::chrono::tzdb& t = list.front(); + t.locate_zone("name"); + t.current_zone(); + } + + { tz.name(); operator==(tz, tz); operator<=>(tz, tz); diff --git a/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.verify.cpp b/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.verify.cpp index 4d26b46..8795a4e 100644 --- a/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.verify.cpp +++ b/libcxx/test/libcxx/diagnostics/chrono.nodiscard_extensions.verify.cpp @@ -33,9 +33,17 @@ void test() { list.cbegin(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} list.cend(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + { + const std::chrono::tzdb& t = list.front(); + t.locate_zone("name"); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + t.current_zone(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + } + namespace crno = std::chrono; crno::get_tzdb_list(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} crno::get_tzdb(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + crno::locate_zone("n"); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} + crno::current_zone(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} 
crno::remote_version(); // expected-warning {{ignoring return value of function declared with 'nodiscard' attribute}} { diff --git a/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp b/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp new file mode 100644 index 0000000..971f7f0 --- /dev/null +++ b/libcxx/test/libcxx/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp @@ -0,0 +1,84 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// struct tzdb + +// const time_zone* locate_zone(string_view tz_name) const; + +#include +#include +#include +#include + +#include "test_macros.h" +#include "assert_macros.h" +#include "concat_macros.h" +#include "filesystem_test_helper.h" +#include "test_tzdb.h" + +scoped_test_env env; +[[maybe_unused]] const std::filesystem::path dir = env.create_dir("zoneinfo"); +const std::filesystem::path file = env.create_file("zoneinfo/tzdata.zi"); + +std::string_view std::chrono::__libcpp_tzdb_directory() { + static std::string result = dir.string(); + return result; +} + +void write(std::string_view input) { + static int version = 0; + + std::ofstream f{file}; + f << "# version " << version++ << '\n'; + f.write(input.data(), input.size()); +} + +static const std::chrono::tzdb& parse(std::string_view input) { + write(input); + return std::chrono::reload_tzdb(); +} + +int main(int, const char**) { + const std::chrono::tzdb& tzdb = parse( + R"( +Z zone 0 r f +L zone link +L link link_to_link +)"); + + { + const std::chrono::time_zone* tz = tzdb.locate_zone("zone"); + assert(tz); + assert(tz->name() == "zone"); + } + { + const std::chrono::time_zone* tz = tzdb.locate_zone("link"); + assert(tz); + assert(tz->name() == "zone"); + } + + TEST_VALIDATE_EXCEPTION( + std::runtime_error, + [&]([[maybe_unused]] const std::runtime_error& e) { + std::string_view what{"tzdb: requested time zone not found"}; + TEST_LIBCPP_REQUIRE( + e.what() == what, + TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); + }, + TEST_IGNORE_NODISCARD tzdb.locate_zone("link_to_link")); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp new file mode 100644 index 0000000..d85c8ba --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/current_zone.pass.cpp @@ -0,0 +1,77 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// const time_zone* current_zone(); + +#include +#include +#include +#include + +#include "test_macros.h" +#include "assert_macros.h" +#include "concat_macros.h" + +#ifdef _WIN32 +static void set_tz(std::string zone) { + // Note Windows does not have setenv, only putenv + // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/putenv-s-wputenv-s?view=msvc-170 + // Unlike POSIX it does not mention the string of putenv becomes part + // of the environment. + + int status = _putenv_s("TZ", zone.c_str(), 1); + assert(status == 0); +} + +#else +static void set_tz(const std::string& zone) { + int status = setenv("TZ", zone.c_str(), 1); + assert(status == 0); +} +#endif + +static void test_zone(const std::string& zone) { + set_tz(zone); + const std::chrono::time_zone* tz = std::chrono::current_zone(); + assert(tz); + assert(tz->name() == zone); +} + +static void test_link(const std::string& link, std::string_view zone) { + set_tz(link); + const std::chrono::time_zone* tz = std::chrono::current_zone(); + assert(tz); + assert(tz->name() == zone); +} + +int main(int, const char**) { + const std::chrono::time_zone* tz = std::chrono::current_zone(); + // Returns a valid time zone, the value depends on the OS settings. + assert(tz); + // setting the environment to an invalid value returns the value of + // the OS setting. + set_tz("This is not a time zone"); + assert(tz == std::chrono::current_zone()); + + const std::chrono::tzdb& db = std::chrono::get_tzdb(); + for (const auto& zone : db.zones) + test_zone(std::string{zone.name()}); + + for (const auto& link : db.links) + test_link(std::string{link.name()}, link.target()); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp new file mode 100644 index 0000000..c3142a8 --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.access/locate_zone.pass.cpp @@ -0,0 +1,62 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// const time_zone* locate_zone(string_view tz_name); + +#include +#include +#include + +#include "test_macros.h" +#include "assert_macros.h" +#include "concat_macros.h" + +static void test_zone(std::string_view zone) { + const std::chrono::time_zone* tz = std::chrono::locate_zone(zone); + assert(tz); + assert(tz->name() == zone); +} + +static void test_link(std::string_view link, std::string_view zone) { + const std::chrono::time_zone* tz = std::chrono::locate_zone(link); + assert(tz); + assert(tz->name() == zone); +} + +static void test_exception([[maybe_unused]] std::string_view zone) { + TEST_VALIDATE_EXCEPTION( + std::runtime_error, + [&]([[maybe_unused]] const std::runtime_error& e) { + std::string_view what{"tzdb: requested time zone not found"}; + TEST_LIBCPP_REQUIRE( + e.what() == what, + TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); + }, + TEST_IGNORE_NODISCARD std::chrono::locate_zone(zone)); +} + +int main(int, const char**) { + const std::chrono::tzdb& db = std::chrono::get_tzdb(); + for (const auto& zone : db.zones) + test_zone(zone.name()); + + for (const auto& link : db.links) + test_link(link.name(), link.target()); + + test_exception("This is not a time zone"); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp new file mode 100644 index 0000000..7b4218c --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/current_zone.pass.cpp @@ -0,0 +1,79 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// struct tzdb + +// const time_zone* current_zone() const; + +#include +#include +#include +#include + +#include "test_macros.h" +#include "assert_macros.h" +#include "concat_macros.h" + +#ifdef _WIN32 +static void set_tz(std::string zone) { + // Note Windows does not have setenv, only putenv + // https://learn.microsoft.com/en-us/cpp/c-runtime-library/reference/putenv-s-wputenv-s?view=msvc-170 + // Unlike POSIX it does not mention the string of putenv becomes part + // of the environment. 
+ + int status = _putenv_s("TZ", zone.c_str(), 1); + assert(status == 0); +} + +#else +static void set_tz(const std::string& zone) { + int status = setenv("TZ", zone.c_str(), 1); + assert(status == 0); +} +#endif + +static void test_zone(const std::string& zone) { + set_tz(zone); + const std::chrono::time_zone* tz = std::chrono::get_tzdb().current_zone(); + assert(tz); + assert(tz->name() == zone); +} + +static void test_link(const std::string& link, std::string_view zone) { + set_tz(link); + const std::chrono::time_zone* tz = std::chrono::get_tzdb().current_zone(); + assert(tz); + assert(tz->name() == zone); +} + +int main(int, const char**) { + const std::chrono::time_zone* tz = std::chrono::get_tzdb().current_zone(); + // Returns a valid time zone, the value depends on the OS settings. + assert(tz); + // setting the environment to an invalid value returns the value of + // the OS setting. + set_tz("This is not a time zone"); + assert(tz == std::chrono::get_tzdb().current_zone()); + + const std::chrono::tzdb& db = std::chrono::get_tzdb(); + for (const auto& zone : db.zones) + test_zone(std::string{zone.name()}); + + for (const auto& link : db.links) + test_link(std::string{link.name()}, link.target()); + + return 0; +} diff --git a/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp new file mode 100644 index 0000000..12987f6 --- /dev/null +++ b/libcxx/test/std/time/time.zone/time.zone.db/time.zone.db.tzdb/locate_zone.pass.cpp @@ -0,0 +1,64 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// UNSUPPORTED: c++03, c++11, c++14, c++17 +// UNSUPPORTED: no-filesystem, no-localization, no-tzdb + +// XFAIL: libcpp-has-no-incomplete-tzdb +// XFAIL: availability-tzdb-missing + +// + +// struct tzdb + +// const time_zone* locate_zone(string_view tz_name) const; + +#include +#include +#include + +#include "test_macros.h" +#include "assert_macros.h" +#include "concat_macros.h" + +static void test_zone(std::string_view zone) { + const std::chrono::time_zone* tz = std::chrono::get_tzdb().locate_zone(zone); + assert(tz); + assert(tz->name() == zone); +} + +static void test_link(std::string_view link, std::string_view zone) { + const std::chrono::time_zone* tz = std::chrono::get_tzdb().locate_zone(link); + assert(tz); + assert(tz->name() == zone); +} + +static void test_exception([[maybe_unused]] std::string_view zone) { + TEST_VALIDATE_EXCEPTION( + std::runtime_error, + [&]([[maybe_unused]] const std::runtime_error& e) { + std::string_view what{"tzdb: requested time zone not found"}; + TEST_LIBCPP_REQUIRE( + e.what() == what, + TEST_WRITE_CONCATENATED("\nExpected exception ", what, "\nActual exception ", e.what(), '\n')); + }, + TEST_IGNORE_NODISCARD std::chrono::get_tzdb().locate_zone(zone)); +} + +int main(int, const char**) { + const std::chrono::tzdb& db = std::chrono::get_tzdb(); + for (const auto& zone : db.zones) + test_zone(zone.name()); + + for (const auto& link : db.links) + test_link(link.name(), link.target()); + + test_exception("This is not a time zone"); + + return 0; +} -- cgit v1.1 From 0f7266a97c0443234bf18508a408e6917f6a2d46 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 4 Apr 2024 17:04:58 +0000 Subject: [gn build] Manually port 6f2d8cc0 --- llvm/utils/gn/secondary/libcxx/include/BUILD.gn | 1 + llvm/utils/gn/secondary/libcxx/src/BUILD.gn | 1 + 2 files changed, 2 insertions(+) diff --git a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn index 6c09b36..78a6e6f 100644 --- a/llvm/utils/gn/secondary/libcxx/include/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/include/BUILD.gn @@ -352,6 +352,7 @@ if (current_toolchain == default_toolchain) { "__chrono/formatter.h", "__chrono/hh_mm_ss.h", "__chrono/high_resolution_clock.h", + "__chrono/leap_second.h", "__chrono/literals.h", "__chrono/month.h", "__chrono/month_weekday.h", diff --git a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn index 5530972..90f6f5d 100644 --- a/llvm/utils/gn/secondary/libcxx/src/BUILD.gn +++ b/llvm/utils/gn/secondary/libcxx/src/BUILD.gn @@ -315,6 +315,7 @@ if (libcxx_enable_experimental) { sources = [ "experimental/keep.cpp" ] if (libcxx_enable_filesystem && libcxx_enable_time_zone_database) { sources += [ + "include/tzdb/leap_second_private.h", "include/tzdb/time_zone_link_private.h", "include/tzdb/time_zone_private.h", "include/tzdb/types_private.h", -- cgit v1.1 From 6cf532891f9b49ccae3c7e3dfedb2a3662cdd569 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 4 Apr 2024 17:05:30 +0000 Subject: [gn build] Port 3365d6217901 --- .../utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn index 33fdecf..59dc38c 100644 --- 
a/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn +++ b/llvm/utils/gn/secondary/clang-tools-extra/clang-tidy/readability/BUILD.gn @@ -25,6 +25,7 @@ static_library("readability") { "DeleteNullPointerCheck.cpp", "DuplicateIncludeCheck.cpp", "ElseAfterReturnCheck.cpp", + "EnumInitialValueCheck.cpp", "FunctionCognitiveComplexityCheck.cpp", "FunctionSizeCheck.cpp", "IdentifierLengthCheck.cpp", -- cgit v1.1 From 13e75721117070d52cf3be7d88becccd5a6838b5 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 4 Apr 2024 17:05:31 +0000 Subject: [gn build] Port 8bb9443333e0 --- llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn b/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn index 15766d4..fba8118 100644 --- a/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn +++ b/llvm/utils/gn/secondary/llvm/lib/CodeGen/GlobalISel/BUILD.gn @@ -17,6 +17,7 @@ static_library("GlobalISel") { "CallLowering.cpp", "Combiner.cpp", "CombinerHelper.cpp", + "CombinerHelperVectorOps.cpp", "GIMatchTableExecutor.cpp", "GISelChangeObserver.cpp", "GISelKnownBits.cpp", -- cgit v1.1 From 258dd64978c055110373447325b9b1dd4e5268d7 Mon Sep 17 00:00:00 2001 From: Arthur Eubanks Date: Thu, 4 Apr 2024 17:05:32 +0000 Subject: [gn build] Port fd38366e4525 --- llvm/utils/gn/secondary/bolt/lib/Profile/BUILD.gn | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/utils/gn/secondary/bolt/lib/Profile/BUILD.gn b/llvm/utils/gn/secondary/bolt/lib/Profile/BUILD.gn index 5fead24..dc85fb0 100644 --- a/llvm/utils/gn/secondary/bolt/lib/Profile/BUILD.gn +++ b/llvm/utils/gn/secondary/bolt/lib/Profile/BUILD.gn @@ -12,7 +12,6 @@ static_library("Profile") { "DataAggregator.cpp", "DataReader.cpp", "Heatmap.cpp", - "ProfileReaderBase.cpp", "StaleProfileMatching.cpp", "YAMLProfileReader.cpp", "YAMLProfileWriter.cpp", -- cgit v1.1 From f2d22b5944b3c3f77dd236d89dd419d231243a56 Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 4 Apr 2024 10:09:43 -0700 Subject: [memprof] Make RecordWriterTrait a non-template class (#87604) commit d89914f30bc7c180fe349a5aa0f03438ae6c20a4 Author: Kazu Hirata Date: Wed Apr 3 21:48:38 2024 -0700 changed RecordWriterTrait to a template class with IndexedVersion as a template parameter. This patch changes the class back to a non-template one while retaining the ability to serialize multiple versions. The reason I changed RecordWriterTrait to a template class was because, even if RecordWriterTrait had IndexedVersion as a member variable, RecordWriterTrait::EmitKeyDataLength, being a static function, would not have access to the variable. Since OnDiskChainedHashTableGenerator calls EmitKeyDataLength as: const std::pair &Len = InfoObj.EmitKeyDataLength(Out, I->Key, I->Data); we can make EmitKeyDataLength a member function, but we have one problem. InstrProfWriter::writeImpl calls: void insert(typename Info::key_type_ref Key, typename Info::data_type_ref Data) { Info InfoObj; insert(Key, Data, InfoObj); } which default-constructs RecordWriterTrait without a specific version number. This patch fixes the problem by adjusting InstrProfWriter::writeImpl to call the other form of insert instead: void insert(typename Info::key_type_ref Key, typename Info::data_type_ref Data, Info &InfoObj) To prevent an accidental invocation of the default constructor of RecordWriterTrait, this patch deletes the default constructor. 
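A rough standalone sketch of the shape this gives the writer path (the names and signatures below are illustrative stand-ins, not the actual LLVM API): the trait carries the version, cannot be default-constructed, and is handed explicitly to the generator.

  #include <cstdint>
  #include <iostream>
  #include <utility>
  #include <vector>

  enum class IndexedVersion { Version1, Version2 };

  struct RecordWriterTraitSketch {
    IndexedVersion Version;
    RecordWriterTraitSketch() = delete; // a trait without a version is unusable
    explicit RecordWriterTraitSketch(IndexedVersion V) : Version(V) {}

    // A member (rather than static) function can consult Version when emitting.
    std::pair<unsigned, unsigned> EmitKeyDataLength(std::ostream &OS,
                                                    uint64_t Key,
                                                    unsigned DataLen) const {
      OS << "key=" << Key << " version="
         << (Version == IndexedVersion::Version1 ? 1 : 2) << " len=" << DataLen
         << '\n';
      return {static_cast<unsigned>(sizeof(Key)), DataLen};
    }
  };

  // Minimal generator in the spirit of OnDiskChainedHashTableGenerator: only
  // the insert overload taking an explicit trait object works with a trait
  // whose default constructor is deleted.
  template <typename Info> struct HashTableGeneratorSketch {
    std::vector<std::pair<uint64_t, unsigned>> Entries;

    void insert(uint64_t Key, unsigned Data, Info &InfoObj) {
      (void)InfoObj;
      Entries.push_back({Key, Data});
    }

    void emit(std::ostream &OS, Info &InfoObj) {
      for (const auto &E : Entries)
        InfoObj.EmitKeyDataLength(OS, E.first, E.second);
    }
  };

  int main() {
    RecordWriterTraitSketch Writer(IndexedVersion::Version1);
    HashTableGeneratorSketch<RecordWriterTraitSketch> Gen;
    Gen.insert(0x1234, 16, Writer); // pass the InfoObj explicitly
    Gen.emit(std::cout, Writer);
  }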
--- llvm/include/llvm/ProfileData/MemProf.h | 10 +++++++--- llvm/lib/ProfileData/InstrProfWriter.cpp | 7 +++---- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/ProfileData/MemProf.h b/llvm/include/llvm/ProfileData/MemProf.h index 110e697..0431c18 100644 --- a/llvm/include/llvm/ProfileData/MemProf.h +++ b/llvm/include/llvm/ProfileData/MemProf.h @@ -502,7 +502,7 @@ private: }; // Trait for writing IndexedMemProfRecord data to the on-disk hash table. -template class RecordWriterTrait { +class RecordWriterTrait { public: using key_type = uint64_t; using key_type_ref = uint64_t; @@ -517,12 +517,16 @@ public: // we must use a default constructor with no params for the writer trait so we // have a public member which must be initialized by the user. MemProfSchema *Schema = nullptr; + // The MemProf version to use for the serialization. + IndexedVersion Version; - RecordWriterTrait() = default; + // We do not support the default constructor, which does not set Version. + RecordWriterTrait() = delete; + RecordWriterTrait(IndexedVersion V) : Version(V) {} static hash_value_type ComputeHash(key_type_ref K) { return K; } - static std::pair + std::pair EmitKeyDataLength(raw_ostream &Out, key_type_ref K, data_type_ref V) { using namespace support; diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index a1bc180..96ab729 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -558,14 +558,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { } auto RecordWriter = - std::make_unique>(); + std::make_unique(memprof::Version1); RecordWriter->Schema = &Schema; - OnDiskChainedHashTableGenerator< - memprof::RecordWriterTrait> + OnDiskChainedHashTableGenerator RecordTableGenerator; for (auto &I : MemProfRecordData) { // Insert the key (func hash) and value (memprof record). - RecordTableGenerator.insert(I.first, I.second); + RecordTableGenerator.insert(I.first, I.second, *RecordWriter.get()); } // Release the memory of this MapVector as it is no longer needed. MemProfRecordData.clear(); -- cgit v1.1 From b9ec4ab6ac4f3ae8603da8bfbf10bc0ec104e0b7 Mon Sep 17 00:00:00 2001 From: Fangrui Song Date: Thu, 4 Apr 2024 10:28:36 -0700 Subject: [CMake] Install LLVMgold.so for LLVM_INSTALL_TOOLCHAIN_ONLY=on (#87567) LLVMgold.so can be used with GNU ar, gold, ld, and nm to process LLVM bitcode files. Install it in LLVM_INSTALL_TOOLCHAIN_ONLY=on builds like we install libLTO.so. Suggested by @emelife Fix #84271 --- llvm/tools/gold/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/tools/gold/CMakeLists.txt b/llvm/tools/gold/CMakeLists.txt index 58b3238..5c78529 100644 --- a/llvm/tools/gold/CMakeLists.txt +++ b/llvm/tools/gold/CMakeLists.txt @@ -12,7 +12,7 @@ if( LLVM_ENABLE_PIC AND LLVM_BINUTILS_INCDIR ) TargetParser ) - add_llvm_library(LLVMgold MODULE + add_llvm_library(LLVMgold MODULE INSTALL_WITH_TOOLCHAIN gold-plugin.cpp ) -- cgit v1.1 From 220cdf940e953002df1521bbd061d8e0b4ffed5c Mon Sep 17 00:00:00 2001 From: Fabian Mora Date: Thu, 4 Apr 2024 13:34:46 -0400 Subject: [mlir] Add `requiresReplacedValues` and `visitReplacedValues` to `PromotableOpInterface` (#86792) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add `requiresReplacedValues` and `visitReplacedValues` methods to `PromotableOpInterface`. These methods allow `PromotableOpInterface` ops to transforms definitions mutated by a `store`. 
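A rough standalone sketch of the two-phase flow this enables in Mem2Reg (the types and names below are illustrative stand-ins, not the actual MLIR API): blocking uses are removed first while the value replacing each promoted store is recorded, and only afterwards are the ops that opted in via requiresReplacedValues() shown the full list.

  #include <cstdio>
  #include <utility>
  #include <vector>

  struct Value { int id; };

  struct PromotableOpSketch {
    bool WantsReplacedValues = false;

    // Mirrors requiresReplacedValues(): only ops that opt in are revisited.
    bool requiresReplacedValues() const { return WantsReplacedValues; }

    // Mirrors visitReplacedValues(): runs after the main mutation stage and
    // sees each (store-like op, value that replaced its slot) pair.
    void visitReplacedValues(
        const std::vector<std::pair<const PromotableOpSketch *, Value>> &Defs)
        const {
      for (const auto &D : Defs)
        std::printf("materialize dbg.value for %%%d\n", D.second.id);
    }
  };

  int main() {
    PromotableOpSketch Store;      // a store being promoted away
    PromotableOpSketch DbgDeclare; // e.g. llvm.intr.dbg.declare
    DbgDeclare.WantsReplacedValues = true;

    // Phase 1: while removing blocking uses, record what replaced each store.
    std::vector<std::pair<const PromotableOpSketch *, Value>> Replaced;
    Replaced.push_back({&Store, Value{7}});

    // Phase 2: once every op has been processed, let interested ops react.
    const PromotableOpSketch *Ops[] = {&Store, &DbgDeclare};
    for (const PromotableOpSketch *Op : Ops)
      if (Op->requiresReplacedValues())
        Op->visitReplacedValues(Replaced);
  }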
This change is necessary to correctly handle the promotion of `LLVM_DbgDeclareOp`. --------- Co-authored-by: Théo Degioanni <30992420+Moxinilian@users.noreply.github.com> --- .../mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td | 6 +++-- .../mlir/Interfaces/MemorySlotInterfaces.td | 30 ++++++++++++++++++++++ mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp | 19 +++++++++----- mlir/lib/Transforms/Mem2Reg.cpp | 16 +++++++++++- mlir/test/Dialect/LLVMIR/mem2reg-dbginfo.mlir | 21 +++++++++++++++ 5 files changed, 82 insertions(+), 10 deletions(-) diff --git a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td index 28526f1..a52cca3 100644 --- a/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td +++ b/mlir/include/mlir/Dialect/LLVMIR/LLVMIntrinsicOps.td @@ -562,8 +562,10 @@ class LLVM_DbgIntrOp traits = []> }]; } -def LLVM_DbgDeclareOp : LLVM_DbgIntrOp<"dbg.declare", "addr", - [DeclareOpInterfaceMethods]> { +def LLVM_DbgDeclareOp : LLVM_DbgIntrOp<"dbg.declare", "addr", [ + DeclareOpInterfaceMethods]> { let summary = "Describes how the address relates to a source language variable."; let arguments = (ins LLVM_AnyPointer:$addr, diff --git a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td index e10e2d4..9db8936 100644 --- a/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td +++ b/mlir/include/mlir/Interfaces/MemorySlotInterfaces.td @@ -229,6 +229,36 @@ def PromotableOpInterface : OpInterface<"PromotableOpInterface"> { (ins "const ::llvm::SmallPtrSetImpl &":$blockingUses, "::mlir::RewriterBase &":$rewriter) >, + InterfaceMethod<[{ + This method allows the promoted operation to visit the SSA values used + in place of the memory slot once the promotion process of the memory + slot is complete. + + If this method returns true, the `visitReplacedValues` method on this + operation will be called after the main mutation stage finishes + (i.e., after all ops have been processed with `removeBlockingUses`). + + Operations should only the replaced values if the intended + transformation applies to all the replaced values. Furthermore, replaced + values must not be deleted. + }], "bool", "requiresReplacedValues", (ins), [{}], + [{ return false; }] + >, + InterfaceMethod<[{ + Transforms the IR using the SSA values that replaced the memory slot. + + This method will only be called after all blocking uses have been + scheduled for removal and if `requiresReplacedValues` returned + true. + + The rewriter is located after the promotable operation on call. All IR + mutations must happen through the rewriter. During the transformation, + *no operation should be deleted*. + }], + "void", "visitReplacedValues", + (ins "::llvm::ArrayRef>":$mutatedDefs, + "::mlir::RewriterBase &":$rewriter), [{}], [{ return; }] + >, ]; } diff --git a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp index 06c1fdd..5637552 100644 --- a/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp +++ b/mlir/lib/Dialect/LLVMIR/IR/LLVMMemorySlot.cpp @@ -168,13 +168,6 @@ bool LLVM::StoreOp::canUsesBeRemoved( DeletionKind LLVM::StoreOp::removeBlockingUses( const MemorySlot &slot, const SmallPtrSetImpl &blockingUses, RewriterBase &rewriter, Value reachingDefinition) { - // `canUsesBeRemoved` checked this blocking use must be the stored slot - // pointer. 
- for (Operation *user : slot.ptr.getUsers()) - if (auto declareOp = dyn_cast(user)) - rewriter.create(declareOp->getLoc(), getValue(), - declareOp.getVarInfo(), - declareOp.getLocationExpr()); return DeletionKind::Delete; } @@ -407,6 +400,18 @@ DeletionKind LLVM::DbgValueOp::removeBlockingUses( return DeletionKind::Keep; } +bool LLVM::DbgDeclareOp::requiresReplacedValues() { return true; } + +void LLVM::DbgDeclareOp::visitReplacedValues( + ArrayRef> definitions, + RewriterBase &rewriter) { + for (auto [op, value] : definitions) { + rewriter.setInsertionPointAfter(op); + rewriter.create(getLoc(), value, getVarInfo(), + getLocationExpr()); + } +} + //===----------------------------------------------------------------------===// // Interfaces for GEPOp //===----------------------------------------------------------------------===// diff --git a/mlir/lib/Transforms/Mem2Reg.cpp b/mlir/lib/Transforms/Mem2Reg.cpp index 80e3b79..abe565e 100644 --- a/mlir/lib/Transforms/Mem2Reg.cpp +++ b/mlir/lib/Transforms/Mem2Reg.cpp @@ -202,6 +202,7 @@ private: /// Contains the reaching definition at this operation. Reaching definitions /// are only computed for promotable memory operations with blocking uses. DenseMap reachingDefs; + DenseMap replacedValuesMap; DominanceInfo &dominance; MemorySlotPromotionInfo info; const Mem2RegStatistics &statistics; @@ -438,6 +439,7 @@ Value MemorySlotPromoter::computeReachingDefInBlock(Block *block, assert(stored && "a memory operation storing to a slot must provide a " "new definition of the slot"); reachingDef = stored; + replacedValuesMap[memOp] = stored; } } } @@ -552,6 +554,10 @@ void MemorySlotPromoter::removeBlockingUses() { dominanceSort(usersToRemoveUses, *slot.ptr.getParentBlock()->getParent()); llvm::SmallVector toErase; + // List of all replaced values in the slot. + llvm::SmallVector> replacedValuesList; + // Ops to visit with the `visitReplacedValues` method. 
+ llvm::SmallVector toVisit; for (Operation *toPromote : llvm::reverse(usersToRemoveUses)) { if (auto toPromoteMemOp = dyn_cast(toPromote)) { Value reachingDef = reachingDefs.lookup(toPromoteMemOp); @@ -565,7 +571,9 @@ void MemorySlotPromoter::removeBlockingUses() { slot, info.userToBlockingUses[toPromote], rewriter, reachingDef) == DeletionKind::Delete) toErase.push_back(toPromote); - + if (toPromoteMemOp.storesTo(slot)) + if (Value replacedValue = replacedValuesMap[toPromoteMemOp]) + replacedValuesList.push_back({toPromoteMemOp, replacedValue}); continue; } @@ -574,6 +582,12 @@ void MemorySlotPromoter::removeBlockingUses() { if (toPromoteBasic.removeBlockingUses(info.userToBlockingUses[toPromote], rewriter) == DeletionKind::Delete) toErase.push_back(toPromote); + if (toPromoteBasic.requiresReplacedValues()) + toVisit.push_back(toPromoteBasic); + } + for (PromotableOpInterface op : toVisit) { + rewriter.setInsertionPointAfter(op); + op.visitReplacedValues(replacedValuesList, rewriter); } for (Operation *toEraseOp : toErase) diff --git a/mlir/test/Dialect/LLVMIR/mem2reg-dbginfo.mlir b/mlir/test/Dialect/LLVMIR/mem2reg-dbginfo.mlir index f7ddb4a..b7cbd78 100644 --- a/mlir/test/Dialect/LLVMIR/mem2reg-dbginfo.mlir +++ b/mlir/test/Dialect/LLVMIR/mem2reg-dbginfo.mlir @@ -29,6 +29,27 @@ llvm.func @basic_store_load(%arg0: i64) -> i64 { llvm.return %2 : i64 } +// CHECK-LABEL: llvm.func @multiple_store_load +llvm.func @multiple_store_load(%arg0: i64) -> i64 { + %0 = llvm.mlir.constant(1 : i32) : i32 + // CHECK-NOT: = llvm.alloca + %1 = llvm.alloca %0 x i64 {alignment = 8 : i64} : (i32) -> !llvm.ptr + // CHECK-NOT: llvm.intr.dbg.declare + llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr + // CHECK-NOT: llvm.store + llvm.store %arg0, %1 {alignment = 4 : i64} : i64, !llvm.ptr + // CHECK-NOT: llvm.intr.dbg.declare + llvm.intr.dbg.declare #di_local_variable = %1 : !llvm.ptr + // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[LOADED:.*]] : i64 + // CHECK: llvm.intr.dbg.value #[[$VAR]] = %[[LOADED]] : i64 + // CHECK-NOT: llvm.intr.dbg.value + // CHECK-NOT: llvm.intr.dbg.declare + // CHECK-NOT: llvm.store + %2 = llvm.load %1 {alignment = 4 : i64} : !llvm.ptr -> i64 + // CHECK: llvm.return %[[LOADED]] : i64 + llvm.return %2 : i64 +} + // CHECK-LABEL: llvm.func @block_argument_value // CHECK-SAME: (%[[ARG0:.*]]: i64, {{.*}}) llvm.func @block_argument_value(%arg0: i64, %arg1: i1) -> i64 { -- cgit v1.1 From 02b49d14a50cbfad0196cdddba6771f0593fdc3b Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Thu, 28 Mar 2024 14:23:58 -0500 Subject: [ValueTracking] Add tests for computing known bits from `(icmp eq (and/or x,y), C)`; NFC --- llvm/test/Transforms/InstCombine/known-bits.ll | 110 +++++++++++++++++++++++-- 1 file changed, 105 insertions(+), 5 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll index 5305c78..af3db82 100644 --- a/llvm/test/Transforms/InstCombine/known-bits.ll +++ b/llvm/test/Transforms/InstCombine/known-bits.ll @@ -124,7 +124,6 @@ exit: ret i8 %or2 } - define i8 @test_cond_and_bothways(i8 %x) { ; CHECK-LABEL: @test_cond_and_bothways( ; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 91 @@ -181,8 +180,6 @@ exit: ret i8 %or2 } - - define i8 @test_cond_and_commuted(i8 %x, i1 %c1, i1 %c2) { ; CHECK-LABEL: @test_cond_and_commuted( ; CHECK-NEXT: [[AND:%.*]] = and i8 [[X:%.*]], 3 @@ -343,7 +340,7 @@ exit: ret i8 %or2 } -define i32 @test_icmp_trunc1(i32 %x){ +define i32 @test_icmp_trunc1(i32 %x) { ; CHECK-LABEL: @test_icmp_trunc1( ; 
CHECK-NEXT: entry: ; CHECK-NEXT: [[Y:%.*]] = trunc i32 [[X:%.*]] to i16 @@ -365,7 +362,7 @@ else: ret i32 0 } -define i32 @test_icmp_trunc_assume(i32 %x){ +define i32 @test_icmp_trunc_assume(i32 %x) { ; CHECK-LABEL: @test_icmp_trunc_assume( ; CHECK-NEXT: entry: ; CHECK-NEXT: [[Y:%.*]] = trunc i32 [[X:%.*]] to i16 @@ -532,7 +529,110 @@ if.else: ret i1 %other } +define i8 @and_eq_bits_must_be_set(i8 %x, i8 %y) { +; CHECK-LABEL: @and_eq_bits_must_be_set( +; CHECK-NEXT: [[XY:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 123 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[X]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %xy = and i8 %x, %y + %cmp = icmp eq i8 %xy, 123 + call void @llvm.assume(i1 %cmp) + %r = and i8 %x, 1 + ret i8 %r +} + +define i8 @and_eq_bits_must_be_set2(i8 %x, i8 %y) { +; CHECK-LABEL: @and_eq_bits_must_be_set2( +; CHECK-NEXT: [[XY:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 123 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[Y]], 11 +; CHECK-NEXT: ret i8 [[R]] +; + %xy = and i8 %x, %y + %cmp = icmp eq i8 %xy, 123 + call void @llvm.assume(i1 %cmp) + %r = and i8 %y, 11 + ret i8 %r +} + +define i8 @and_eq_bits_must_be_set2_partial_fail(i8 %x, i8 %y) { +; CHECK-LABEL: @and_eq_bits_must_be_set2_partial_fail( +; CHECK-NEXT: [[XY:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 123 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[Y]], 111 +; CHECK-NEXT: ret i8 [[R]] +; + %xy = and i8 %x, %y + %cmp = icmp eq i8 %xy, 123 + call void @llvm.assume(i1 %cmp) + %r = and i8 %y, 111 + ret i8 %r +} + +define i8 @or_eq_bits_must_be_unset(i8 %x, i8 %y) { +; CHECK-LABEL: @or_eq_bits_must_be_unset( +; CHECK-NEXT: [[XY:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 124 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[R]] +; + %xy = or i8 %x, %y + %cmp = icmp eq i8 %xy, 124 + call void @llvm.assume(i1 %cmp) + %r = and i8 %x, 3 + ret i8 %r +} + +define i8 @or_eq_bits_must_be_unset2(i8 %x, i8 %y) { +; CHECK-LABEL: @or_eq_bits_must_be_unset2( +; CHECK-NEXT: [[XY:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 124 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[Y]], 1 +; CHECK-NEXT: ret i8 [[R]] +; + %xy = or i8 %x, %y + %cmp = icmp eq i8 %xy, 124 + call void @llvm.assume(i1 %cmp) + %r = and i8 %y, 1 + ret i8 %r +} +define i8 @or_eq_bits_must_be_unset2_partial_fail(i8 %x, i8 %y) { +; CHECK-LABEL: @or_eq_bits_must_be_unset2_partial_fail( +; CHECK-NEXT: [[XY:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 124 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[Y]], 7 +; CHECK-NEXT: ret i8 [[R]] +; + %xy = or i8 %x, %y + %cmp = icmp eq i8 %xy, 124 + call void @llvm.assume(i1 %cmp) + %r = and i8 %y, 7 + ret i8 %r +} + +define i8 @or_ne_bits_must_be_unset2_fail(i8 %x, i8 %y) { +; CHECK-LABEL: @or_ne_bits_must_be_unset2_fail( +; CHECK-NEXT: [[XY:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8 [[XY]], 124 +; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) +; CHECK-NEXT: [[R:%.*]] = and i8 [[X]], 3 +; CHECK-NEXT: ret i8 [[R]] +; + %xy = or i8 %x, %y + %cmp = icmp ne i8 %xy, 124 + call void @llvm.assume(i1 %cmp) + %r = and i8 %x, 
3 + ret i8 %r +} declare void @use(i1) declare void @sink(i8) -- cgit v1.1 From 05cff99a29b2df17142907aa81ae488559756f01 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Thu, 28 Mar 2024 14:24:01 -0500 Subject: [ValueTracking] Infer known bits fromfrom `(icmp eq (and/or x,y), C)` In `(icmp eq (and x,y), C)` all 1s in `C` must also be set in both `x`/`y`. In `(icmp eq (or x,y), C)` all 0s in `C` must also be set in both `x`/`y`. Closes #87143 --- llvm/lib/Analysis/ValueTracking.cpp | 21 +++++++++++++++------ llvm/test/Transforms/InstCombine/known-bits.ll | 14 +++++--------- llvm/test/Transforms/InstCombine/zext-or-icmp.ll | 10 +++++----- 3 files changed, 25 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index b5e8a1d..33a6986 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -648,6 +648,7 @@ static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred, auto m_V = m_CombineOr(m_Specific(V), m_PtrToIntSameSize(Q.DL, m_Specific(V))); + Value *Y; const APInt *Mask, *C; uint64_t ShAmt; switch (Pred) { @@ -656,16 +657,18 @@ static void computeKnownBitsFromCmp(const Value *V, CmpInst::Predicate Pred, if (match(LHS, m_V) && match(RHS, m_APInt(C))) { Known = Known.unionWith(KnownBits::makeConstant(*C)); // assume(V & Mask = C) - } else if (match(LHS, m_And(m_V, m_APInt(Mask))) && + } else if (match(LHS, m_c_And(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) { // For one bits in Mask, we can propagate bits from C to V. - Known.Zero |= ~*C & *Mask; - Known.One |= *C & *Mask; + Known.One |= *C; + if (match(Y, m_APInt(Mask))) + Known.Zero |= ~*C & *Mask; // assume(V | Mask = C) - } else if (match(LHS, m_Or(m_V, m_APInt(Mask))) && match(RHS, m_APInt(C))) { + } else if (match(LHS, m_c_Or(m_V, m_Value(Y))) && match(RHS, m_APInt(C))) { // For zero bits in Mask, we can propagate bits from C to V. - Known.Zero |= ~*C & ~*Mask; - Known.One |= *C & ~*Mask; + Known.Zero |= ~*C; + if (match(Y, m_APInt(Mask))) + Known.One |= *C & ~*Mask; // assume(V ^ Mask = C) } else if (match(LHS, m_Xor(m_V, m_APInt(Mask))) && match(RHS, m_APInt(C))) { @@ -9276,11 +9279,17 @@ void llvm::findValuesAffectedByCondition( if (ICmpInst::isEquality(Pred)) { if (match(B, m_ConstantInt())) { + Value *Y; // (X & C) or (X | C) or (X ^ C). // (X << C) or (X >>_s C) or (X >>_u C). 
if (match(A, m_BitwiseLogic(m_Value(X), m_ConstantInt())) || match(A, m_Shift(m_Value(X), m_ConstantInt()))) AddAffected(X); + else if (match(A, m_And(m_Value(X), m_Value(Y))) || + match(A, m_Or(m_Value(X), m_Value(Y)))) { + AddAffected(X); + AddAffected(Y); + } } } else { // Handle (A + C1) u< C2, which is the canonical form of diff --git a/llvm/test/Transforms/InstCombine/known-bits.ll b/llvm/test/Transforms/InstCombine/known-bits.ll index af3db82..769f766 100644 --- a/llvm/test/Transforms/InstCombine/known-bits.ll +++ b/llvm/test/Transforms/InstCombine/known-bits.ll @@ -534,8 +534,7 @@ define i8 @and_eq_bits_must_be_set(i8 %x, i8 %y) { ; CHECK-NEXT: [[XY:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 123 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[X]], 1 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 1 ; %xy = and i8 %x, %y %cmp = icmp eq i8 %xy, 123 @@ -549,8 +548,7 @@ define i8 @and_eq_bits_must_be_set2(i8 %x, i8 %y) { ; CHECK-NEXT: [[XY:%.*]] = and i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 123 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[Y]], 11 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 11 ; %xy = and i8 %x, %y %cmp = icmp eq i8 %xy, 123 @@ -579,8 +577,7 @@ define i8 @or_eq_bits_must_be_unset(i8 %x, i8 %y) { ; CHECK-NEXT: [[XY:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 124 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[X]], 3 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 0 ; %xy = or i8 %x, %y %cmp = icmp eq i8 %xy, 124 @@ -594,8 +591,7 @@ define i8 @or_eq_bits_must_be_unset2(i8 %x, i8 %y) { ; CHECK-NEXT: [[XY:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 124 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[Y]], 1 -; CHECK-NEXT: ret i8 [[R]] +; CHECK-NEXT: ret i8 0 ; %xy = or i8 %x, %y %cmp = icmp eq i8 %xy, 124 @@ -609,7 +605,7 @@ define i8 @or_eq_bits_must_be_unset2_partial_fail(i8 %x, i8 %y) { ; CHECK-NEXT: [[XY:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] ; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[XY]], 124 ; CHECK-NEXT: call void @llvm.assume(i1 [[CMP]]) -; CHECK-NEXT: [[R:%.*]] = and i8 [[Y]], 7 +; CHECK-NEXT: [[R:%.*]] = and i8 [[Y]], 4 ; CHECK-NEXT: ret i8 [[R]] ; %xy = or i8 %x, %y diff --git a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll index 661c360..a4b74aa 100644 --- a/llvm/test/Transforms/InstCombine/zext-or-icmp.ll +++ b/llvm/test/Transforms/InstCombine/zext-or-icmp.ll @@ -1,5 +1,5 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py -; RUN: opt < %s -passes=instcombine -S | FileCheck %s +; RUN: opt < %s -passes='instcombine' -S | FileCheck %s define i8 @zext_or_icmp_icmp(i8 %a, i8 %b) { ; CHECK-LABEL: @zext_or_icmp_icmp( @@ -180,11 +180,11 @@ define i8 @PR49475_infloop(i32 %t0, i16 %insert, i64 %e, i8 %i162) { ; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[SUB17]], 32 ; CHECK-NEXT: [[CONV18:%.*]] = ashr exact i64 [[SEXT]], 32 ; CHECK-NEXT: [[CMP:%.*]] = icmp sge i64 [[XOR]], [[CONV18]] -; CHECK-NEXT: [[CONV19:%.*]] = zext i1 [[CMP]] to i16 -; CHECK-NEXT: [[OR21:%.*]] = or i16 [[CONV19]], [[INSERT]] -; CHECK-NEXT: [[TOBOOL23_NOT:%.*]] = icmp eq i16 [[OR21]], 0 +; CHECK-NEXT: [[TRUNC44:%.*]] = zext i1 [[CMP]] to i8 +; CHECK-NEXT: [[INC:%.*]] = add i8 [[TRUNC44]], [[I162]] +; CHECK-NEXT: [[TOBOOL23_NOT:%.*]] 
= xor i1 [[CMP]], true ; CHECK-NEXT: call void @llvm.assume(i1 [[TOBOOL23_NOT]]) -; CHECK-NEXT: ret i8 [[I162]] +; CHECK-NEXT: ret i8 [[INC]] ; %b = icmp eq i32 %t0, 0 %b2 = icmp eq i16 %insert, 0 -- cgit v1.1 From 74447cf46f97f24f52ac6675d642e6cc771447bb Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 20 Mar 2024 23:16:28 -0500 Subject: [ValueTracking] Add tests for deducing more conditions in `isTruePredicate`; NFC --- llvm/test/Transforms/InstCombine/implies.ll | 440 +++++++++++++++++++++++++++ llvm/test/Transforms/InstSimplify/implies.ll | 26 ++ 2 files changed, 466 insertions(+) create mode 100644 llvm/test/Transforms/InstCombine/implies.ll diff --git a/llvm/test/Transforms/InstCombine/implies.ll b/llvm/test/Transforms/InstCombine/implies.ll new file mode 100644 index 0000000..6741d59 --- /dev/null +++ b/llvm/test/Transforms/InstCombine/implies.ll @@ -0,0 +1,440 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt < %s -passes=instcombine -S | FileCheck %s + +define i1 @or_implies_sle(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @or_implies_sle( +; CHECK-NEXT: [[OR:%.*]] = or i8 [[X:%.*]], 23 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[OR]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %or = or i8 %x, 23 + %cond = icmp sle i8 %or, %y + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @or_implies_sle_fail(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @or_implies_sle_fail( +; CHECK-NEXT: [[OR:%.*]] = or i8 [[X:%.*]], -34 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[OR]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X]], [[Y]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %or = or i8 %x, -34 + %cond = icmp sle i8 %or, %y + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @or_distjoint_implies_ule(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @or_distjoint_implies_ule( +; CHECK-NEXT: [[X2:%.*]] = or disjoint i8 [[X:%.*]], 24 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[X1:%.*]] = or disjoint i8 [[X]], 23 +; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X1]], [[Y]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x1 = or disjoint i8 %x, 23 + %x2 = or disjoint i8 %x, 24 + + %cond = icmp ule i8 %x2, %y + br i1 %cond, label %T, label %F +T: + %r = icmp ule i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @or_distjoint_implies_ule_fail(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @or_distjoint_implies_ule_fail( +; CHECK-NEXT: [[X2:%.*]] = or disjoint i8 [[X:%.*]], 24 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[X1:%.*]] = or disjoint i8 [[X]], 28 +; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X1]], [[Y]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x1 = or disjoint i8 %x, 28 + %x2 = or disjoint i8 %x, 24 + + %cond = icmp ule i8 %x2, %y + br i1 %cond, label %T, label %F +T: + %r = icmp ule i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + 
+define i1 @or_prove_distjoin_implies_ule(i8 %xx, i8 %y, i1 %other) { +; CHECK-LABEL: @or_prove_distjoin_implies_ule( +; CHECK-NEXT: [[X:%.*]] = and i8 [[XX:%.*]], -16 +; CHECK-NEXT: [[X2:%.*]] = or disjoint i8 [[X]], 10 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x = and i8 %xx, -16 + %x1 = or i8 %x, 7 + %x2 = or i8 %x, 10 + + %cond = icmp ule i8 %x2, %y + br i1 %cond, label %T, label %F +T: + %r = icmp ule i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_or_distjoint_implies_sle(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @src_or_distjoint_implies_sle( +; CHECK-NEXT: [[X2:%.*]] = or disjoint i8 [[X:%.*]], 24 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[X1:%.*]] = or disjoint i8 [[X]], 23 +; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X1]], [[Y]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x1 = or disjoint i8 %x, 23 + %x2 = or disjoint i8 %x, 24 + + %cond = icmp sle i8 %x2, %y + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_or_distjoint_implies_sle_fail(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @src_or_distjoint_implies_sle_fail( +; CHECK-NEXT: [[X2:%.*]] = or disjoint i8 [[X:%.*]], 24 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp slt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[X1:%.*]] = or disjoint i8 [[X]], 23 +; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X1]], [[Y]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x1 = or disjoint i8 %x, 23 + %x2 = or disjoint i8 %x, 24 + + %cond = icmp sle i8 %y, %x2 + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_addnsw_implies_sle(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @src_addnsw_implies_sle( +; CHECK-NEXT: [[X2:%.*]] = add nsw i8 [[X:%.*]], 24 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[X1:%.*]] = add nsw i8 [[X]], 23 +; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X1]], [[Y]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x1 = add nsw i8 %x, 23 + %x2 = add nsw i8 %x, 24 + + %cond = icmp sle i8 %x2, %y + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_addnsw_implies_sle_fail(i8 %x, i8 %y, i1 %other) { +; CHECK-LABEL: @src_addnsw_implies_sle_fail( +; CHECK-NEXT: [[X2:%.*]] = add nsw i8 [[X:%.*]], 23 +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[X2]], [[Y:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[X1:%.*]] = add nsw i8 [[X]], 24 +; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X1]], [[Y]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %x1 = add nsw i8 %x, 24 + %x2 = add nsw i8 %x, 23 + + %cond = icmp sle i8 %x2, %y + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x1, %y + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_and_implies_ult(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_and_implies_ult( +; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[X:%.*]], 
[[Z:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: [[AND:%.*]] = and i8 [[Z]], [[X]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %cond = icmp ult i8 %x, %z + br i1 %cond, label %T, label %F +T: + %and = and i8 %z, %x + %r = icmp ult i8 %and, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_and_implies_ult_fail(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_and_implies_ult_fail( +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[Z]] +; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %cond = icmp ule i8 %x, %z + br i1 %cond, label %T, label %F +T: + %and = and i8 %x, %z + %r = icmp ult i8 %and, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_and_implies_slt_fail(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_and_implies_slt_fail( +; CHECK-NEXT: [[COND:%.*]] = icmp slt i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: [[AND:%.*]] = and i8 [[X]], [[Y:%.*]] +; CHECK-NEXT: [[R:%.*]] = icmp slt i8 [[AND]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %cond = icmp slt i8 %x, %z + br i1 %cond, label %T, label %F +T: + %and = and i8 %x, %y + %r = icmp slt i8 %and, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_or_implies_ule(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_or_implies_ule( +; CHECK-NEXT: [[OR:%.*]] = or i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[OR]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %or = or i8 %y, %x + %cond = icmp uge i8 %z, %or + br i1 %cond, label %T, label %F +T: + %r = icmp ule i8 %x, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_or_implies_false_ugt_todo(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_or_implies_false_ugt_todo( +; CHECK-NEXT: [[OR:%.*]] = or i8 [[X:%.*]], [[Y:%.*]] +; CHECK-NEXT: [[COND:%.*]] = icmp ugt i8 [[OR]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; CHECK: F: +; CHECK-NEXT: [[R:%.*]] = icmp ugt i8 [[X]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; + %or = or i8 %x, %y + %cond = icmp ugt i8 %or, %z + br i1 %cond, label %T, label %F +T: + ret i1 %other +F: + %r = icmp ugt i8 %x, %z + ret i1 %r + +} + +define i1 @src_udiv_implies_ult(i8 %x, i8 %z, i1 %other) { +; CHECK-LABEL: @src_udiv_implies_ult( +; CHECK-NEXT: [[COND:%.*]] = icmp ugt i8 [[Z:%.*]], [[X:%.*]] +; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] +; CHECK: T: +; CHECK-NEXT: [[AND:%.*]] = udiv i8 [[X]], 3 +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[AND]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %cond = icmp ugt i8 %z, %x + br i1 %cond, label %T, label %F +T: + %and = udiv i8 %x, 3 + %r = icmp ult i8 %and, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_udiv_implies_ult2(i8 %x, i8 %z, i1 %other) { +; CHECK-LABEL: @src_udiv_implies_ult2( +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[Z:%.*]], [[X:%.*]] +; 
CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; CHECK: F: +; CHECK-NEXT: [[AND:%.*]] = udiv i8 [[X]], 3 +; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[AND]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; + %cond = icmp ule i8 %z, %x + br i1 %cond, label %T, label %F +T: + ret i1 %other +F: + %and = udiv i8 %x, 3 + %r = icmp ult i8 %and, %z + ret i1 %r +} + +define i1 @src_smin_implies_sle(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_smin_implies_sle( +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %cond = icmp sle i8 %x, %z + br i1 %cond, label %T, label %F +T: + %um = call i8 @llvm.smin.i8(i8 %x, i8 %y) + %r = icmp sle i8 %um, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_umin_implies_ule(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_umin_implies_ule( +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[X:%.*]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: ret i1 true +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %cond = icmp ule i8 %x, %z + br i1 %cond, label %T, label %F +T: + %um = call i8 @llvm.umin.i8(i8 %x, i8 %y) + %r = icmp ule i8 %um, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_umax_implies_ule(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_umax_implies_ule( +; CHECK-NEXT: [[UM:%.*]] = call i8 @llvm.umax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[UM]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %um = call i8 @llvm.umax.i8(i8 %x, i8 %y) + %cond = icmp ule i8 %um, %z + br i1 %cond, label %T, label %F +T: + %r = icmp ule i8 %x, %z + ret i1 %r +F: + ret i1 %other +} + +define i1 @src_smax_implies_sle(i8 %x, i8 %y, i8 %z, i1 %other) { +; CHECK-LABEL: @src_smax_implies_sle( +; CHECK-NEXT: [[UM:%.*]] = call i8 @llvm.smax.i8(i8 [[X:%.*]], i8 [[Y:%.*]]) +; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[UM]], [[Z:%.*]] +; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] +; CHECK: T: +; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X]], [[Z]] +; CHECK-NEXT: ret i1 [[R]] +; CHECK: F: +; CHECK-NEXT: ret i1 [[OTHER:%.*]] +; + %um = call i8 @llvm.smax.i8(i8 %x, i8 %y) + %cond = icmp sle i8 %um, %z + br i1 %cond, label %T, label %F +T: + %r = icmp sle i8 %x, %z + ret i1 %r +F: + ret i1 %other +} diff --git a/llvm/test/Transforms/InstSimplify/implies.ll b/llvm/test/Transforms/InstSimplify/implies.ll index b70dc90..8a01190 100644 --- a/llvm/test/Transforms/InstSimplify/implies.ll +++ b/llvm/test/Transforms/InstSimplify/implies.ll @@ -166,6 +166,19 @@ define i1 @test10(i32 %length.i, i32 %x.full) { ret i1 %res } +define i1 @test10_with_disjoint(i32 %length.i, i32 %x.full) { +; CHECK-LABEL: @test10_with_disjoint( +; CHECK-NEXT: ret i1 true +; + %x = and i32 %x.full, 4294901760 ;; 4294901760 == 0xffff0000 + %large = or disjoint i32 %x, 100 + %small = or disjoint i32 %x, 90 + %known = icmp ult i32 %large, %length.i + %to.prove = icmp ult i32 %small, %length.i + %res = icmp ule i1 %known, %to.prove + ret i1 %res +} + define i1 @test11(i32 %length.i, i32 %x) { ; CHECK-LABEL: @test11( ; CHECK-NEXT: [[LARGE:%.*]] = or i32 [[X:%.*]], 100 @@ 
-227,6 +240,19 @@ define i1 @test14(i32 %length.i, i32 %x.full) { ret i1 %res } +define i1 @test14_with_disjoint(i32 %length.i, i32 %x.full) { +; CHECK-LABEL: @test14_with_disjoint( +; CHECK-NEXT: ret i1 true +; + %x = and i32 %x.full, 4294905615 ;; 4294905615 == 0xffff0f0f + %large = or disjoint i32 %x, 8224 ;; == 0x2020 + %small = or disjoint i32 %x, 4112 ;; == 0x1010 + %known = icmp ult i32 %large, %length.i + %to.prove = icmp ult i32 %small, %length.i + %res = icmp ule i1 %known, %to.prove + ret i1 %res +} + define i1 @test15(i32 %length.i, i32 %x) { ; CHECK-LABEL: @test15( ; CHECK-NEXT: [[LARGE:%.*]] = add nuw i32 [[X:%.*]], 100 -- cgit v1.1 From 678f32ab66508aea2068a5e4e07d53b71ce5cf31 Mon Sep 17 00:00:00 2001 From: Noah Goldstein Date: Wed, 20 Mar 2024 22:08:22 -0500 Subject: [ValueTracking] Add more conditions in `isTruePredicate` There is one notable "regression". This patch replaces the bespoke `or disjoint` logic with a direct match. This means we fail some simplification during `instsimplify`. All the cases we fail in `instsimplify` we do handle in `instcombine` as we add `disjoint` flags. Other than that, just some basic cases. See proofs: https://alive2.llvm.org/ce/z/_-g7C8 Closes #86083 --- llvm/lib/Analysis/ValueTracking.cpp | 89 +++++++++++++++++----------- llvm/test/Transforms/InstCombine/implies.ll | 36 ++++------- llvm/test/Transforms/InstSimplify/implies.ll | 16 ++++- 3 files changed, 77 insertions(+), 64 deletions(-) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index 33a6986..5ad4da4 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -8393,8 +8393,7 @@ bool llvm::matchSimpleRecurrence(const BinaryOperator *I, PHINode *&P, /// Return true if "icmp Pred LHS RHS" is always true.
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, - const Value *RHS, const DataLayout &DL, - unsigned Depth) { + const Value *RHS) { if (ICmpInst::isTrueWhenEqual(Pred) && LHS == RHS) return true; @@ -8406,8 +8405,26 @@ static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, const APInt *C; // LHS s<= LHS +_{nsw} C if C >= 0 - if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C)))) + // LHS s<= LHS | C if C >= 0 + if (match(RHS, m_NSWAdd(m_Specific(LHS), m_APInt(C))) || + match(RHS, m_Or(m_Specific(LHS), m_APInt(C)))) return !C->isNegative(); + + // LHS s<= smax(LHS, V) for any V + if (match(RHS, m_c_SMax(m_Specific(LHS), m_Value()))) + return true; + + // smin(RHS, V) s<= RHS for any V + if (match(LHS, m_c_SMin(m_Specific(RHS), m_Value()))) + return true; + + // Match A to (X +_{nsw} CA) and B to (X +_{nsw} CB) + const Value *X; + const APInt *CLHS, *CRHS; + if (match(LHS, m_NSWAddLike(m_Value(X), m_APInt(CLHS))) && + match(RHS, m_NSWAddLike(m_Specific(X), m_APInt(CRHS)))) + return CLHS->sle(*CRHS); + return false; } @@ -8417,34 +8434,36 @@ static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, cast(RHS)->hasNoUnsignedWrap()) return true; + // LHS u<= LHS | V for any V + if (match(RHS, m_c_Or(m_Specific(LHS), m_Value()))) + return true; + + // LHS u<= umax(LHS, V) for any V + if (match(RHS, m_c_UMax(m_Specific(LHS), m_Value()))) + return true; + // RHS >> V u<= RHS for any V if (match(LHS, m_LShr(m_Specific(RHS), m_Value()))) return true; - // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB) - auto MatchNUWAddsToSameValue = [&](const Value *A, const Value *B, - const Value *&X, - const APInt *&CA, const APInt *&CB) { - if (match(A, m_NUWAdd(m_Value(X), m_APInt(CA))) && - match(B, m_NUWAdd(m_Specific(X), m_APInt(CB)))) - return true; + // RHS u/ C_ugt_1 u<= RHS + const APInt *C; + if (match(LHS, m_UDiv(m_Specific(RHS), m_APInt(C))) && C->ugt(1)) + return true; - // If X & C == 0 then (X | C) == X +_{nuw} C - if (match(A, m_Or(m_Value(X), m_APInt(CA))) && - match(B, m_Or(m_Specific(X), m_APInt(CB)))) { - KnownBits Known(CA->getBitWidth()); - computeKnownBits(X, Known, DL, Depth + 1, /*AC*/ nullptr, - /*CxtI*/ nullptr, /*DT*/ nullptr); - if (CA->isSubsetOf(Known.Zero) && CB->isSubsetOf(Known.Zero)) - return true; - } + // RHS & V u<= RHS for any V + if (match(LHS, m_c_And(m_Specific(RHS), m_Value()))) + return true; - return false; - }; + // umin(RHS, V) u<= RHS for any V + if (match(LHS, m_c_UMin(m_Specific(RHS), m_Value()))) + return true; + // Match A to (X +_{nuw} CA) and B to (X +_{nuw} CB) const Value *X; const APInt *CLHS, *CRHS; - if (MatchNUWAddsToSameValue(LHS, RHS, X, CLHS, CRHS)) + if (match(LHS, m_NUWAddLike(m_Value(X), m_APInt(CLHS))) && + match(RHS, m_NUWAddLike(m_Specific(X), m_APInt(CRHS)))) return CLHS->ule(*CRHS); return false; @@ -8456,37 +8475,36 @@ static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS, /// ALHS ARHS" is true. Otherwise, return std::nullopt. 
static std::optional isImpliedCondOperands(CmpInst::Predicate Pred, const Value *ALHS, - const Value *ARHS, const Value *BLHS, const Value *BRHS, - const DataLayout &DL, unsigned Depth) { + const Value *ARHS, const Value *BLHS, const Value *BRHS) { switch (Pred) { default: return std::nullopt; case CmpInst::ICMP_SLT: case CmpInst::ICMP_SLE: - if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS, DL, Depth) && - isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS, DL, Depth)) + if (isTruePredicate(CmpInst::ICMP_SLE, BLHS, ALHS) && + isTruePredicate(CmpInst::ICMP_SLE, ARHS, BRHS)) return true; return std::nullopt; case CmpInst::ICMP_SGT: case CmpInst::ICMP_SGE: - if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS, DL, Depth) && - isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS, DL, Depth)) + if (isTruePredicate(CmpInst::ICMP_SLE, ALHS, BLHS) && + isTruePredicate(CmpInst::ICMP_SLE, BRHS, ARHS)) return true; return std::nullopt; case CmpInst::ICMP_ULT: case CmpInst::ICMP_ULE: - if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS, DL, Depth) && - isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS, DL, Depth)) + if (isTruePredicate(CmpInst::ICMP_ULE, BLHS, ALHS) && + isTruePredicate(CmpInst::ICMP_ULE, ARHS, BRHS)) return true; return std::nullopt; case CmpInst::ICMP_UGT: case CmpInst::ICMP_UGE: - if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS, DL, Depth) && - isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS, DL, Depth)) + if (isTruePredicate(CmpInst::ICMP_ULE, ALHS, BLHS) && + isTruePredicate(CmpInst::ICMP_ULE, BRHS, ARHS)) return true; return std::nullopt; } @@ -8530,7 +8548,7 @@ static std::optional isImpliedCondICmps(const ICmpInst *LHS, CmpInst::Predicate RPred, const Value *R0, const Value *R1, const DataLayout &DL, - bool LHSIsTrue, unsigned Depth) { + bool LHSIsTrue) { Value *L0 = LHS->getOperand(0); Value *L1 = LHS->getOperand(1); @@ -8577,7 +8595,7 @@ static std::optional isImpliedCondICmps(const ICmpInst *LHS, return LPred == RPred; if (LPred == RPred) - return isImpliedCondOperands(LPred, L0, L1, R0, R1, DL, Depth); + return isImpliedCondOperands(LPred, L0, L1, R0, R1); return std::nullopt; } @@ -8639,8 +8657,7 @@ llvm::isImpliedCondition(const Value *LHS, CmpInst::Predicate RHSPred, // Both LHS and RHS are icmps. const ICmpInst *LHSCmp = dyn_cast(LHS); if (LHSCmp) - return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue, - Depth); + return isImpliedCondICmps(LHSCmp, RHSPred, RHSOp0, RHSOp1, DL, LHSIsTrue); /// The LHS should be an 'or', 'and', or a 'select' instruction. We expect /// the RHS to be an icmp. 
diff --git a/llvm/test/Transforms/InstCombine/implies.ll b/llvm/test/Transforms/InstCombine/implies.ll index 6741d59..c02d84d 100644 --- a/llvm/test/Transforms/InstCombine/implies.ll +++ b/llvm/test/Transforms/InstCombine/implies.ll @@ -7,8 +7,7 @@ define i1 @or_implies_sle(i8 %x, i8 %y, i1 %other) { ; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[OR]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] ; CHECK: T: -; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X]], [[Y]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; CHECK: F: ; CHECK-NEXT: ret i1 [[OTHER:%.*]] ; @@ -49,9 +48,7 @@ define i1 @or_distjoint_implies_ule(i8 %x, i8 %y, i1 %other) { ; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[X2]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] ; CHECK: T: -; CHECK-NEXT: [[X1:%.*]] = or disjoint i8 [[X]], 23 -; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X1]], [[Y]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; CHECK: F: ; CHECK-NEXT: ret i1 [[OTHER:%.*]] ; @@ -121,9 +118,7 @@ define i1 @src_or_distjoint_implies_sle(i8 %x, i8 %y, i1 %other) { ; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[X2]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] ; CHECK: T: -; CHECK-NEXT: [[X1:%.*]] = or disjoint i8 [[X]], 23 -; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X1]], [[Y]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; CHECK: F: ; CHECK-NEXT: ret i1 [[OTHER:%.*]] ; @@ -169,9 +164,7 @@ define i1 @src_addnsw_implies_sle(i8 %x, i8 %y, i1 %other) { ; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[X2]], [[Y:%.*]] ; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] ; CHECK: T: -; CHECK-NEXT: [[X1:%.*]] = add nsw i8 [[X]], 23 -; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X1]], [[Y]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; CHECK: F: ; CHECK-NEXT: ret i1 [[OTHER:%.*]] ; @@ -216,9 +209,7 @@ define i1 @src_and_implies_ult(i8 %x, i8 %y, i8 %z, i1 %other) { ; CHECK-NEXT: [[COND:%.*]] = icmp ult i8 [[X:%.*]], [[Z:%.*]] ; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; CHECK: T: -; CHECK-NEXT: [[AND:%.*]] = and i8 [[Z]], [[X]] -; CHECK-NEXT: [[R:%.*]] = icmp ne i8 [[AND]], [[Z]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; CHECK: F: ; CHECK-NEXT: ret i1 [[OTHER:%.*]] ; @@ -280,8 +271,7 @@ define i1 @src_or_implies_ule(i8 %x, i8 %y, i8 %z, i1 %other) { ; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[OR]], [[Z:%.*]] ; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] ; CHECK: T: -; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[Z]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; CHECK: F: ; CHECK-NEXT: ret i1 [[OTHER:%.*]] ; @@ -322,9 +312,7 @@ define i1 @src_udiv_implies_ult(i8 %x, i8 %z, i1 %other) { ; CHECK-NEXT: [[COND:%.*]] = icmp ugt i8 [[Z:%.*]], [[X:%.*]] ; CHECK-NEXT: br i1 [[COND]], label [[T:%.*]], label [[F:%.*]] ; CHECK: T: -; CHECK-NEXT: [[AND:%.*]] = udiv i8 [[X]], 3 -; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[AND]], [[Z]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; CHECK: F: ; CHECK-NEXT: ret i1 [[OTHER:%.*]] ; @@ -345,9 +333,7 @@ define i1 @src_udiv_implies_ult2(i8 %x, i8 %z, i1 %other) { ; CHECK: T: ; CHECK-NEXT: ret i1 [[OTHER:%.*]] ; CHECK: F: -; CHECK-NEXT: [[AND:%.*]] = udiv i8 [[X]], 3 -; CHECK-NEXT: [[R:%.*]] = icmp ult i8 [[AND]], [[Z]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; %cond = icmp ule i8 %z, %x br i1 %cond, label %T, label %F @@ -403,8 +389,7 @@ define i1 @src_umax_implies_ule(i8 %x, i8 %y, i8 
%z, i1 %other) { ; CHECK-NEXT: [[COND_NOT:%.*]] = icmp ugt i8 [[UM]], [[Z:%.*]] ; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] ; CHECK: T: -; CHECK-NEXT: [[R:%.*]] = icmp ule i8 [[X]], [[Z]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; CHECK: F: ; CHECK-NEXT: ret i1 [[OTHER:%.*]] ; @@ -424,8 +409,7 @@ define i1 @src_smax_implies_sle(i8 %x, i8 %y, i8 %z, i1 %other) { ; CHECK-NEXT: [[COND_NOT:%.*]] = icmp sgt i8 [[UM]], [[Z:%.*]] ; CHECK-NEXT: br i1 [[COND_NOT]], label [[F:%.*]], label [[T:%.*]] ; CHECK: T: -; CHECK-NEXT: [[R:%.*]] = icmp sle i8 [[X]], [[Z]] -; CHECK-NEXT: ret i1 [[R]] +; CHECK-NEXT: ret i1 true ; CHECK: F: ; CHECK-NEXT: ret i1 [[OTHER:%.*]] ; diff --git a/llvm/test/Transforms/InstSimplify/implies.ll b/llvm/test/Transforms/InstSimplify/implies.ll index 8a01190..7e3cb65 100644 --- a/llvm/test/Transforms/InstSimplify/implies.ll +++ b/llvm/test/Transforms/InstSimplify/implies.ll @@ -155,7 +155,13 @@ define i1 @test9(i32 %length.i, i32 %i) { define i1 @test10(i32 %length.i, i32 %x.full) { ; CHECK-LABEL: @test10( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[X:%.*]] = and i32 [[X_FULL:%.*]], -65536 +; CHECK-NEXT: [[LARGE:%.*]] = or i32 [[X]], 100 +; CHECK-NEXT: [[SMALL:%.*]] = or i32 [[X]], 90 +; CHECK-NEXT: [[KNOWN:%.*]] = icmp ult i32 [[LARGE]], [[LENGTH_I:%.*]] +; CHECK-NEXT: [[TO_PROVE:%.*]] = icmp ult i32 [[SMALL]], [[LENGTH_I]] +; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[KNOWN]], [[TO_PROVE]] +; CHECK-NEXT: ret i1 [[RES]] ; %x = and i32 %x.full, 4294901760 ;; 4294901760 == 0xffff0000 %large = or i32 %x, 100 @@ -229,7 +235,13 @@ define i1 @test13(i32 %length.i, i32 %x) { define i1 @test14(i32 %length.i, i32 %x.full) { ; CHECK-LABEL: @test14( -; CHECK-NEXT: ret i1 true +; CHECK-NEXT: [[X:%.*]] = and i32 [[X_FULL:%.*]], -61681 +; CHECK-NEXT: [[LARGE:%.*]] = or i32 [[X]], 8224 +; CHECK-NEXT: [[SMALL:%.*]] = or i32 [[X]], 4112 +; CHECK-NEXT: [[KNOWN:%.*]] = icmp ult i32 [[LARGE]], [[LENGTH_I:%.*]] +; CHECK-NEXT: [[TO_PROVE:%.*]] = icmp ult i32 [[SMALL]], [[LENGTH_I]] +; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[KNOWN]], [[TO_PROVE]] +; CHECK-NEXT: ret i1 [[RES]] ; %x = and i32 %x.full, 4294905615 ;; 4294905615 == 0xffff0f0f %large = or i32 %x, 8224 ;; == 0x2020 -- cgit v1.1 From 515d3f7d62679cba178fb3603db963baa6ec8c93 Mon Sep 17 00:00:00 2001 From: Cyndy Ishida Date: Thu, 4 Apr 2024 10:45:55 -0700 Subject: [TextAPI] Reorder addRPath parameters (#87601) It matches up with other _attribute_ adding member functions and helps simplify InterfaceFile assignment for InstallAPI. --- llvm/include/llvm/TextAPI/InterfaceFile.h | 4 ++-- llvm/lib/TextAPI/InterfaceFile.cpp | 8 ++++---- llvm/lib/TextAPI/TextStubV5.cpp | 2 +- llvm/unittests/TextAPI/TextStubV5Tests.cpp | 4 ++-- 4 files changed, 9 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/TextAPI/InterfaceFile.h b/llvm/include/llvm/TextAPI/InterfaceFile.h index 10a37e3..23c27cb 100644 --- a/llvm/include/llvm/TextAPI/InterfaceFile.h +++ b/llvm/include/llvm/TextAPI/InterfaceFile.h @@ -299,9 +299,9 @@ public: } /// Set the runpath search paths. - /// \param InputTarget The target applicable to runpath search path. /// \param RPath The name of runpath. - void addRPath(const Target &InputTarget, StringRef RPath); + /// \param InputTarget The target applicable to runpath search path. + void addRPath(StringRef RPath, const Target &InputTarget); /// Get the list of runpath search paths. 
/// diff --git a/llvm/lib/TextAPI/InterfaceFile.cpp b/llvm/lib/TextAPI/InterfaceFile.cpp index 9979df92..79694c9 100644 --- a/llvm/lib/TextAPI/InterfaceFile.cpp +++ b/llvm/lib/TextAPI/InterfaceFile.cpp @@ -54,7 +54,7 @@ void InterfaceFile::addParentUmbrella(const Target &Target_, StringRef Parent) { ParentUmbrellas.emplace(Iter, Target_, std::string(Parent)); } -void InterfaceFile::addRPath(const Target &InputTarget, StringRef RPath) { +void InterfaceFile::addRPath(StringRef RPath, const Target &InputTarget) { if (RPath.empty()) return; using RPathEntryT = const std::pair; @@ -198,9 +198,9 @@ InterfaceFile::merge(const InterfaceFile *O) const { IF->addReexportedLibrary(Lib.getInstallName(), Target); for (const auto &[Target, Path] : rpaths()) - IF->addRPath(Target, Path); + IF->addRPath(Path, Target); for (const auto &[Target, Path] : O->rpaths()) - IF->addRPath(Target, Path); + IF->addRPath(Path, Target); for (const auto *Sym : symbols()) { IF->addSymbol(Sym->getKind(), Sym->getName(), Sym->targets(), @@ -319,7 +319,7 @@ InterfaceFile::extract(Architecture Arch) const { for (const auto &It : rpaths()) if (It.first.Arch == Arch) - IF->addRPath(It.first, It.second); + IF->addRPath(It.second, It.first); for (const auto &Lib : allowableClients()) for (const auto &Target : Lib.targets()) diff --git a/llvm/lib/TextAPI/TextStubV5.cpp b/llvm/lib/TextAPI/TextStubV5.cpp index d969810..b072c0b 100644 --- a/llvm/lib/TextAPI/TextStubV5.cpp +++ b/llvm/lib/TextAPI/TextStubV5.cpp @@ -672,7 +672,7 @@ Expected parseToInterfaceFile(const Object *File) { F->addParentUmbrella(Target, Lib); for (auto &[Path, Targets] : RPaths) for (auto Target : Targets) - F->addRPath(Target, Path); + F->addRPath(Path, Target); for (auto &[Targets, Symbols] : Exports) for (auto &Sym : Symbols) F->addSymbol(Sym.Kind, Sym.Name, Targets, Sym.Flags); diff --git a/llvm/unittests/TextAPI/TextStubV5Tests.cpp b/llvm/unittests/TextAPI/TextStubV5Tests.cpp index c77d13e..62fdd79 100644 --- a/llvm/unittests/TextAPI/TextStubV5Tests.cpp +++ b/llvm/unittests/TextAPI/TextStubV5Tests.cpp @@ -722,7 +722,7 @@ TEST(TBDv5, WriteFile) { File.setInstallName("@rpath/S/L/F/Foo.framework/Foo"); File.setCurrentVersion(PackedVersion(1, 2, 0)); File.setCompatibilityVersion(PackedVersion(1, 1, 0)); - File.addRPath(AllTargets[0], "@executable_path/.../Frameworks"); + File.addRPath("@executable_path/.../Frameworks", AllTargets[0]); for (const auto &Targ : AllTargets) { File.addParentUmbrella(Targ, "System"); @@ -897,7 +897,7 @@ TEST(TBDv5, WriteMultipleDocuments) { NestedFile.setTwoLevelNamespace(); NestedFile.setApplicationExtensionSafe(false); NestedFile.setCurrentVersion(PackedVersion(2, 1, 1)); - NestedFile.addRPath(AllTargets[0], "@executable_path/.../Frameworks"); + NestedFile.addRPath("@executable_path/.../Frameworks", AllTargets[0]); for (const auto &Targ : AllTargets) NestedFile.addReexportedLibrary("@rpath/libfoo.dylib", Targ); NestedFile.addSymbol(EncodeKind::GlobalSymbol, "_funcFoo", AllTargets, -- cgit v1.1 From d97d560fbf6ed26a198b3afe1594d7d63b88ab3a Mon Sep 17 00:00:00 2001 From: Daniil Kovalev Date: Thu, 4 Apr 2024 21:05:03 +0300 Subject: [AArch64][PAC][MC][ELF] Support PAuth ABI compatibility tag (#85236) Depends on #87545 Emit `GNU_PROPERTY_AARCH64_FEATURE_PAUTH` property in `.note.gnu.property` section depending on `aarch64-elf-pauthabi-platform` and `aarch64-elf-pauthabi-version` llvm module flags. 
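As a rough, non-normative illustration (flag format and values are taken from the tests in this patch and combined here only for demonstration), a module opting into the PAuth ABI tag carries both flags together:

; illustrative module flags only; real platform/version values are ABI-defined
!llvm.module.flags = !{!0, !1}
!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 2}
!1 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 31}

With both flags present the AArch64 streamer emits a GNU_PROPERTY_AARCH64_FEATURE_PAUTH property in .note.gnu.property; the verifier change below rejects modules that set only one of the two flags.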
--- llvm/lib/IR/Verifier.cpp | 22 +++++++++- llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 12 ++++-- .../AArch64/MCTargetDesc/AArch64TargetStreamer.cpp | 33 +++++++++++--- .../AArch64/MCTargetDesc/AArch64TargetStreamer.h | 3 +- .../AArch64/note-gnu-property-elf-pauthabi.ll | 50 ++++++++++++++++++++++ .../module-flags-note-gnu-property-elf-pauthabi.ll | 19 ++++++++ 6 files changed, 127 insertions(+), 12 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll create mode 100644 llvm/test/Verifier/module-flags-note-gnu-property-elf-pauthabi.ll diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp index ba0b723..64c5991 100644 --- a/llvm/lib/IR/Verifier.cpp +++ b/llvm/lib/IR/Verifier.cpp @@ -1734,8 +1734,28 @@ void Verifier::visitModuleFlags() { // Scan each flag, and track the flags and requirements. DenseMap SeenIDs; SmallVector Requirements; - for (const MDNode *MDN : Flags->operands()) + uint64_t PAuthABIPlatform = -1; + uint64_t PAuthABIVersion = -1; + for (const MDNode *MDN : Flags->operands()) { visitModuleFlag(MDN, SeenIDs, Requirements); + if (MDN->getNumOperands() != 3) + continue; + if (const auto *FlagName = dyn_cast_or_null(MDN->getOperand(1))) { + if (FlagName->getString() == "aarch64-elf-pauthabi-platform") { + if (const auto *PAP = + mdconst::dyn_extract_or_null(MDN->getOperand(2))) + PAuthABIPlatform = PAP->getZExtValue(); + } else if (FlagName->getString() == "aarch64-elf-pauthabi-version") { + if (const auto *PAV = + mdconst::dyn_extract_or_null(MDN->getOperand(2))) + PAuthABIVersion = PAV->getZExtValue(); + } + } + } + + if ((PAuthABIPlatform == uint64_t(-1)) != (PAuthABIVersion == uint64_t(-1))) + CheckFailed("either both or no 'aarch64-elf-pauthabi-platform' and " + "'aarch64-elf-pauthabi-version' module flags must be present"); // Validate that the requirements in the module are valid. for (const MDNode *Requirement : Requirements) { diff --git a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 4fa719a..f6ccd0e 100644 --- a/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -268,13 +268,19 @@ void AArch64AsmPrinter::emitStartOfAsmFile(Module &M) { if (Sign->getZExtValue()) Flags |= ELF::GNU_PROPERTY_AARCH64_FEATURE_1_PAC; - if (Flags == 0) - return; + uint64_t PAuthABIPlatform = -1; + if (const auto *PAP = mdconst::extract_or_null( + M.getModuleFlag("aarch64-elf-pauthabi-platform"))) + PAuthABIPlatform = PAP->getZExtValue(); + uint64_t PAuthABIVersion = -1; + if (const auto *PAV = mdconst::extract_or_null( + M.getModuleFlag("aarch64-elf-pauthabi-version"))) + PAuthABIVersion = PAV->getZExtValue(); // Emit a .note.gnu.property section with the flags. 
auto *TS = static_cast(OutStreamer->getTargetStreamer()); - TS->emitNoteSection(Flags); + TS->emitNoteSection(Flags, PAuthABIPlatform, PAuthABIVersion); } void AArch64AsmPrinter::emitFunctionHeaderComment() { diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp index e1d6dd7..dc5383c 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.cpp @@ -58,8 +58,17 @@ void AArch64TargetStreamer::finish() { emitNoteSection(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_BTI); } -void AArch64TargetStreamer::emitNoteSection(unsigned Flags) { - if (Flags == 0) +void AArch64TargetStreamer::emitNoteSection(unsigned Flags, + uint64_t PAuthABIPlatform, + uint64_t PAuthABIVersion) { + assert((PAuthABIPlatform == uint64_t(-1)) == + (PAuthABIVersion == uint64_t(-1))); + uint64_t DescSz = 0; + if (Flags != 0) + DescSz += 4 * 4; + if (PAuthABIPlatform != uint64_t(-1)) + DescSz += 4 + 4 + 8 * 2; + if (DescSz == 0) return; MCStreamer &OutStreamer = getStreamer(); @@ -80,15 +89,25 @@ void AArch64TargetStreamer::emitNoteSection(unsigned Flags) { // Emit the note header. OutStreamer.emitValueToAlignment(Align(8)); OutStreamer.emitIntValue(4, 4); // data size for "GNU\0" - OutStreamer.emitIntValue(4 * 4, 4); // Elf_Prop size + OutStreamer.emitIntValue(DescSz, 4); // Elf_Prop array size OutStreamer.emitIntValue(ELF::NT_GNU_PROPERTY_TYPE_0, 4); OutStreamer.emitBytes(StringRef("GNU", 4)); // note name // Emit the PAC/BTI properties. - OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4); - OutStreamer.emitIntValue(4, 4); // data size - OutStreamer.emitIntValue(Flags, 4); // data - OutStreamer.emitIntValue(0, 4); // pad + if (Flags != 0) { + OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_1_AND, 4); + OutStreamer.emitIntValue(4, 4); // data size + OutStreamer.emitIntValue(Flags, 4); // data + OutStreamer.emitIntValue(0, 4); // pad + } + + // Emit the PAuth ABI compatibility info + if (PAuthABIPlatform != uint64_t(-1)) { + OutStreamer.emitIntValue(ELF::GNU_PROPERTY_AARCH64_FEATURE_PAUTH, 4); + OutStreamer.emitIntValue(8 * 2, 4); // data size + OutStreamer.emitIntValue(PAuthABIPlatform, 8); + OutStreamer.emitIntValue(PAuthABIVersion, 8); + } OutStreamer.endSection(Nt); OutStreamer.switchSection(Cur); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h index 7676d88..e8a9dc4 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64TargetStreamer.h @@ -35,7 +35,8 @@ public: void emitCurrentConstantPool(); /// Callback used to implement the .note.gnu.property section. - void emitNoteSection(unsigned Flags); + void emitNoteSection(unsigned Flags, uint64_t PAuthABIPlatform = -1, + uint64_t PAuthABIVersion = -1); /// Callback used to implement the .inst directive. 
virtual void emitInst(uint32_t Inst); diff --git a/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll b/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll new file mode 100644 index 0000000..728cffe --- /dev/null +++ b/llvm/test/CodeGen/AArch64/note-gnu-property-elf-pauthabi.ll @@ -0,0 +1,50 @@ +; RUN: rm -rf %t && split-file %s %t && cd %t + +;--- ok.ll + +; RUN: llc -mtriple=aarch64-linux ok.ll -o - | \ +; RUN: FileCheck %s --check-prefix=ASM +; RUN: llc -mtriple=aarch64-linux ok.ll -filetype=obj -o - | \ +; RUN: llvm-readelf --notes - | FileCheck %s --check-prefix=OBJ + +!llvm.module.flags = !{!0, !1} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 268435458} +!1 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 85} + +; ASM: .section .note.gnu.property,"a",@note +; ASM-NEXT: .p2align 3, 0x0 +; ASM-NEXT: .word 4 +; ASM-NEXT: .word 24 +; ASM-NEXT: .word 5 +; ASM-NEXT: .asciz "GNU" +; 3221225473 = 0xc0000001 = GNU_PROPERTY_AARCH64_FEATURE_PAUTH +; ASM-NEXT: .word 3221225473 +; ASM-NEXT: .word 16 +; ASM-NEXT: .xword 268435458 +; ASM-NEXT: .xword 85 + +; OBJ: Displaying notes found in: .note.gnu.property +; OBJ-NEXT: Owner Data size Description +; OBJ-NEXT: GNU 0x00000018 NT_GNU_PROPERTY_TYPE_0 (property note) +; OBJ-NEXT: AArch64 PAuth ABI core info: platform 0x10000002 (llvm_linux), version 0x55 (PointerAuthIntrinsics, !PointerAuthCalls, PointerAuthReturns, !PointerAuthAuthTraps, PointerAuthVTPtrAddressDiscrimination, !PointerAuthVTPtrTypeDiscrimination, PointerAuthInitFini) + +; ERR: either both or no 'aarch64-elf-pauthabi-platform' and 'aarch64-elf-pauthabi-version' module flags must be present + +;--- err1.ll + +; RUN: not llc -mtriple=aarch64-linux err1.ll 2>&1 -o - | \ +; RUN: FileCheck %s --check-prefix=ERR + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 2} + +;--- err2.ll + +; RUN: not llc -mtriple=aarch64-linux err2.ll 2>&1 -o - | \ +; RUN: FileCheck %s --check-prefix=ERR + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 31} diff --git a/llvm/test/Verifier/module-flags-note-gnu-property-elf-pauthabi.ll b/llvm/test/Verifier/module-flags-note-gnu-property-elf-pauthabi.ll new file mode 100644 index 0000000..435073d --- /dev/null +++ b/llvm/test/Verifier/module-flags-note-gnu-property-elf-pauthabi.ll @@ -0,0 +1,19 @@ +; RUN: rm -rf %t && split-file %s %t && cd %t + +; CHECK: either both or no 'aarch64-elf-pauthabi-platform' and 'aarch64-elf-pauthabi-version' module flags must be present + +;--- err1.ll + +; RUN: not llvm-as err1.ll -o /dev/null 2>&1 | FileCheck %s + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-platform", i32 2} + +;--- err2.ll + +; RUN: not llvm-as err2.ll -o /dev/null 2>&1 | FileCheck %s + +!llvm.module.flags = !{!0} + +!0 = !{i32 1, !"aarch64-elf-pauthabi-version", i32 31} -- cgit v1.1 From 53fe94a0ce262c6e38117429a30814f54ea55b0f Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Thu, 4 Apr 2024 19:13:03 +0100 Subject: [CostModel][X86] Add costkinds test coverage for masked load/store/gather/scatter Noticed while starting triage for #87640 --- .../CostModel/X86/masked-intrinsic-codesize.ll | 2413 ++++++++++++++++++++ .../X86/masked-intrinsic-cost-inseltpoison.ll | 16 +- .../CostModel/X86/masked-intrinsic-cost.ll | 16 +- .../CostModel/X86/masked-intrinsic-latency.ll | 2413 ++++++++++++++++++++ .../CostModel/X86/masked-intrinsic-sizelatency.ll | 2413 ++++++++++++++++++++ 5 files changed, 7255 insertions(+), 16 deletions(-) create mode 100644 
llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll create mode 100644 llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll create mode 100644 llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll new file mode 100644 index 0000000..55fdaaf --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-codesize.ll @@ -0,0 +1,2413 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=code-size -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=skylake | FileCheck %s --check-prefixes=AVX,SKL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=knl | FileCheck %s --check-prefixes=AVX512,KNL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=code-size -mcpu=skx | FileCheck %s --check-prefixes=AVX512,SKX + +define i32 @masked_load() { +; SSE2-LABEL: 'masked_load' +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, 
<15 x i1> undef, <15 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V3I64 = call <3 x i64> 
@llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16I16 = call <16 x i16> 
@llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_load' +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V12F32 = call <12 x float> 
@llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: 
%V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for 
instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_load' +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x 
float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr 
undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 163 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; 
AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_load' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; KNL-NEXT: Cost Model: Found 
an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_load' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; SKX-NEXT: 
Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, 
<2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; SKX-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) + %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) + %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) + %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) + %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) + %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) + %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) + %V13F32 = call <13 x 
float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) + %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) + %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) + %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) + %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) + %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) + %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) + %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) + %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) + %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) + %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) + %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) + %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) + %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) + %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) + %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) + %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) + %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) + %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) + %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) + %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) + %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) + %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) + %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) + %V4I32 = call <4 x i32> 
@llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) + %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) + %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_store() { +; SSE2-LABEL: 'masked_store' +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void 
@llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 86 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 190 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_store' +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 36 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void 
@llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE42-NEXT: Cost Model: 
Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_store' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found 
an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) 
+; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 163 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> 
undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_store' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; 
KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; 
KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 326 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_store' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; SKX-NEXT: Cost 
Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) + call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) + call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) + call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) + call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) + call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) + call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) + call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) + call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) + call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) + call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) + call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) + call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) + call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) + call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) + call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) + call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) + call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) + call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) + call void @llvm.masked.store.v1f32.p0(<1 x 
float> undef, ptr undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) + call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) + call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) + call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) + call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) + call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) + call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) + call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) + call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) + call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) + call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) + call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) + call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) + call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) + call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) + call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) + call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) + call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) + call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) + call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) + call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) + + call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) + call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) + call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) + + ret i32 0 +} + +define i32 @masked_gather() { +; SSE2-LABEL: 'masked_gather' +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x 
double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: 
%V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_gather' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> 
@llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_gather' +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x 
ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_gather' +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> 
undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_gather' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 49 for instruction: 
%V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_gather' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 
1, <1 x i1> undef, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_gather' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) + %V4F64 = call <4 x double> 
@llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_scatter() { +; SSE2-LABEL: 'masked_scatter' +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void 
@llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_scatter' +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 20 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void 
@llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_scatter' +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 98 for 
instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 194 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_scatter' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_scatter' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void 
@llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + + call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + + call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) + call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + + call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) + call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) + call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + + ret i32 0 +} + +define i32 @masked_expandload() { +; SSE2-LABEL: 'masked_expandload' +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> 
undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_expandload' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> 
@llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_expandload' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: 
%V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) +; 
AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX512-LABEL: 'masked_expandload' +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> 
@llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> 
undef, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_compressstore() { +; SSE2-LABEL: 'masked_compressstore' +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void 
@llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_compressstore' +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void 
@llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_compressstore' +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> 
undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_compressstore' +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; AVX2-NEXT: 
Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_compressstore' +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX512-LABEL: 'masked_compressstore' +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void 
@llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) + call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) + + call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr 
undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) + + call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) + call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) + + call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) + + call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) + call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) + + call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) + call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) + call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) + + ret i32 0 +} + +define <2 x double> @test1(<2 x i64> %trigger, ptr %addr, <2 x double> %dst) { +; SSE2-LABEL: 'test1' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SSE42-LABEL: 'test1' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX-LABEL: 'test1' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX512-LABEL: 'test1' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; + %mask = icmp eq <2 x i64> %trigger, zeroinitializer + %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x double>%dst) + ret <2 x double> %res +} + +define <4 x i32> 
@test2(<4 x i32> %trigger, ptr %addr, <4 x i32> %dst) { +; SSE2-LABEL: 'test2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX-LABEL: 'test2' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX512-LABEL: 'test2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; + %mask = icmp eq <4 x i32> %trigger, zeroinitializer + %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst) + ret <4 x i32> %res +} + +define void @test3(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) { +; SSE2-LABEL: 'test3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test3' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq 
<4 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v4i32.p0(<4 x i32>%val, ptr %addr, i32 4, <4 x i1>%mask) + ret void +} + +define <8 x float> @test4(<8 x i32> %trigger, ptr %addr, <8 x float> %dst) { +; SSE2-LABEL: 'test4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; SSE42-LABEL: 'test4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; AVX1-LABEL: 'test4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; AVX2-LABEL: 'test4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; SKL-LABEL: 'test4' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; AVX512-LABEL: 'test4' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; + %mask = icmp eq <8 x i32> %trigger, zeroinitializer + %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1>%mask, <8 x float>%dst) + ret <8 x float> %res +} + +define void @test5(<2 x i32> %trigger, ptr %addr, <2 x float> %val) { +; SSE2-LABEL: 'test5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost 
Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test5' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test5' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v2f32.p0(<2 x float>%val, ptr %addr, i32 4, <2 x i1>%mask) + ret void +} + +define void @test6(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) { +; SSE2-LABEL: 'test6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test6' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test6' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v2i32.p0(<2 x i32>%val, ptr %addr, i32 4, <2 x i1>%mask) + ret void +} + +define <2 x float> @test7(<2 x i32> %trigger, ptr %addr, <2 x float> %dst) { +; SSE2-LABEL: 'test7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; SSE42-LABEL: 'test7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 
1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; AVX-LABEL: 'test7' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; AVX512-LABEL: 'test7' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x float>%dst) + ret <2 x float> %res +} + +define <2 x i32> @test8(<2 x i32> %trigger, ptr %addr, <2 x i32> %dst) { +; SSE2-LABEL: 'test8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; SSE42-LABEL: 'test8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; AVX-LABEL: 'test8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; AVX512-LABEL: 'test8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst) + ret <2 x i32> %res +} + +define <2 x double> @test_gather_2f64(<2 x ptr> %ptrs, <2 x i1> %mask, <2 x double> %src0) { +; SSE2-LABEL: 'test_gather_2f64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, 
i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SSE42-LABEL: 'test_gather_2f64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX1-LABEL: 'test_gather_2f64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX2-LABEL: 'test_gather_2f64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SKL-LABEL: 'test_gather_2f64' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX512-LABEL: 'test_gather_2f64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; + %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) + ret <2 x double> %res +} + +define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { +; SSE2-LABEL: 'test_gather_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test_gather_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX1-LABEL: 'test_gather_4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX2-LABEL: 'test_gather_4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKL-LABEL: 'test_gather_4i32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; KNL-LABEL: 
'test_gather_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKX-LABEL: 'test_gather_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) + ret <4 x i32> %res +} + +define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { +; SSE2-LABEL: 'test_gather_4i32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test_gather_4i32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX1-LABEL: 'test_gather_4i32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX2-LABEL: 'test_gather_4i32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKL-LABEL: 'test_gather_4i32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; KNL-LABEL: 'test_gather_4i32_const_mask' +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKX-LABEL: 'test_gather_4i32_const_mask' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) + ret <4 x i32> %res +} + +define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { +; SSE2-LABEL: 'test_gather_16f32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; 
SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_const_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_var_mask(ptr %base, <16 x i32> %ind, 
<16 x i1>%mask) { +; SSE2-LABEL: 'test_gather_16f32_var_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; 
+ %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_ra_var_mask(<16 x ptr> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { +; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_ra_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost 
Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { +; SSE2-LABEL: 'test_gather_16f32_const_mask2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_const_mask2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_const_mask2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_const_mask2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_const_mask2' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_const_mask2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 + %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer + + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + +define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { +; SSE2-LABEL: 'test_scatter_16i32' +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_scatter_16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_scatter_16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SKL-LABEL: 'test_scatter_16i32' +; 
SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SKL-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 + %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer + + %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind + %imask = bitcast i16 %mask to <16 x i1> + call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>%val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) + ret void +} + +define void @test_scatter_8i32(<8 x i32>%a1, <8 x ptr> %ptr, <8 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_8i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) + ret void +} + 
+define void @test_scatter_4i32(<4 x i32>%a1, <4 x ptr> %ptr, <4 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_4i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; KNL-LABEL: 'test_scatter_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SKX-LABEL: 'test_scatter_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) + ret void +} + +define <4 x float> @test_gather_4f32(ptr %ptr, <4 x i32> %ind, <4 x i1>%mask) { +; SSE2-LABEL: 'test_gather_4f32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SSE42-LABEL: 'test_gather_4f32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX1-LABEL: 'test_gather_4f32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX2-LABEL: 'test_gather_4f32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to 
<4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKL-LABEL: 'test_gather_4f32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; KNL-LABEL: 'test_gather_4f32' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; KNL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKX-LABEL: 'test_gather_4f32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; + %sext_ind = sext <4 x i32> %ind to <4 x i64> + %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind + + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) + ret <4 x float>%res +} + +define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { +; SSE2-LABEL: 'test_gather_4f32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SSE42-LABEL: 'test_gather_4f32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: ret <4 x float> %res +; +; AVX1-LABEL: 'test_gather_4f32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX2-LABEL: 'test_gather_4f32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKL-LABEL: 'test_gather_4f32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; KNL-LABEL: 'test_gather_4f32_const_mask' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKX-LABEL: 'test_gather_4f32_const_mask' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; + %sext_ind = sext <4 x i32> %ind to <4 x i64> + %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind + + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) + ret <4 x float>%res +} + +declare <8 x double> @llvm.masked.load.v8f64.p0(ptr, i32, <8 x i1>, <8 x double>) +declare <7 x double> @llvm.masked.load.v7f64.p0(ptr, i32, <7 x i1>, <7 x double>) +declare <6 x double> @llvm.masked.load.v6f64.p0(ptr, i32, <6 x i1>, <6 x double>) +declare <5 x double> @llvm.masked.load.v5f64.p0(ptr, i32, <5 x i1>, <5 x double>) +declare <4 x double> @llvm.masked.load.v4f64.p0(ptr, i32, <4 x i1>, <4 x double>) 
+declare <3 x double> @llvm.masked.load.v3f64.p0(ptr, i32, <3 x i1>, <3 x double>) +declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.load.v1f64.p0(ptr, i32, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.load.v16f32.p0(ptr, i32, <16 x i1>, <16 x float>) +declare <15 x float> @llvm.masked.load.v15f32.p0(ptr, i32, <15 x i1>, <15 x float>) +declare <14 x float> @llvm.masked.load.v14f32.p0(ptr, i32, <14 x i1>, <14 x float>) +declare <13 x float> @llvm.masked.load.v13f32.p0(ptr, i32, <13 x i1>, <13 x float>) +declare <12 x float> @llvm.masked.load.v12f32.p0(ptr, i32, <12 x i1>, <12 x float>) +declare <11 x float> @llvm.masked.load.v11f32.p0(ptr, i32, <11 x i1>, <11 x float>) +declare <10 x float> @llvm.masked.load.v10f32.p0(ptr, i32, <10 x i1>, <10 x float>) +declare <9 x float> @llvm.masked.load.v9f32.p0(ptr, i32, <9 x i1>, <9 x float>) +declare <8 x float> @llvm.masked.load.v8f32.p0(ptr, i32, <8 x i1>, <8 x float>) +declare <7 x float> @llvm.masked.load.v7f32.p0(ptr, i32, <7 x i1>, <7 x float>) +declare <6 x float> @llvm.masked.load.v6f32.p0(ptr, i32, <6 x i1>, <6 x float>) +declare <5 x float> @llvm.masked.load.v5f32.p0(ptr, i32, <5 x i1>, <5 x float>) +declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>) +declare <3 x float> @llvm.masked.load.v3f32.p0(ptr, i32, <3 x i1>, <3 x float>) +declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>) +declare <1 x float> @llvm.masked.load.v1f32.p0(ptr, i32, <1 x i1>, <1 x float>) + +declare <8 x i64> @llvm.masked.load.v8i64.p0(ptr, i32, <8 x i1>, <8 x i64>) +declare <7 x i64> @llvm.masked.load.v7i64.p0(ptr, i32, <7 x i1>, <7 x i64>) +declare <6 x i64> @llvm.masked.load.v6i64.p0(ptr, i32, <6 x i1>, <6 x i64>) +declare <5 x i64> @llvm.masked.load.v5i64.p0(ptr, i32, <5 x i1>, <5 x i64>) +declare <4 x i64> @llvm.masked.load.v4i64.p0(ptr, i32, <4 x i1>, <4 x i64>) +declare <3 x i64> @llvm.masked.load.v3i64.p0(ptr, i32, <3 x i1>, <3 x i64>) +declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.load.v1i64.p0(ptr, i32, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.load.v16i32.p0(ptr, i32, <16 x i1>, <16 x i32>) +declare <15 x i32> @llvm.masked.load.v15i32.p0(ptr, i32, <15 x i1>, <15 x i32>) +declare <14 x i32> @llvm.masked.load.v14i32.p0(ptr, i32, <14 x i1>, <14 x i32>) +declare <13 x i32> @llvm.masked.load.v13i32.p0(ptr, i32, <13 x i1>, <13 x i32>) +declare <12 x i32> @llvm.masked.load.v12i32.p0(ptr, i32, <12 x i1>, <12 x i32>) +declare <11 x i32> @llvm.masked.load.v11i32.p0(ptr, i32, <11 x i1>, <11 x i32>) +declare <10 x i32> @llvm.masked.load.v10i32.p0(ptr, i32, <10 x i1>, <10 x i32>) +declare <9 x i32> @llvm.masked.load.v9i32.p0(ptr, i32, <9 x i1>, <9 x i32>) +declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32, <8 x i1>, <8 x i32>) +declare <7 x i32> @llvm.masked.load.v7i32.p0(ptr, i32, <7 x i1>, <7 x i32>) +declare <6 x i32> @llvm.masked.load.v6i32.p0(ptr, i32, <6 x i1>, <6 x i32>) +declare <5 x i32> @llvm.masked.load.v5i32.p0(ptr, i32, <5 x i1>, <5 x i32>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>) +declare <3 x i32> @llvm.masked.load.v3i32.p0(ptr, i32, <3 x i1>, <3 x i32>) +declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>) +declare <1 x i32> @llvm.masked.load.v1i32.p0(ptr, i32, <1 x i1>, <1 x i32>) + +declare <32 x i16> @llvm.masked.load.v32i16.p0(ptr, i32, <32 x i1>, <32 x i16>) +declare <16 x 
i16> @llvm.masked.load.v16i16.p0(ptr, i32, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.load.v64i8.p0(ptr, i32, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.load.v32i8.p0(ptr, i32, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.store.v8f64.p0(<8 x double>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7f64.p0(<7 x double>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6f64.p0(<6 x double>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5f64.p0(<5 x double>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4f64.p0(<4 x double>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3f64.p0(<3 x double>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2f64.p0(<2 x double>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1f64.p0(<1 x double>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v16f32.p0(<16 x float>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v15f32.p0(<15 x float>, ptr, i32, <15 x i1>) +declare void @llvm.masked.store.v14f32.p0(<14 x float>, ptr, i32, <14 x i1>) +declare void @llvm.masked.store.v13f32.p0(<13 x float>, ptr, i32, <13 x i1>) +declare void @llvm.masked.store.v12f32.p0(<12 x float>, ptr, i32, <12 x i1>) +declare void @llvm.masked.store.v11f32.p0(<11 x float>, ptr, i32, <11 x i1>) +declare void @llvm.masked.store.v10f32.p0(<10 x float>, ptr, i32, <10 x i1>) +declare void @llvm.masked.store.v9f32.p0(<9 x float>, ptr, i32, <9 x i1>) +declare void @llvm.masked.store.v8f32.p0(<8 x float>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6f32.p0(<6 x float>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5f32.p0(<5 x float>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3f32.p0(<3 x float>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2f32.p0(<2 x float>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1f32.p0(<1 x float>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v8i64.p0(<8 x i64>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7i64.p0(<7 x i64>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6i64.p0(<6 x i64>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5i64.p0(<5 x i64>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4i64.p0(<4 x i64>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3i64.p0(<3 x i64>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1i64.p0(<1 x i64>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v15i32.p0(<15 x i32>, ptr, i32, <15 x i1>) +declare void @llvm.masked.store.v14i32.p0(<14 x i32>, ptr, i32, <14 x i1>) +declare void @llvm.masked.store.v13i32.p0(<13 x i32>, ptr, i32, <13 x i1>) +declare void @llvm.masked.store.v12i32.p0(<12 x i32>, ptr, i32, <12 x i1>) +declare void @llvm.masked.store.v11i32.p0(<11 x i32>, ptr, i32, <11 x i1>) +declare void @llvm.masked.store.v10i32.p0(<10 x i32>, ptr, i32, <10 x i1>) +declare void @llvm.masked.store.v9i32.p0(<9 x i32>, ptr, i32, <9 x 
i1>) +declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7i32.p0(<7 x i32>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6i32.p0(<6 x i32>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5i32.p0(<5 x i32>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3i32.p0(<3 x i32>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v32i16.p0(<32 x i16>, ptr, i32, <32 x i1>) +declare void @llvm.masked.store.v16i16.p0(<16 x i16>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>) + +declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>) +declare void @llvm.masked.store.v32i8.p0(<32 x i8>, ptr, i32, <32 x i1>) +declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>) + +declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>) +declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>) + +declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>) +declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr>, i32, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>) +declare void 
@llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>) + +declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>) + +declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>) + +declare void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>) + +declare void @llvm.masked.scatter.v32i16.v32p0(<32 x i16>, <32 x ptr>, i32, <32 x i1>) +declare void @llvm.masked.scatter.v16i16.v16p0(<16 x i16>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>) + +declare void @llvm.masked.scatter.v64i8.v64p0(<64 x i8>, <64 x ptr>, i32, <64 x i1>) +declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>) +declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>) + +declare <8 x double> @llvm.masked.expandload.v8f64(ptr, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.expandload.v4f64(ptr, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.expandload.v2f64(ptr, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.expandload.v1f64(ptr, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.expandload.v16f32(ptr, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.expandload.v8f32(ptr, <8 x i1>, <8 x float>) +declare <4 x float> @llvm.masked.expandload.v4f32(ptr, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.expandload.v2f32(ptr, <2 x i1>, <2 x float>) + +declare <8 x i64> @llvm.masked.expandload.v8i64(ptr, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.expandload.v4i64(ptr, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.expandload.v1i64(ptr, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.expandload.v16i32(ptr, <16 x i1>, <16 x i32>) +declare <8 x i32> @llvm.masked.expandload.v8i32(ptr, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.expandload.v4i32(ptr, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.expandload.v2i32(ptr, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.expandload.v4i16(ptr, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>) +declare <32 x i8> 
@llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.expandload.v8i8(ptr, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.compressstore.v8f64(<8 x double>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4f64(<4 x double>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2f64(<2 x double>, ptr, <2 x i1>) +declare void @llvm.masked.compressstore.v1f64(<1 x double>, ptr, <1 x i1>) + +declare void @llvm.masked.compressstore.v16f32(<16 x float>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8f32(<8 x float>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4f32(<4 x float>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2f32(<2 x float>, ptr, <2 x i1>) + +declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>) +declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>) + +declare void @llvm.masked.compressstore.v16i32(<16 x i32>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>) + +declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>) +declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>) + +declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>) +declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>) +declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>) diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll index 897344d..ad56c28 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost-inseltpoison.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse2 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=SSE2 -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=SSE42 -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX1 -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx2 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX2 -; -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,SKL -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,SKX +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 
+; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mcpu=skylake | FileCheck %s --check-prefixes=AVX,SKL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mcpu=knl | FileCheck %s --check-prefixes=AVX512,KNL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mcpu=skx | FileCheck %s --check-prefixes=AVX512,SKX define i32 @masked_load() { ; SSE2-LABEL: 'masked_load' diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll index 5f22b2e..c7e7c46 100644 --- a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-cost.ll @@ -1,12 +1,12 @@ ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse2 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=SSE2 -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+sse4.2 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=SSE42 -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX1 -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mattr=+avx2 -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,AVX2 -; -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skylake -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX,SKL -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=knl -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,KNL -; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -mcpu=skx -passes="print" 2>&1 -disable-output | FileCheck %s --check-prefixes=AVX512,SKX +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mcpu=skylake | FileCheck %s --check-prefixes=AVX,SKL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mcpu=knl | FileCheck %s --check-prefixes=AVX512,KNL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -mcpu=skx | FileCheck %s --check-prefixes=AVX512,SKX define i32 @masked_load() { ; SSE2-LABEL: 'masked_load' diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll new file mode 100644 index 0000000..edb05ad --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-latency.ll @@ 
-0,0 +1,2413 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=skylake | FileCheck %s --check-prefixes=AVX,SKL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=knl | FileCheck %s --check-prefixes=AVX512,KNL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print" 2>&1 -disable-output -cost-kind=latency -mcpu=skx | FileCheck %s --check-prefixes=AVX512,SKX + +define i32 @masked_load() { +; SSE2-LABEL: 'masked_load' +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12F32 = call <12 x float> 
@llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16I32 = call <16 x 
i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %V64I8 = call <64 x i8> 
@llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_load' +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V9F32 = 
call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for 
instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for 
instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_load' +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x 
float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr 
undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 163 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_load' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; KNL-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call 
<6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 326 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_load' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; 
SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 
x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0
+;
+ %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+ %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+ %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+ %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+ %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+
+ %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+ %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+ %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+ %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+ %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+ %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+ %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+ %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+ %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+ %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+ %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+ %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+ %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+
+ %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+ %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+ %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+ %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+ %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+
+ %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+ %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+ %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+ %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+ %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+ %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+ %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+ %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+ %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+ %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+ %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+ %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+ %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+
+ %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+
+ %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+
+ ret i32 0
+}
+
+define i32 @masked_store() {
+; SSE2-LABEL: 'masked_store'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x
float> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: 
call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 190 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_store' +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void 
@llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x 
i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void 
@llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_store' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call 
void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for 
instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 163 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost 
of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_store' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 326 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_store' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void 
@llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for 
instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) + call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) + call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) + call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) + call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) + call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) + call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) + call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) + call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) + call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) + call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) + call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) + call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) + call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) + call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) + call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) + call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) + call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) + call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) + call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) + call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) + call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) + call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) + call 
void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) + call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) + call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) + call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) + call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) + call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) + call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) + call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) + call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) + call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) + call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) + call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) + call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) + call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) + call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) + call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) + call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) + + call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) + call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) + call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) + + ret i32 0 +} + +define i32 @masked_gather() { +; SSE2-LABEL: 'masked_gather' +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found 
an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_gather' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for 
instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_gather' +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> 
@llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_gather' +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> 
undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_gather' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SKL-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an 
estimated cost of 194 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_gather' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> 
@llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_gather' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, 
<8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x 
ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_scatter() { +; SSE2-LABEL: 'masked_scatter' +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void 
@llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_scatter' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void 
@llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_scatter' +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 98 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for 
instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 194 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_scatter' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 
6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_scatter' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an 
estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + + call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) + + call void 
@llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + + call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) + call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + + call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) + call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) + call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + + ret i32 0 +} + +define i32 @masked_expandload() { +; SSE2-LABEL: 'masked_expandload' +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x 
i1> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_expandload' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> 
@llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_expandload' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for 
instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX512-LABEL: 'masked_expandload' +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for 
instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> 
@llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_compressstore() { +; SSE2-LABEL: 'masked_compressstore' +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 
x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) 
+; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_compressstore' +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; SSE42-NEXT: Cost Model: 
Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_compressstore' +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for 
instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_compressstore' +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void 
@llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_compressstore' +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, 
<16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX512-LABEL: 'masked_compressstore' +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: 
Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) + call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) + + call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) + + call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) + call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) + + call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) + call void 
@llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) + + call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) + call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) + + call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) + call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) + call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) + + ret i32 0 +} + +define <2 x double> @test1(<2 x i64> %trigger, ptr %addr, <2 x double> %dst) { +; SSE2-LABEL: 'test1' +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SSE42-LABEL: 'test1' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX-LABEL: 'test1' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX512-LABEL: 'test1' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; + %mask = icmp eq <2 x i64> %trigger, zeroinitializer + %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x double>%dst) + ret <2 x double> %res +} + +define <4 x i32> @test2(<4 x i32> %trigger, ptr %addr, <4 x i32> %dst) { +; SSE2-LABEL: 'test2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp 
eq <4 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX-LABEL: 'test2' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX512-LABEL: 'test2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; + %mask = icmp eq <4 x i32> %trigger, zeroinitializer + %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst) + ret <4 x i32> %res +} + +define void @test3(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) { +; SSE2-LABEL: 'test3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test3' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test3' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <4 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v4i32.p0(<4 x i32>%val, ptr %addr, i32 4, <4 x i1>%mask) + ret void +} + +define <8 x float> @test4(<8 x i32> %trigger, ptr %addr, <8 x float> %dst) { +; SSE2-LABEL: 'test4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SSE2-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret <8 x float> %res +; +; SSE42-LABEL: 'test4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; AVX1-LABEL: 'test4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; AVX2-LABEL: 'test4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; SKL-LABEL: 'test4' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; AVX512-LABEL: 'test4' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; + %mask = icmp eq <8 x i32> %trigger, zeroinitializer + %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1>%mask, <8 x float>%dst) + ret <8 x float> %res +} + +define void @test5(<2 x i32> %trigger, ptr %addr, <2 x float> %val) { +; SSE2-LABEL: 'test5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test5' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test5' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v2f32.p0(<2 x float>%val, ptr %addr, i32 4, <2 x i1>%mask) + ret void +} + +define void @test6(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) { +; SSE2-LABEL: 'test6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test6' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test6' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v2i32.p0(<2 x i32>%val, ptr %addr, i32 4, <2 x i1>%mask) + ret void +} + +define <2 x float> @test7(<2 x i32> %trigger, ptr %addr, <2 x float> %dst) { +; SSE2-LABEL: 'test7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; SSE42-LABEL: 'test7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; AVX-LABEL: 'test7' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = 
call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; AVX512-LABEL: 'test7' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x float>%dst) + ret <2 x float> %res +} + +define <2 x i32> @test8(<2 x i32> %trigger, ptr %addr, <2 x i32> %dst) { +; SSE2-LABEL: 'test8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; SSE42-LABEL: 'test8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; AVX-LABEL: 'test8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; AVX512-LABEL: 'test8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst) + ret <2 x i32> %res +} + +define <2 x double> @test_gather_2f64(<2 x ptr> %ptrs, <2 x i1> %mask, <2 x double> %src0) { +; SSE2-LABEL: 'test_gather_2f64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SSE42-LABEL: 'test_gather_2f64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX1-LABEL: 'test_gather_2f64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: 
%res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX2-LABEL: 'test_gather_2f64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SKL-LABEL: 'test_gather_2f64' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX512-LABEL: 'test_gather_2f64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; + %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) + ret <2 x double> %res +} + +define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { +; SSE2-LABEL: 'test_gather_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test_gather_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX1-LABEL: 'test_gather_4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX2-LABEL: 'test_gather_4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKL-LABEL: 'test_gather_4i32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; KNL-LABEL: 'test_gather_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKX-LABEL: 'test_gather_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: 
ret <4 x i32> %res +; + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) + ret <4 x i32> %res +} + +define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { +; SSE2-LABEL: 'test_gather_4i32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test_gather_4i32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX1-LABEL: 'test_gather_4i32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX2-LABEL: 'test_gather_4i32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKL-LABEL: 'test_gather_4i32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; KNL-LABEL: 'test_gather_4i32_const_mask' +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKX-LABEL: 'test_gather_4i32_const_mask' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) + ret <4 x i32> %res +} + +define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { +; SSE2-LABEL: 'test_gather_16f32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind 
+; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_const_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_var_mask(ptr %base, <16 x i32> %ind, <16 x i1>%mask) { +; SSE2-LABEL: 'test_gather_16f32_var_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret 
<16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_ra_var_mask(<16 x ptr> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { +; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated 
cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_ra_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind + + %res = call <16 x float> 
@llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { +; SSE2-LABEL: 'test_gather_16f32_const_mask2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_const_mask2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_const_mask2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_const_mask2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; 
AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_const_mask2' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_const_mask2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 + %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer + + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + +define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { +; SSE2-LABEL: 'test_scatter_16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE2-NEXT: 
Cost Model: Found an estimated cost of 92 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_scatter_16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX1-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_scatter_16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SKL-LABEL: 'test_scatter_16i32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 
%mask to <16 x i1> +; SKL-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 + %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer + + %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind + %imask = bitcast i16 %mask to <16 x i1> + call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>%val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) + ret void +} + +define void @test_scatter_8i32(<8 x i32>%a1, <8 x ptr> %ptr, <8 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_8i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) + ret void +} + +define void @test_scatter_4i32(<4 x i32>%a1, <4 x ptr> %ptr, <4 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> 
%mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_4i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; KNL-LABEL: 'test_scatter_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SKX-LABEL: 'test_scatter_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) + ret void +} + +define <4 x float> @test_gather_4f32(ptr %ptr, <4 x i32> %ind, <4 x i1>%mask) { +; SSE2-LABEL: 'test_gather_4f32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SSE42-LABEL: 'test_gather_4f32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX1-LABEL: 'test_gather_4f32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX2-LABEL: 'test_gather_4f32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKL-LABEL: 'test_gather_4f32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to 
<4 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; KNL-LABEL: 'test_gather_4f32' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; KNL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKX-LABEL: 'test_gather_4f32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; + %sext_ind = sext <4 x i32> %ind to <4 x i64> + %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind + + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) + ret <4 x float>%res +} + +define <4 x float> @test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { +; SSE2-LABEL: 'test_gather_4f32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SSE42-LABEL: 'test_gather_4f32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX1-LABEL: 'test_gather_4f32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost 
of 1 for instruction: ret <4 x float> %res +; +; AVX2-LABEL: 'test_gather_4f32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKL-LABEL: 'test_gather_4f32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; KNL-LABEL: 'test_gather_4f32_const_mask' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKX-LABEL: 'test_gather_4f32_const_mask' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; + %sext_ind = sext <4 x i32> %ind to <4 x i64> + %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind + + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x float> undef) + ret <4 x float>%res +} + +declare <8 x double> @llvm.masked.load.v8f64.p0(ptr, i32, <8 x i1>, <8 x double>) +declare <7 x double> @llvm.masked.load.v7f64.p0(ptr, i32, <7 x i1>, <7 x double>) +declare <6 x double> @llvm.masked.load.v6f64.p0(ptr, i32, <6 x i1>, <6 x double>) +declare <5 x double> @llvm.masked.load.v5f64.p0(ptr, i32, <5 x i1>, <5 x double>) +declare <4 x double> @llvm.masked.load.v4f64.p0(ptr, i32, <4 x i1>, <4 x double>) +declare <3 x double> @llvm.masked.load.v3f64.p0(ptr, i32, <3 x i1>, <3 x double>) +declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.load.v1f64.p0(ptr, i32, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.load.v16f32.p0(ptr, i32, <16 x i1>, <16 x float>) +declare <15 x float> @llvm.masked.load.v15f32.p0(ptr, i32, <15 x i1>, <15 x float>) +declare <14 x float> @llvm.masked.load.v14f32.p0(ptr, i32, <14 x i1>, <14 x float>) +declare <13 x float> @llvm.masked.load.v13f32.p0(ptr, i32, 
<13 x i1>, <13 x float>) +declare <12 x float> @llvm.masked.load.v12f32.p0(ptr, i32, <12 x i1>, <12 x float>) +declare <11 x float> @llvm.masked.load.v11f32.p0(ptr, i32, <11 x i1>, <11 x float>) +declare <10 x float> @llvm.masked.load.v10f32.p0(ptr, i32, <10 x i1>, <10 x float>) +declare <9 x float> @llvm.masked.load.v9f32.p0(ptr, i32, <9 x i1>, <9 x float>) +declare <8 x float> @llvm.masked.load.v8f32.p0(ptr, i32, <8 x i1>, <8 x float>) +declare <7 x float> @llvm.masked.load.v7f32.p0(ptr, i32, <7 x i1>, <7 x float>) +declare <6 x float> @llvm.masked.load.v6f32.p0(ptr, i32, <6 x i1>, <6 x float>) +declare <5 x float> @llvm.masked.load.v5f32.p0(ptr, i32, <5 x i1>, <5 x float>) +declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>) +declare <3 x float> @llvm.masked.load.v3f32.p0(ptr, i32, <3 x i1>, <3 x float>) +declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>) +declare <1 x float> @llvm.masked.load.v1f32.p0(ptr, i32, <1 x i1>, <1 x float>) + +declare <8 x i64> @llvm.masked.load.v8i64.p0(ptr, i32, <8 x i1>, <8 x i64>) +declare <7 x i64> @llvm.masked.load.v7i64.p0(ptr, i32, <7 x i1>, <7 x i64>) +declare <6 x i64> @llvm.masked.load.v6i64.p0(ptr, i32, <6 x i1>, <6 x i64>) +declare <5 x i64> @llvm.masked.load.v5i64.p0(ptr, i32, <5 x i1>, <5 x i64>) +declare <4 x i64> @llvm.masked.load.v4i64.p0(ptr, i32, <4 x i1>, <4 x i64>) +declare <3 x i64> @llvm.masked.load.v3i64.p0(ptr, i32, <3 x i1>, <3 x i64>) +declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.load.v1i64.p0(ptr, i32, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.load.v16i32.p0(ptr, i32, <16 x i1>, <16 x i32>) +declare <15 x i32> @llvm.masked.load.v15i32.p0(ptr, i32, <15 x i1>, <15 x i32>) +declare <14 x i32> @llvm.masked.load.v14i32.p0(ptr, i32, <14 x i1>, <14 x i32>) +declare <13 x i32> @llvm.masked.load.v13i32.p0(ptr, i32, <13 x i1>, <13 x i32>) +declare <12 x i32> @llvm.masked.load.v12i32.p0(ptr, i32, <12 x i1>, <12 x i32>) +declare <11 x i32> @llvm.masked.load.v11i32.p0(ptr, i32, <11 x i1>, <11 x i32>) +declare <10 x i32> @llvm.masked.load.v10i32.p0(ptr, i32, <10 x i1>, <10 x i32>) +declare <9 x i32> @llvm.masked.load.v9i32.p0(ptr, i32, <9 x i1>, <9 x i32>) +declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32, <8 x i1>, <8 x i32>) +declare <7 x i32> @llvm.masked.load.v7i32.p0(ptr, i32, <7 x i1>, <7 x i32>) +declare <6 x i32> @llvm.masked.load.v6i32.p0(ptr, i32, <6 x i1>, <6 x i32>) +declare <5 x i32> @llvm.masked.load.v5i32.p0(ptr, i32, <5 x i1>, <5 x i32>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>) +declare <3 x i32> @llvm.masked.load.v3i32.p0(ptr, i32, <3 x i1>, <3 x i32>) +declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>) +declare <1 x i32> @llvm.masked.load.v1i32.p0(ptr, i32, <1 x i1>, <1 x i32>) + +declare <32 x i16> @llvm.masked.load.v32i16.p0(ptr, i32, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.load.v16i16.p0(ptr, i32, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.load.v64i8.p0(ptr, i32, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.load.v32i8.p0(ptr, i32, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>) + +declare void 
@llvm.masked.store.v8f64.p0(<8 x double>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7f64.p0(<7 x double>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6f64.p0(<6 x double>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5f64.p0(<5 x double>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4f64.p0(<4 x double>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3f64.p0(<3 x double>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2f64.p0(<2 x double>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1f64.p0(<1 x double>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v16f32.p0(<16 x float>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v15f32.p0(<15 x float>, ptr, i32, <15 x i1>) +declare void @llvm.masked.store.v14f32.p0(<14 x float>, ptr, i32, <14 x i1>) +declare void @llvm.masked.store.v13f32.p0(<13 x float>, ptr, i32, <13 x i1>) +declare void @llvm.masked.store.v12f32.p0(<12 x float>, ptr, i32, <12 x i1>) +declare void @llvm.masked.store.v11f32.p0(<11 x float>, ptr, i32, <11 x i1>) +declare void @llvm.masked.store.v10f32.p0(<10 x float>, ptr, i32, <10 x i1>) +declare void @llvm.masked.store.v9f32.p0(<9 x float>, ptr, i32, <9 x i1>) +declare void @llvm.masked.store.v8f32.p0(<8 x float>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6f32.p0(<6 x float>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5f32.p0(<5 x float>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3f32.p0(<3 x float>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2f32.p0(<2 x float>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1f32.p0(<1 x float>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v8i64.p0(<8 x i64>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7i64.p0(<7 x i64>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6i64.p0(<6 x i64>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5i64.p0(<5 x i64>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4i64.p0(<4 x i64>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3i64.p0(<3 x i64>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1i64.p0(<1 x i64>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v15i32.p0(<15 x i32>, ptr, i32, <15 x i1>) +declare void @llvm.masked.store.v14i32.p0(<14 x i32>, ptr, i32, <14 x i1>) +declare void @llvm.masked.store.v13i32.p0(<13 x i32>, ptr, i32, <13 x i1>) +declare void @llvm.masked.store.v12i32.p0(<12 x i32>, ptr, i32, <12 x i1>) +declare void @llvm.masked.store.v11i32.p0(<11 x i32>, ptr, i32, <11 x i1>) +declare void @llvm.masked.store.v10i32.p0(<10 x i32>, ptr, i32, <10 x i1>) +declare void @llvm.masked.store.v9i32.p0(<9 x i32>, ptr, i32, <9 x i1>) +declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7i32.p0(<7 x i32>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6i32.p0(<6 x i32>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5i32.p0(<5 x i32>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3i32.p0(<3 x i32>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>) +declare void 
@llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v32i16.p0(<32 x i16>, ptr, i32, <32 x i1>) +declare void @llvm.masked.store.v16i16.p0(<16 x i16>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>) + +declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>) +declare void @llvm.masked.store.v32i8.p0(<32 x i8>, ptr, i32, <32 x i1>) +declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>) + +declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>) +declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>) + +declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>) +declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr>, i32, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>) + +declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>) + +declare void 
@llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>) + +declare void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>) + +declare void @llvm.masked.scatter.v32i16.v32p0(<32 x i16>, <32 x ptr>, i32, <32 x i1>) +declare void @llvm.masked.scatter.v16i16.v16p0(<16 x i16>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>) + +declare void @llvm.masked.scatter.v64i8.v64p0(<64 x i8>, <64 x ptr>, i32, <64 x i1>) +declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>) +declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>) + +declare <8 x double> @llvm.masked.expandload.v8f64(ptr, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.expandload.v4f64(ptr, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.expandload.v2f64(ptr, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.expandload.v1f64(ptr, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.expandload.v16f32(ptr, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.expandload.v8f32(ptr, <8 x i1>, <8 x float>) +declare <4 x float> @llvm.masked.expandload.v4f32(ptr, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.expandload.v2f32(ptr, <2 x i1>, <2 x float>) + +declare <8 x i64> @llvm.masked.expandload.v8i64(ptr, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.expandload.v4i64(ptr, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.expandload.v1i64(ptr, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.expandload.v16i32(ptr, <16 x i1>, <16 x i32>) +declare <8 x i32> @llvm.masked.expandload.v8i32(ptr, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.expandload.v4i32(ptr, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.expandload.v2i32(ptr, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.expandload.v4i16(ptr, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.expandload.v8i8(ptr, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.compressstore.v8f64(<8 x double>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4f64(<4 x double>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2f64(<2 x double>, ptr, <2 x i1>) +declare void @llvm.masked.compressstore.v1f64(<1 x double>, ptr, <1 x i1>) + +declare void 
@llvm.masked.compressstore.v16f32(<16 x float>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8f32(<8 x float>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4f32(<4 x float>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2f32(<2 x float>, ptr, <2 x i1>) + +declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>) +declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>) + +declare void @llvm.masked.compressstore.v16i32(<16 x i32>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>) + +declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>) +declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>) + +declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>) +declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>) +declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>) diff --git a/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll new file mode 100644 index 0000000..3ebd9cc --- /dev/null +++ b/llvm/test/Analysis/CostModel/X86/masked-intrinsic-sizelatency.ll @@ -0,0 +1,2413 @@ +; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse2 | FileCheck %s --check-prefixes=SSE2 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE42 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 +; +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=skylake | FileCheck %s --check-prefixes=AVX,SKL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=knl | FileCheck %s --check-prefixes=AVX512,KNL +; RUN: opt < %s -S -mtriple=x86_64-apple-darwin -passes="print<cost-model>" 2>&1 -disable-output -cost-kind=size-latency -mcpu=skx | FileCheck %s --check-prefixes=AVX512,SKX + +define i32 @masked_load() { +; SSE2-LABEL: 'masked_load' +; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, 
i32 1, <6 x i1> undef, <6 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for 
instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 101 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 89 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 83 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 77 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 41 for 
instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 190 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 95 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_load' +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F64 = call <3 x double> 
@llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 71 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE42-NEXT: Cost 
Model: Found an estimated cost of 35 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE42-NEXT: Cost 
Model: Found an estimated cost of 15 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_load' +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16F32 = 
call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: 
%V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 163 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr 
undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_load' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; 
KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an 
estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 164 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 326 for instruction: 
%V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 162 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_load' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9F32 = call <9 x float> 
@llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 
1, <13 x i1> undef, <13 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found 
an estimated cost of 1 for instruction: ret i32 0
+;
+ %V8F64 = call <8 x double> @llvm.masked.load.v8f64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x double> undef)
+ %V7F64 = call <7 x double> @llvm.masked.load.v7f64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x double> undef)
+ %V6F64 = call <6 x double> @llvm.masked.load.v6f64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x double> undef)
+ %V5F64 = call <5 x double> @llvm.masked.load.v5f64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x double> undef)
+ %V4F64 = call <4 x double> @llvm.masked.load.v4f64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x double> undef)
+ %V3F64 = call <3 x double> @llvm.masked.load.v3f64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x double> undef)
+ %V2F64 = call <2 x double> @llvm.masked.load.v2f64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x double> undef)
+ %V1F64 = call <1 x double> @llvm.masked.load.v1f64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x double> undef)
+
+ %V16F32 = call <16 x float> @llvm.masked.load.v16f32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x float> undef)
+ %V15F32 = call <15 x float> @llvm.masked.load.v15f32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x float> undef)
+ %V14F32 = call <14 x float> @llvm.masked.load.v14f32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x float> undef)
+ %V13F32 = call <13 x float> @llvm.masked.load.v13f32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x float> undef)
+ %V12F32 = call <12 x float> @llvm.masked.load.v12f32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x float> undef)
+ %V11F32 = call <11 x float> @llvm.masked.load.v11f32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x float> undef)
+ %V10F32 = call <10 x float> @llvm.masked.load.v10f32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x float> undef)
+ %V9F32 = call <9 x float> @llvm.masked.load.v9f32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x float> undef)
+ %V8F32 = call <8 x float> @llvm.masked.load.v8f32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x float> undef)
+ %V7F32 = call <7 x float> @llvm.masked.load.v7f32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x float> undef)
+ %V6F32 = call <6 x float> @llvm.masked.load.v6f32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x float> undef)
+ %V5F32 = call <5 x float> @llvm.masked.load.v5f32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x float> undef)
+ %V4F32 = call <4 x float> @llvm.masked.load.v4f32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x float> undef)
+ %V3F32 = call <3 x float> @llvm.masked.load.v3f32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x float> undef)
+ %V2F32 = call <2 x float> @llvm.masked.load.v2f32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x float> undef)
+ %V1F32 = call <1 x float> @llvm.masked.load.v1f32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x float> undef)
+
+ %V8I64 = call <8 x i64> @llvm.masked.load.v8i64.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+ %V7I64 = call <7 x i64> @llvm.masked.load.v7i64.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i64> undef)
+ %V6I64 = call <6 x i64> @llvm.masked.load.v6i64.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i64> undef)
+ %V5I64 = call <5 x i64> @llvm.masked.load.v5i64.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i64> undef)
+ %V4I64 = call <4 x i64> @llvm.masked.load.v4i64.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+ %V3I64 = call <3 x i64> @llvm.masked.load.v3i64.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i64> undef)
+ %V2I64 = call <2 x i64> @llvm.masked.load.v2i64.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+ %V1I64 = call <1 x i64> @llvm.masked.load.v1i64.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+
+ %V16I32 = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+ %V15I32 = call <15 x i32> @llvm.masked.load.v15i32.p0(ptr undef, i32 1, <15 x i1> undef, <15 x i32> undef)
+ %V14I32 = call <14 x i32> @llvm.masked.load.v14i32.p0(ptr undef, i32 1, <14 x i1> undef, <14 x i32> undef)
+ %V13I32 = call <13 x i32> @llvm.masked.load.v13i32.p0(ptr undef, i32 1, <13 x i1> undef, <13 x i32> undef)
+ %V12I32 = call <12 x i32> @llvm.masked.load.v12i32.p0(ptr undef, i32 1, <12 x i1> undef, <12 x i32> undef)
+ %V11I32 = call <11 x i32> @llvm.masked.load.v11i32.p0(ptr undef, i32 1, <11 x i1> undef, <11 x i32> undef)
+ %V10I32 = call <10 x i32> @llvm.masked.load.v10i32.p0(ptr undef, i32 1, <10 x i1> undef, <10 x i32> undef)
+ %V9I32 = call <9 x i32> @llvm.masked.load.v9i32.p0(ptr undef, i32 1, <9 x i1> undef, <9 x i32> undef)
+ %V8I32 = call <8 x i32> @llvm.masked.load.v8i32.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+ %V7I32 = call <7 x i32> @llvm.masked.load.v7i32.p0(ptr undef, i32 1, <7 x i1> undef, <7 x i32> undef)
+ %V6I32 = call <6 x i32> @llvm.masked.load.v6i32.p0(ptr undef, i32 1, <6 x i1> undef, <6 x i32> undef)
+ %V5I32 = call <5 x i32> @llvm.masked.load.v5i32.p0(ptr undef, i32 1, <5 x i1> undef, <5 x i32> undef)
+ %V4I32 = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i32> undef)
+ %V3I32 = call <3 x i32> @llvm.masked.load.v3i32.p0(ptr undef, i32 1, <3 x i1> undef, <3 x i32> undef)
+ %V2I32 = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr undef, i32 1, <2 x i1> undef, <2 x i32> undef)
+ %V1I32 = call <1 x i32> @llvm.masked.load.v1i32.p0(ptr undef, i32 1, <1 x i1> undef, <1 x i32> undef)
+
+ %V32I16 = call <32 x i16> @llvm.masked.load.v32i16.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i16> undef)
+ %V16I16 = call <16 x i16> @llvm.masked.load.v16i16.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i16> undef)
+ %V8I16 = call <8 x i16> @llvm.masked.load.v8i16.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i16> undef)
+ %V4I16 = call <4 x i16> @llvm.masked.load.v4i16.p0(ptr undef, i32 1, <4 x i1> undef, <4 x i16> undef)
+
+ %V64I8 = call <64 x i8> @llvm.masked.load.v64i8.p0(ptr undef, i32 1, <64 x i1> undef, <64 x i8> undef)
+ %V32I8 = call <32 x i8> @llvm.masked.load.v32i8.p0(ptr undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+ %V16I8 = call <16 x i8> @llvm.masked.load.v16i8.p0(ptr undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+ %V8I8 = call <8 x i8> @llvm.masked.load.v8i8.p0(ptr undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+
+ ret i32 0
+}
+
+define i32 @masked_store() {
+; SSE2-LABEL: 'masked_store'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 43 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 37 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double>
undef, ptr undef, i32 1, <3 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 91 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 85 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 79 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 62 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 56 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 
31 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 107 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 100 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 93 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 86 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 73 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 59 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 53 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 39 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 190 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 95 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 440 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 220 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 110 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 54 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_store' +; SSE42-NEXT: Cost Model: Found an estimated cost of 36 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 71 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 61 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 57 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 52 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, 
<11 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 42 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 75 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 70 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 65 for instruction: call void 
@llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 55 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 320 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 160 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_store' +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call 
void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; AVX-NEXT: Cost Model: 
Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; AVX-NEXT: Cost 
Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 163 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 324 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 162 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_store' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 
2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an 
estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 164 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 81 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 326 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an 
estimated cost of 162 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_store' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated 
cost of 2 for instruction: call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated 
cost of 1 for instruction: call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.store.v8f64.p0(<8 x double> undef, ptr undef, i32 1, <8 x i1> undef) + call void @llvm.masked.store.v7f64.p0(<7 x double> undef, ptr undef, i32 1, <7 x i1> undef) + call void @llvm.masked.store.v6f64.p0(<6 x double> undef, ptr undef, i32 1, <6 x i1> undef) + call void @llvm.masked.store.v5f64.p0(<5 x double> undef, ptr undef, i32 1, <5 x i1> undef) + call void @llvm.masked.store.v4f64.p0(<4 x double> undef, ptr undef, i32 1, <4 x i1> undef) + call void @llvm.masked.store.v3f64.p0(<3 x double> undef, ptr undef, i32 1, <3 x i1> undef) + call void @llvm.masked.store.v2f64.p0(<2 x double> undef, ptr undef, i32 1, <2 x i1> undef) + call void @llvm.masked.store.v1f64.p0(<1 x double> undef, ptr undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.store.v16f32.p0(<16 x float> undef, ptr undef, i32 1, <16 x i1> undef) + call void @llvm.masked.store.v15f32.p0(<15 x float> undef, ptr undef, i32 1, <15 x i1> undef) + 
call void @llvm.masked.store.v14f32.p0(<14 x float> undef, ptr undef, i32 1, <14 x i1> undef)
+  call void @llvm.masked.store.v13f32.p0(<13 x float> undef, ptr undef, i32 1, <13 x i1> undef)
+  call void @llvm.masked.store.v12f32.p0(<12 x float> undef, ptr undef, i32 1, <12 x i1> undef)
+  call void @llvm.masked.store.v11f32.p0(<11 x float> undef, ptr undef, i32 1, <11 x i1> undef)
+  call void @llvm.masked.store.v10f32.p0(<10 x float> undef, ptr undef, i32 1, <10 x i1> undef)
+  call void @llvm.masked.store.v9f32.p0(<9 x float> undef, ptr undef, i32 1, <9 x i1> undef)
+  call void @llvm.masked.store.v8f32.p0(<8 x float> undef, ptr undef, i32 1, <8 x i1> undef)
+  call void @llvm.masked.store.v7f32.p0(<7 x float> undef, ptr undef, i32 1, <7 x i1> undef)
+  call void @llvm.masked.store.v6f32.p0(<6 x float> undef, ptr undef, i32 1, <6 x i1> undef)
+  call void @llvm.masked.store.v5f32.p0(<5 x float> undef, ptr undef, i32 1, <5 x i1> undef)
+  call void @llvm.masked.store.v4f32.p0(<4 x float> undef, ptr undef, i32 1, <4 x i1> undef)
+  call void @llvm.masked.store.v3f32.p0(<3 x float> undef, ptr undef, i32 1, <3 x i1> undef)
+  call void @llvm.masked.store.v2f32.p0(<2 x float> undef, ptr undef, i32 1, <2 x i1> undef)
+  call void @llvm.masked.store.v1f32.p0(<1 x float> undef, ptr undef, i32 1, <1 x i1> undef)
+
+  call void @llvm.masked.store.v8i64.p0(<8 x i64> undef, ptr undef, i32 1, <8 x i1> undef)
+  call void @llvm.masked.store.v7i64.p0(<7 x i64> undef, ptr undef, i32 1, <7 x i1> undef)
+  call void @llvm.masked.store.v6i64.p0(<6 x i64> undef, ptr undef, i32 1, <6 x i1> undef)
+  call void @llvm.masked.store.v5i64.p0(<5 x i64> undef, ptr undef, i32 1, <5 x i1> undef)
+  call void @llvm.masked.store.v4i64.p0(<4 x i64> undef, ptr undef, i32 1, <4 x i1> undef)
+  call void @llvm.masked.store.v3i64.p0(<3 x i64> undef, ptr undef, i32 1, <3 x i1> undef)
+  call void @llvm.masked.store.v2i64.p0(<2 x i64> undef, ptr undef, i32 1, <2 x i1> undef)
+  call void @llvm.masked.store.v1i64.p0(<1 x i64> undef, ptr undef, i32 1, <1 x i1> undef)
+
+  call void @llvm.masked.store.v16i32.p0(<16 x i32> undef, ptr undef, i32 1, <16 x i1> undef)
+  call void @llvm.masked.store.v15i32.p0(<15 x i32> undef, ptr undef, i32 1, <15 x i1> undef)
+  call void @llvm.masked.store.v14i32.p0(<14 x i32> undef, ptr undef, i32 1, <14 x i1> undef)
+  call void @llvm.masked.store.v13i32.p0(<13 x i32> undef, ptr undef, i32 1, <13 x i1> undef)
+  call void @llvm.masked.store.v12i32.p0(<12 x i32> undef, ptr undef, i32 1, <12 x i1> undef)
+  call void @llvm.masked.store.v11i32.p0(<11 x i32> undef, ptr undef, i32 1, <11 x i1> undef)
+  call void @llvm.masked.store.v10i32.p0(<10 x i32> undef, ptr undef, i32 1, <10 x i1> undef)
+  call void @llvm.masked.store.v9i32.p0(<9 x i32> undef, ptr undef, i32 1, <9 x i1> undef)
+  call void @llvm.masked.store.v8i32.p0(<8 x i32> undef, ptr undef, i32 1, <8 x i1> undef)
+  call void @llvm.masked.store.v7i32.p0(<7 x i32> undef, ptr undef, i32 1, <7 x i1> undef)
+  call void @llvm.masked.store.v6i32.p0(<6 x i32> undef, ptr undef, i32 1, <6 x i1> undef)
+  call void @llvm.masked.store.v5i32.p0(<5 x i32> undef, ptr undef, i32 1, <5 x i1> undef)
+  call void @llvm.masked.store.v4i32.p0(<4 x i32> undef, ptr undef, i32 1, <4 x i1> undef)
+  call void @llvm.masked.store.v3i32.p0(<3 x i32> undef, ptr undef, i32 1, <3 x i1> undef)
+  call void @llvm.masked.store.v2i32.p0(<2 x i32> undef, ptr undef, i32 1, <2 x i1> undef)
+  call void @llvm.masked.store.v1i32.p0(<1 x i32> undef, ptr undef, i32 1, <1 x i1> undef)
+
+  call void @llvm.masked.store.v32i16.p0(<32 x i16> undef, ptr undef, i32 1, <32 x i1> undef)
+  call void @llvm.masked.store.v16i16.p0(<16 x i16> undef, ptr undef, i32 1, <16 x i1> undef)
+  call void @llvm.masked.store.v8i16.p0(<8 x i16> undef, ptr undef, i32 1, <8 x i1> undef)
+  call void @llvm.masked.store.v4i16.p0(<4 x i16> undef, ptr undef, i32 1, <4 x i1> undef)
+
+  call void @llvm.masked.store.v64i8.p0(<64 x i8> undef, ptr undef, i32 1, <64 x i1> undef)
+  call void @llvm.masked.store.v32i8.p0(<32 x i8> undef, ptr undef, i32 1, <32 x i1> undef)
+  call void @llvm.masked.store.v16i8.p0(<16 x i8> undef, ptr undef, i32 1, <16 x i1> undef)
+  call void @llvm.masked.store.v8i8.p0(<8 x i8> undef, ptr undef, i32 1, <8 x i1> undef)
+
+  ret i32 0
+}
+
+define i32 @masked_gather() {
+; SSE2-LABEL: 'masked_gather'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V4I32 = call <4 x i32> 
@llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_gather' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 
x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_gather' +; AVX1-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> 
undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> 
undef, <32 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_gather' +; AVX2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 50 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 25 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; AVX2-NEXT: Cost Model: Found an 
estimated cost of 98 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_gather' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I64 = call <2 x i64> 
@llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 98 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 194 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_gather' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x 
float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; KNL-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_gather' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I16 = call <8 
x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 195 for instruction: %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x i1> undef, <64 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 97 for instruction: %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr> undef, i32 1, <1 x i1> undef, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> undef, i32 1, <2 x i1> undef, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr> undef, i32 1, <4 x i1> undef, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr> undef, i32 1, <64 x 
i1> undef, <64 x i8> undef)
+  %V32I8 = call <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr> undef, i32 1, <32 x i1> undef, <32 x i8> undef)
+  %V16I8 = call <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr> undef, i32 1, <16 x i1> undef, <16 x i8> undef)
+  %V8I8 = call <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr> undef, i32 1, <8 x i1> undef, <8 x i8> undef)
+
+  ret i32 0
+}
+
+define i32 @masked_scatter() {
+; SSE2-LABEL: 'masked_scatter'
+; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 60 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 30 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef)
+; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> 
undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 252 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 126 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 63 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_scatter' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void 
@llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_scatter' +; AVX-NEXT: Cost Model: Found an estimated cost of 22 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 46 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an 
estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 50 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 98 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 194 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; KNL-LABEL: 'masked_scatter' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKX-LABEL: 'masked_scatter' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: 
Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 97 for instruction: call void @llvm.masked.scatter.v32i8.v32p0(<32 x 
i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.scatter.v8f64.v8p0(<8 x double> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2f64.v2p0(<2 x double> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + call void @llvm.masked.scatter.v1f64.v1p0(<1 x double> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.scatter.v16f32.v16p0(<16 x float> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8f32.v8p0(<8 x float> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4f32.v4p0(<4 x float> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2f32.v2p0(<2 x float> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + + call void @llvm.masked.scatter.v8i64.v8p0(<8 x i64> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4i64.v4p0(<4 x i64> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2i64.v2p0(<2 x i64> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + call void @llvm.masked.scatter.v1i64.v1p0(<1 x i64> undef, <1 x ptr> undef, i32 1, <1 x i1> undef) + + call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> undef, <2 x ptr> undef, i32 1, <2 x i1> undef) + + call void @llvm.masked.scatter.v32i16.v32p0(<32 x i16> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) + call void @llvm.masked.scatter.v16i16.v16p0(<16 x i16> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8i16.v8p0(<8 x i16> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + call void @llvm.masked.scatter.v4i16.v4p0(<4 x i16> undef, <4 x ptr> undef, i32 1, <4 x i1> undef) + + call void @llvm.masked.scatter.v64i8.v64p0(<64 x i8> undef, <64 x ptr> undef, i32 1, <64 x i1> undef) + call void @llvm.masked.scatter.v32i8.v32p0(<32 x i8> undef, <32 x ptr> undef, i32 1, <32 x i1> undef) + call void @llvm.masked.scatter.v16i8.v16p0(<16 x i8> undef, <16 x ptr> undef, i32 1, <16 x i1> undef) + call void @llvm.masked.scatter.v8i8.v8p0(<8 x i8> undef, <8 x ptr> undef, i32 1, <8 x i1> undef) + + ret i32 0 +} + +define i32 @masked_expandload() { +; SSE2-LABEL: 'masked_expandload' +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> 
undef, <2 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 22 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 188 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 94 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 47 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for 
instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_expandload' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 28 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 128 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 64 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX-LABEL: 'masked_expandload' +; AVX-NEXT: Cost Model: Found an estimated cost of 14 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 30 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 18 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 34 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x 
i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 66 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 130 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX512-LABEL: 'masked_expandload' +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 31 for instruction: %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 7 for instruction: %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I64 = call <2 x i64> 
@llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, <2 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 35 for instruction: %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 17 for instruction: %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 4 for instruction: %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 67 for instruction: %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 33 for instruction: %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 8 for instruction: %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 131 for instruction: %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 65 for instruction: %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 32 for instruction: %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 16 for instruction: %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + %V8F64 = call <8 x double> @llvm.masked.expandload.v8f64(ptr undef, <8 x i1> undef, <8 x double> undef) + %V4F64 = call <4 x double> @llvm.masked.expandload.v4f64(ptr undef, <4 x i1> undef, <4 x double> undef) + %V2F64 = call <2 x double> @llvm.masked.expandload.v2f64(ptr undef, <2 x i1> undef, <2 x double> undef) + %V1F64 = call <1 x double> @llvm.masked.expandload.v1f64(ptr undef, <1 x i1> undef, <1 x double> undef) + + %V16F32 = call <16 x float> @llvm.masked.expandload.v16f32(ptr undef, <16 x i1> undef, <16 x float> undef) + %V8F32 = call <8 x float> @llvm.masked.expandload.v8f32(ptr undef, <8 x i1> undef, <8 x float> undef) + %V4F32 = call <4 x float> @llvm.masked.expandload.v4f32(ptr undef, <4 x i1> undef, <4 x float> undef) + %V2F32 = call <2 x float> @llvm.masked.expandload.v2f32(ptr undef, <2 x i1> undef, <2 x float> undef) + + %V8I64 = call <8 x i64> @llvm.masked.expandload.v8i64(ptr undef, <8 x i1> undef, <8 x i64> undef) + %V4I64 = call <4 x i64> @llvm.masked.expandload.v4i64(ptr undef, <4 x i1> undef, <4 x i64> undef) + %V2I64 = call <2 x i64> @llvm.masked.expandload.v2i64(ptr undef, <2 x i1> undef, 
<2 x i64> undef) + %V1I64 = call <1 x i64> @llvm.masked.expandload.v1i64(ptr undef, <1 x i1> undef, <1 x i64> undef) + + %V16I32 = call <16 x i32> @llvm.masked.expandload.v16i32(ptr undef, <16 x i1> undef, <16 x i32> undef) + %V8I32 = call <8 x i32> @llvm.masked.expandload.v8i32(ptr undef, <8 x i1> undef, <8 x i32> undef) + %V4I32 = call <4 x i32> @llvm.masked.expandload.v4i32(ptr undef, <4 x i1> undef, <4 x i32> undef) + %V2I32 = call <2 x i32> @llvm.masked.expandload.v2i32(ptr undef, <2 x i1> undef, <2 x i32> undef) + + %V32I16 = call <32 x i16> @llvm.masked.expandload.v32i16(ptr undef, <32 x i1> undef, <32 x i16> undef) + %V16I16 = call <16 x i16> @llvm.masked.expandload.v16i16(ptr undef, <16 x i1> undef, <16 x i16> undef) + %V8I16 = call <8 x i16> @llvm.masked.expandload.v8i16(ptr undef, <8 x i1> undef, <8 x i16> undef) + %V4I16 = call <4 x i16> @llvm.masked.expandload.v4i16(ptr undef, <4 x i1> undef, <4 x i16> undef) + + %V64I8 = call <64 x i8> @llvm.masked.expandload.v64i8(ptr undef, <64 x i1> undef, <64 x i8> undef) + %V32I8 = call <32 x i8> @llvm.masked.expandload.v32i8(ptr undef, <32 x i1> undef, <32 x i8> undef) + %V16I8 = call <16 x i8> @llvm.masked.expandload.v16i8(ptr undef, <16 x i1> undef, <16 x i8> undef) + %V8I8 = call <8 x i8> @llvm.masked.expandload.v8i8(ptr undef, <8 x i1> undef, <8 x i8> undef) + + ret i32 0 +} + +define i32 @masked_compressstore() { +; SSE2-LABEL: 'masked_compressstore' +; SSE2-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 21 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; SSE2-NEXT: Cost 
Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 192 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 96 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SSE42-LABEL: 'masked_compressstore' +; SSE42-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 7 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 29 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 
for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX1-LABEL: 'masked_compressstore' +; AVX1-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void 
@llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 68 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 134 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; AVX2-LABEL: 'masked_compressstore' +; AVX2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, 
ptr undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; +; SKL-LABEL: 'masked_compressstore' +; SKL-NEXT: Cost Model: Found an estimated cost of 15 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found 
an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 31 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 16 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 4 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 35 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 18 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 67 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 34 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 132 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 66 for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 33 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 17 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: ret i32 0 +; +; AVX512-LABEL: 'masked_compressstore' +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 47 for instruction: call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 11 for instruction: call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 5 for instruction: call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 27 for instruction: call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 13 for instruction: call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 51 for instruction: call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 25 for instruction: call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 6 for instruction: call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 99 for instruction: call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 49 for instruction: call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 195 for instruction: call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 97 
for instruction: call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 48 for instruction: call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 24 for instruction: call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret i32 0 +; + call void @llvm.masked.compressstore.v8f64(<8 x double> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4f64(<4 x double> undef, ptr undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2f64(<2 x double> undef, ptr undef, <2 x i1> undef) + call void @llvm.masked.compressstore.v1f64(<1 x double> undef, ptr undef, <1 x i1> undef) + + call void @llvm.masked.compressstore.v16f32(<16 x float> undef, ptr undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8f32(<8 x float> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4f32(<4 x float> undef, ptr undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2f32(<2 x float> undef, ptr undef, <2 x i1> undef) + + call void @llvm.masked.compressstore.v8i64(<8 x i64> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4i64(<4 x i64> undef, ptr undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2i64(<2 x i64> undef, ptr undef, <2 x i1> undef) + call void @llvm.masked.compressstore.v1i64(<1 x i64> undef, ptr undef, <1 x i1> undef) + + call void @llvm.masked.compressstore.v16i32(<16 x i32> undef, ptr undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8i32(<8 x i32> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4i32(<4 x i32> undef, ptr undef, <4 x i1> undef) + call void @llvm.masked.compressstore.v2i32(<2 x i32> undef, ptr undef, <2 x i1> undef) + + call void @llvm.masked.compressstore.v32i16(<32 x i16> undef, ptr undef, <32 x i1> undef) + call void @llvm.masked.compressstore.v16i16(<16 x i16> undef, ptr undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8i16(<8 x i16> undef, ptr undef, <8 x i1> undef) + call void @llvm.masked.compressstore.v4i16(<4 x i16> undef, ptr undef, <4 x i1> undef) + + call void @llvm.masked.compressstore.v64i8(<64 x i8> undef, ptr undef, <64 x i1> undef) + call void @llvm.masked.compressstore.v32i8(<32 x i8> undef, ptr undef, <32 x i1> undef) + call void @llvm.masked.compressstore.v16i8(<16 x i8> undef, ptr undef, <16 x i1> undef) + call void @llvm.masked.compressstore.v8i8(<8 x i8> undef, ptr undef, <8 x i1> undef) + + ret i32 0 +} + +define <2 x double> @test1(<2 x i64> %trigger, ptr %addr, <2 x double> %dst) { +; SSE2-LABEL: 'test1' +; SSE2-NEXT: Cost Model: Found an estimated cost of 5 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SSE42-LABEL: 'test1' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; 
SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX-LABEL: 'test1' +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX512-LABEL: 'test1' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i64> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x double> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; + %mask = icmp eq <2 x i64> %trigger, zeroinitializer + %res = call <2 x double> @llvm.masked.load.v2f64.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x double>%dst) + ret <2 x double> %res +} + +define <4 x i32> @test2(<4 x i32> %trigger, ptr %addr, <4 x i32> %dst) { +; SSE2-LABEL: 'test2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX-LABEL: 'test2' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX512-LABEL: 'test2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1> %mask, <4 x i32> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; + %mask = icmp eq <4 x i32> %trigger, zeroinitializer + %res = call <4 x i32> @llvm.masked.load.v4i32.p0(ptr %addr, i32 4, <4 x i1>%mask, <4 x i32>%dst) + ret <4 x i32> %res +} + +define void @test3(<4 x i32> %trigger, ptr %addr, <4 x i32> %val) { +; SSE2-LABEL: 'test3' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 26 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test3' +; SSE42-NEXT: Cost Model: 
Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test3' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 8 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test3' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <4 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.store.v4i32.p0(<4 x i32> %val, ptr %addr, i32 4, <4 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <4 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v4i32.p0(<4 x i32>%val, ptr %addr, i32 4, <4 x i1>%mask) + ret void +} + +define <8 x float> @test4(<8 x i32> %trigger, ptr %addr, <8 x float> %dst) { +; SSE2-LABEL: 'test4' +; SSE2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 45 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; SSE42-LABEL: 'test4' +; SSE42-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 38 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; AVX1-LABEL: 'test4' +; AVX1-NEXT: Cost Model: Found an estimated cost of 6 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; AVX2-LABEL: 'test4' +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; SKL-LABEL: 'test4' +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; +; AVX512-LABEL: 'test4' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %mask = icmp eq <8 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1> %mask, <8 x float> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <8 x float> %res +; + %mask = icmp eq <8 x i32> %trigger, zeroinitializer + %res = call <8 x float> @llvm.masked.load.v8f32.p0(ptr %addr, i32 4, <8 x i1>%mask, <8 x float>%dst) + ret <8 x float> %res +} + +define void @test5(<2 x i32> %trigger, ptr %addr, <2 x float> %val) { +; SSE2-LABEL: 'test5' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test5' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test5' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test5' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2f32.p0(<2 x float> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v2f32.p0(<2 x float>%val, ptr %addr, i32 4, <2 x i1>%mask) + ret void +} + +define void @test6(<2 x i32> %trigger, ptr %addr, <2 x i32> %val) { +; SSE2-LABEL: 'test6' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test6' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test6' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 9 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 
for instruction: ret void +; +; AVX512-LABEL: 'test6' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: call void @llvm.masked.store.v2i32.p0(<2 x i32> %val, ptr %addr, i32 4, <2 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + call void @llvm.masked.store.v2i32.p0(<2 x i32>%val, ptr %addr, i32 4, <2 x i1>%mask) + ret void +} + +define <2 x float> @test7(<2 x i32> %trigger, ptr %addr, <2 x float> %dst) { +; SSE2-LABEL: 'test7' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; SSE42-LABEL: 'test7' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; AVX-LABEL: 'test7' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; +; AVX512-LABEL: 'test7' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x float> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x float> %res +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + %res = call <2 x float> @llvm.masked.load.v2f32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x float>%dst) + ret <2 x float> %res +} + +define <2 x i32> @test8(<2 x i32> %trigger, ptr %addr, <2 x i32> %dst) { +; SSE2-LABEL: 'test8' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; SSE42-LABEL: 'test8' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; AVX-LABEL: 'test8' +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> 
%trigger, zeroinitializer +; AVX-NEXT: Cost Model: Found an estimated cost of 3 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; +; AVX512-LABEL: 'test8' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %mask = icmp eq <2 x i32> %trigger, zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1> %mask, <2 x i32> %dst) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x i32> %res +; + %mask = icmp eq <2 x i32> %trigger, zeroinitializer + %res = call <2 x i32> @llvm.masked.load.v2i32.p0(ptr %addr, i32 4, <2 x i1>%mask, <2 x i32>%dst) + ret <2 x i32> %res +} + +define <2 x double> @test_gather_2f64(<2 x ptr> %ptrs, <2 x i1> %mask, <2 x double> %src0) { +; SSE2-LABEL: 'test_gather_2f64' +; SSE2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SSE42-LABEL: 'test_gather_2f64' +; SSE42-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX1-LABEL: 'test_gather_2f64' +; AVX1-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX2-LABEL: 'test_gather_2f64' +; AVX2-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; SKL-LABEL: 'test_gather_2f64' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; +; AVX512-LABEL: 'test_gather_2f64' +; AVX512-NEXT: Cost Model: Found an estimated cost of 9 for instruction: %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <2 x double> %res +; + %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %mask, <2 x double> %src0) + ret <2 x double> %res +} + +define <4 x i32> @test_gather_4i32(<4 x ptr> %ptrs, <4 x i1> %mask, <4 x i32> %src0) { +; SSE2-LABEL: 'test_gather_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test_gather_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for 
instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX1-LABEL: 'test_gather_4i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX2-LABEL: 'test_gather_4i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKL-LABEL: 'test_gather_4i32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; KNL-LABEL: 'test_gather_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKX-LABEL: 'test_gather_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> %mask, <4 x i32> %src0) + ret <4 x i32> %res +} + +define <4 x i32> @test_gather_4i32_const_mask(<4 x ptr> %ptrs, <4 x i32> %src0) { +; SSE2-LABEL: 'test_gather_4i32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 15 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SSE42-LABEL: 'test_gather_4i32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX1-LABEL: 'test_gather_4i32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; AVX2-LABEL: 'test_gather_4i32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKL-LABEL: 'test_gather_4i32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x 
i32> %res +; +; KNL-LABEL: 'test_gather_4i32_const_mask' +; KNL-NEXT: Cost Model: Found an estimated cost of 12 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; +; SKX-LABEL: 'test_gather_4i32_const_mask' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x i32> %res +; + %res = call <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr> %ptrs, i32 4, <4 x i1> , <4 x i32> %src0) + ret <4 x i32> %res +} + +define <16 x float> @test_gather_16f32_const_mask(ptr %base, <16 x i32> %ind) { +; SSE2-LABEL: 'test_gather_16f32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_const_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_var_mask(ptr %base, <16 x i32> %ind, <16 x i1>%mask) { +; SSE2-LABEL: 'test_gather_16f32_var_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 
'test_gather_16f32_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, ptr %base, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_ra_var_mask(<16 x ptr> %ptrs, <16 x i32> %ind, <16 x i1>%mask) { +; SSE2-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_ra_var_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 76 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 
0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 78 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_ra_var_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_ra_var_mask' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.v = getelementptr float, <16 x ptr> %ptrs, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.v, i32 4, <16 x i1> %mask, <16 x float> undef) + ret <16 x float>%res +} + +define <16 x float> @test_gather_16f32_const_mask2(ptr %base, <16 x i32> %ind) { +; SSE2-LABEL: 'test_gather_16f32_const_mask2' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SSE42-LABEL: 'test_gather_16f32_const_mask2' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; SSE42-NEXT: 
Cost Model: Found an estimated cost of 44 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX1-LABEL: 'test_gather_16f32_const_mask2' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX2-LABEL: 'test_gather_16f32_const_mask2' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; SKL-LABEL: 'test_gather_16f32_const_mask2' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; +; AVX512-LABEL: 'test_gather_16f32_const_mask2' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for 
instruction: %sext_ind = sext <16 x i32> %ind to <16 x i64> +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <16 x float> %res +; + %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 + %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer + + %sext_ind = sext <16 x i32> %ind to <16 x i64> + %gep.random = getelementptr float, <16 x ptr> %broadcast.splat, <16 x i64> %sext_ind + + %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr> %gep.random, i32 4, <16 x i1> , <16 x float> undef) + ret <16 x float>%res +} + +define void @test_scatter_16i32(ptr %base, <16 x i32> %ind, i16 %mask, <16 x i32>%val) { +; SSE2-LABEL: 'test_scatter_16i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE2-NEXT: Cost Model: Found an estimated cost of 92 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_16i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SSE42-NEXT: Cost Model: Found an estimated cost of 80 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX1-LABEL: 'test_scatter_16i32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX1-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX1-NEXT: Cost Model: Found an 
estimated cost of 82 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX2-LABEL: 'test_scatter_16i32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX2-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SKL-LABEL: 'test_scatter_16i32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; SKL-NEXT: Cost Model: Found an estimated cost of 82 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_16i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer +; AVX512-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %imask = bitcast i16 %mask to <16 x i1> +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32> %val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + %broadcast.splatinsert = insertelement <16 x ptr> undef, ptr %base, i32 0 + %broadcast.splat = shufflevector <16 x ptr> %broadcast.splatinsert, <16 x ptr> undef, <16 x i32> zeroinitializer + + %gep.random = getelementptr i32, <16 x ptr> %broadcast.splat, <16 x i32> %ind + %imask = bitcast i16 %mask to <16 x i1> + call void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>%val, <16 x ptr> %gep.random, i32 4, <16 x i1> %imask) + ret void +} + +define void @test_scatter_8i32(<8 x i32>%a1, <8 x ptr> %ptr, <8 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_8i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 46 for instruction: 
call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_8i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 40 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_8i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 41 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX512-LABEL: 'test_scatter_8i32' +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) +; AVX512-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v8i32.v8p0(<8 x i32> %a1, <8 x ptr> %ptr, i32 4, <8 x i1> %mask) + ret void +} + +define void @test_scatter_4i32(<4 x i32>%a1, <4 x ptr> %ptr, <4 x i1>%mask) { +; SSE2-LABEL: 'test_scatter_4i32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 23 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SSE42-LABEL: 'test_scatter_4i32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; AVX-LABEL: 'test_scatter_4i32' +; AVX-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; AVX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; KNL-LABEL: 'test_scatter_4i32' +; KNL-NEXT: Cost Model: Found an estimated cost of 20 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; +; SKX-LABEL: 'test_scatter_4i32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void +; + call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> %a1, <4 x ptr> %ptr, i32 4, <4 x i1> %mask) + ret void +} + +define <4 x float> @test_gather_4f32(ptr %ptr, <4 x i32> %ind, <4 x i1>%mask) { +; SSE2-LABEL: 'test_gather_4f32' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SSE42-LABEL: 'test_gather_4f32' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind 
= sext <4 x i32> %ind to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX1-LABEL: 'test_gather_4f32' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX2-LABEL: 'test_gather_4f32' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKL-LABEL: 'test_gather_4f32' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; KNL-LABEL: 'test_gather_4f32' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; KNL-NEXT: Cost Model: Found an estimated cost of 19 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKX-LABEL: 'test_gather_4f32' +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; + %sext_ind = sext <4 x i32> %ind to <4 x i64> + %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind + + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> %mask, <4 x float> undef) + ret <4 x float>%res +} + +define <4 x float> 
@test_gather_4f32_const_mask(ptr %ptr, <4 x i32> %ind) { +; SSE2-LABEL: 'test_gather_4f32_const_mask' +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SSE42-LABEL: 'test_gather_4f32_const_mask' +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SSE42-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SSE42-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SSE42-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX1-LABEL: 'test_gather_4f32_const_mask' +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX1-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX1-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX1-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; AVX2-LABEL: 'test_gather_4f32_const_mask' +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; AVX2-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; AVX2-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; AVX2-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKL-LABEL: 'test_gather_4f32_const_mask' +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; KNL-LABEL: 'test_gather_4f32_const_mask' +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; KNL-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; KNL-NEXT: Cost Model: Found an estimated cost of 11 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; KNL-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; +; SKX-LABEL: 'test_gather_4f32_const_mask' +; SKX-NEXT: Cost 
Model: Found an estimated cost of 1 for instruction: %sext_ind = sext <4 x i32> %ind to <4 x i64> +; SKX-NEXT: Cost Model: Found an estimated cost of 0 for instruction: %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) +; SKX-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret <4 x float> %res +; + %sext_ind = sext <4 x i32> %ind to <4 x i64> + %gep.v = getelementptr float, ptr %ptr, <4 x i64> %sext_ind + + %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr> %gep.v, i32 4, <4 x i1> , <4 x float> undef) + ret <4 x float>%res +} + +declare <8 x double> @llvm.masked.load.v8f64.p0(ptr, i32, <8 x i1>, <8 x double>) +declare <7 x double> @llvm.masked.load.v7f64.p0(ptr, i32, <7 x i1>, <7 x double>) +declare <6 x double> @llvm.masked.load.v6f64.p0(ptr, i32, <6 x i1>, <6 x double>) +declare <5 x double> @llvm.masked.load.v5f64.p0(ptr, i32, <5 x i1>, <5 x double>) +declare <4 x double> @llvm.masked.load.v4f64.p0(ptr, i32, <4 x i1>, <4 x double>) +declare <3 x double> @llvm.masked.load.v3f64.p0(ptr, i32, <3 x i1>, <3 x double>) +declare <2 x double> @llvm.masked.load.v2f64.p0(ptr, i32, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.load.v1f64.p0(ptr, i32, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.load.v16f32.p0(ptr, i32, <16 x i1>, <16 x float>) +declare <15 x float> @llvm.masked.load.v15f32.p0(ptr, i32, <15 x i1>, <15 x float>) +declare <14 x float> @llvm.masked.load.v14f32.p0(ptr, i32, <14 x i1>, <14 x float>) +declare <13 x float> @llvm.masked.load.v13f32.p0(ptr, i32, <13 x i1>, <13 x float>) +declare <12 x float> @llvm.masked.load.v12f32.p0(ptr, i32, <12 x i1>, <12 x float>) +declare <11 x float> @llvm.masked.load.v11f32.p0(ptr, i32, <11 x i1>, <11 x float>) +declare <10 x float> @llvm.masked.load.v10f32.p0(ptr, i32, <10 x i1>, <10 x float>) +declare <9 x float> @llvm.masked.load.v9f32.p0(ptr, i32, <9 x i1>, <9 x float>) +declare <8 x float> @llvm.masked.load.v8f32.p0(ptr, i32, <8 x i1>, <8 x float>) +declare <7 x float> @llvm.masked.load.v7f32.p0(ptr, i32, <7 x i1>, <7 x float>) +declare <6 x float> @llvm.masked.load.v6f32.p0(ptr, i32, <6 x i1>, <6 x float>) +declare <5 x float> @llvm.masked.load.v5f32.p0(ptr, i32, <5 x i1>, <5 x float>) +declare <4 x float> @llvm.masked.load.v4f32.p0(ptr, i32, <4 x i1>, <4 x float>) +declare <3 x float> @llvm.masked.load.v3f32.p0(ptr, i32, <3 x i1>, <3 x float>) +declare <2 x float> @llvm.masked.load.v2f32.p0(ptr, i32, <2 x i1>, <2 x float>) +declare <1 x float> @llvm.masked.load.v1f32.p0(ptr, i32, <1 x i1>, <1 x float>) + +declare <8 x i64> @llvm.masked.load.v8i64.p0(ptr, i32, <8 x i1>, <8 x i64>) +declare <7 x i64> @llvm.masked.load.v7i64.p0(ptr, i32, <7 x i1>, <7 x i64>) +declare <6 x i64> @llvm.masked.load.v6i64.p0(ptr, i32, <6 x i1>, <6 x i64>) +declare <5 x i64> @llvm.masked.load.v5i64.p0(ptr, i32, <5 x i1>, <5 x i64>) +declare <4 x i64> @llvm.masked.load.v4i64.p0(ptr, i32, <4 x i1>, <4 x i64>) +declare <3 x i64> @llvm.masked.load.v3i64.p0(ptr, i32, <3 x i1>, <3 x i64>) +declare <2 x i64> @llvm.masked.load.v2i64.p0(ptr, i32, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.load.v1i64.p0(ptr, i32, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.load.v16i32.p0(ptr, i32, <16 x i1>, <16 x i32>) +declare <15 x i32> @llvm.masked.load.v15i32.p0(ptr, i32, <15 x i1>, <15 x i32>) +declare <14 x i32> 
@llvm.masked.load.v14i32.p0(ptr, i32, <14 x i1>, <14 x i32>) +declare <13 x i32> @llvm.masked.load.v13i32.p0(ptr, i32, <13 x i1>, <13 x i32>) +declare <12 x i32> @llvm.masked.load.v12i32.p0(ptr, i32, <12 x i1>, <12 x i32>) +declare <11 x i32> @llvm.masked.load.v11i32.p0(ptr, i32, <11 x i1>, <11 x i32>) +declare <10 x i32> @llvm.masked.load.v10i32.p0(ptr, i32, <10 x i1>, <10 x i32>) +declare <9 x i32> @llvm.masked.load.v9i32.p0(ptr, i32, <9 x i1>, <9 x i32>) +declare <8 x i32> @llvm.masked.load.v8i32.p0(ptr, i32, <8 x i1>, <8 x i32>) +declare <7 x i32> @llvm.masked.load.v7i32.p0(ptr, i32, <7 x i1>, <7 x i32>) +declare <6 x i32> @llvm.masked.load.v6i32.p0(ptr, i32, <6 x i1>, <6 x i32>) +declare <5 x i32> @llvm.masked.load.v5i32.p0(ptr, i32, <5 x i1>, <5 x i32>) +declare <4 x i32> @llvm.masked.load.v4i32.p0(ptr, i32, <4 x i1>, <4 x i32>) +declare <3 x i32> @llvm.masked.load.v3i32.p0(ptr, i32, <3 x i1>, <3 x i32>) +declare <2 x i32> @llvm.masked.load.v2i32.p0(ptr, i32, <2 x i1>, <2 x i32>) +declare <1 x i32> @llvm.masked.load.v1i32.p0(ptr, i32, <1 x i1>, <1 x i32>) + +declare <32 x i16> @llvm.masked.load.v32i16.p0(ptr, i32, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.load.v16i16.p0(ptr, i32, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.load.v8i16.p0(ptr, i32, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.load.v4i16.p0(ptr, i32, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.load.v64i8.p0(ptr, i32, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.load.v32i8.p0(ptr, i32, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.load.v16i8.p0(ptr, i32, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.load.v8i8.p0(ptr, i32, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.store.v8f64.p0(<8 x double>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7f64.p0(<7 x double>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6f64.p0(<6 x double>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5f64.p0(<5 x double>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4f64.p0(<4 x double>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3f64.p0(<3 x double>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2f64.p0(<2 x double>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1f64.p0(<1 x double>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v16f32.p0(<16 x float>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v15f32.p0(<15 x float>, ptr, i32, <15 x i1>) +declare void @llvm.masked.store.v14f32.p0(<14 x float>, ptr, i32, <14 x i1>) +declare void @llvm.masked.store.v13f32.p0(<13 x float>, ptr, i32, <13 x i1>) +declare void @llvm.masked.store.v12f32.p0(<12 x float>, ptr, i32, <12 x i1>) +declare void @llvm.masked.store.v11f32.p0(<11 x float>, ptr, i32, <11 x i1>) +declare void @llvm.masked.store.v10f32.p0(<10 x float>, ptr, i32, <10 x i1>) +declare void @llvm.masked.store.v9f32.p0(<9 x float>, ptr, i32, <9 x i1>) +declare void @llvm.masked.store.v8f32.p0(<8 x float>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7f32.p0(<7 x float>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6f32.p0(<6 x float>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5f32.p0(<5 x float>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4f32.p0(<4 x float>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3f32.p0(<3 x float>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2f32.p0(<2 x float>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1f32.p0(<1 x float>, ptr, i32, <1 x i1>) + +declare void 
@llvm.masked.store.v8i64.p0(<8 x i64>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7i64.p0(<7 x i64>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6i64.p0(<6 x i64>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5i64.p0(<5 x i64>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4i64.p0(<4 x i64>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3i64.p0(<3 x i64>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2i64.p0(<2 x i64>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1i64.p0(<1 x i64>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v16i32.p0(<16 x i32>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v15i32.p0(<15 x i32>, ptr, i32, <15 x i1>) +declare void @llvm.masked.store.v14i32.p0(<14 x i32>, ptr, i32, <14 x i1>) +declare void @llvm.masked.store.v13i32.p0(<13 x i32>, ptr, i32, <13 x i1>) +declare void @llvm.masked.store.v12i32.p0(<12 x i32>, ptr, i32, <12 x i1>) +declare void @llvm.masked.store.v11i32.p0(<11 x i32>, ptr, i32, <11 x i1>) +declare void @llvm.masked.store.v10i32.p0(<10 x i32>, ptr, i32, <10 x i1>) +declare void @llvm.masked.store.v9i32.p0(<9 x i32>, ptr, i32, <9 x i1>) +declare void @llvm.masked.store.v8i32.p0(<8 x i32>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v7i32.p0(<7 x i32>, ptr, i32, <7 x i1>) +declare void @llvm.masked.store.v6i32.p0(<6 x i32>, ptr, i32, <6 x i1>) +declare void @llvm.masked.store.v5i32.p0(<5 x i32>, ptr, i32, <5 x i1>) +declare void @llvm.masked.store.v4i32.p0(<4 x i32>, ptr, i32, <4 x i1>) +declare void @llvm.masked.store.v3i32.p0(<3 x i32>, ptr, i32, <3 x i1>) +declare void @llvm.masked.store.v2i32.p0(<2 x i32>, ptr, i32, <2 x i1>) +declare void @llvm.masked.store.v1i32.p0(<1 x i32>, ptr, i32, <1 x i1>) + +declare void @llvm.masked.store.v32i16.p0(<32 x i16>, ptr, i32, <32 x i1>) +declare void @llvm.masked.store.v16i16.p0(<16 x i16>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v8i16.p0(<8 x i16>, ptr, i32, <8 x i1>) +declare void @llvm.masked.store.v4i16.p0(<4 x i16>, ptr, i32, <4 x i1>) + +declare void @llvm.masked.store.v64i8.p0(<64 x i8>, ptr, i32, <64 x i1>) +declare void @llvm.masked.store.v32i8.p0(<32 x i8>, ptr, i32, <32 x i1>) +declare void @llvm.masked.store.v16i8.p0(<16 x i8>, ptr, i32, <16 x i1>) +declare void @llvm.masked.store.v8i8.p0(<8 x i8>, ptr, i32, <8 x i1>) + +declare <8 x double> @llvm.masked.gather.v8f64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.gather.v4f64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.gather.v2f64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.gather.v1f64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.gather.v16f32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.gather.v8f32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x float>) +declare <4 x float> @llvm.masked.gather.v4f32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x float>) + +declare <8 x i64> @llvm.masked.gather.v8i64.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.gather.v4i64.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.gather.v2i64.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.gather.v1i64.v1p0(<1 x ptr>, i32, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.gather.v16i32.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i32>) 
+declare <8 x i32> @llvm.masked.gather.v8i32.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.gather.v4i32.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr>, i32, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.gather.v32i16.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.gather.v16i16.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.gather.v8i16.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.gather.v4i16.v4p0(<4 x ptr>, i32, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.gather.v64i8.v64p0(<64 x ptr>, i32, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.gather.v32i8.v32p0(<32 x ptr>, i32, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.gather.v16i8.v16p0(<16 x ptr>, i32, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.gather.v8i8.v8p0(<8 x ptr>, i32, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.scatter.v8f64.v8p0(<8 x double>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4f64.v4p0(<4 x double>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2f64.v2p0(<2 x double>, <2 x ptr>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v1f64.v1p0(<1 x double>, <1 x ptr>, i32, <1 x i1>) + +declare void @llvm.masked.scatter.v16f32.v16p0(<16 x float>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8f32.v8p0(<8 x float>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4f32.v4p0(<4 x float>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2f32.v2p0(<2 x float>, <2 x ptr>, i32, <2 x i1>) + +declare void @llvm.masked.scatter.v8i64.v8p0(<8 x i64>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i64.v4p0(<4 x i64>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2i64.v2p0(<2 x i64>, <2 x ptr>, i32, <2 x i1>) +declare void @llvm.masked.scatter.v1i64.v1p0(<1 x i64>, <1 x ptr>, i32, <1 x i1>) + +declare void @llvm.masked.scatter.v16i32.v16p0(<16 x i32>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i32.v8p0(<8 x i32>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i32.v4p0(<4 x i32>, <4 x ptr>, i32, <4 x i1>) +declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>) + +declare void @llvm.masked.scatter.v32i16.v32p0(<32 x i16>, <32 x ptr>, i32, <32 x i1>) +declare void @llvm.masked.scatter.v16i16.v16p0(<16 x i16>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i16.v8p0(<8 x i16>, <8 x ptr>, i32, <8 x i1>) +declare void @llvm.masked.scatter.v4i16.v4p0(<4 x i16>, <4 x ptr>, i32, <4 x i1>) + +declare void @llvm.masked.scatter.v64i8.v64p0(<64 x i8>, <64 x ptr>, i32, <64 x i1>) +declare void @llvm.masked.scatter.v32i8.v32p0(<32 x i8>, <32 x ptr>, i32, <32 x i1>) +declare void @llvm.masked.scatter.v16i8.v16p0(<16 x i8>, <16 x ptr>, i32, <16 x i1>) +declare void @llvm.masked.scatter.v8i8.v8p0(<8 x i8>, <8 x ptr>, i32, <8 x i1>) + +declare <8 x double> @llvm.masked.expandload.v8f64(ptr, <8 x i1>, <8 x double>) +declare <4 x double> @llvm.masked.expandload.v4f64(ptr, <4 x i1>, <4 x double>) +declare <2 x double> @llvm.masked.expandload.v2f64(ptr, <2 x i1>, <2 x double>) +declare <1 x double> @llvm.masked.expandload.v1f64(ptr, <1 x i1>, <1 x double>) + +declare <16 x float> @llvm.masked.expandload.v16f32(ptr, <16 x i1>, <16 x float>) +declare <8 x float> @llvm.masked.expandload.v8f32(ptr, <8 x i1>, <8 x float>) +declare <4 x float> 
@llvm.masked.expandload.v4f32(ptr, <4 x i1>, <4 x float>) +declare <2 x float> @llvm.masked.expandload.v2f32(ptr, <2 x i1>, <2 x float>) + +declare <8 x i64> @llvm.masked.expandload.v8i64(ptr, <8 x i1>, <8 x i64>) +declare <4 x i64> @llvm.masked.expandload.v4i64(ptr, <4 x i1>, <4 x i64>) +declare <2 x i64> @llvm.masked.expandload.v2i64(ptr, <2 x i1>, <2 x i64>) +declare <1 x i64> @llvm.masked.expandload.v1i64(ptr, <1 x i1>, <1 x i64>) + +declare <16 x i32> @llvm.masked.expandload.v16i32(ptr, <16 x i1>, <16 x i32>) +declare <8 x i32> @llvm.masked.expandload.v8i32(ptr, <8 x i1>, <8 x i32>) +declare <4 x i32> @llvm.masked.expandload.v4i32(ptr, <4 x i1>, <4 x i32>) +declare <2 x i32> @llvm.masked.expandload.v2i32(ptr, <2 x i1>, <2 x i32>) + +declare <32 x i16> @llvm.masked.expandload.v32i16(ptr, <32 x i1>, <32 x i16>) +declare <16 x i16> @llvm.masked.expandload.v16i16(ptr, <16 x i1>, <16 x i16>) +declare <8 x i16> @llvm.masked.expandload.v8i16(ptr, <8 x i1>, <8 x i16>) +declare <4 x i16> @llvm.masked.expandload.v4i16(ptr, <4 x i1>, <4 x i16>) + +declare <64 x i8> @llvm.masked.expandload.v64i8(ptr, <64 x i1>, <64 x i8>) +declare <32 x i8> @llvm.masked.expandload.v32i8(ptr, <32 x i1>, <32 x i8>) +declare <16 x i8> @llvm.masked.expandload.v16i8(ptr, <16 x i1>, <16 x i8>) +declare <8 x i8> @llvm.masked.expandload.v8i8(ptr, <8 x i1>, <8 x i8>) + +declare void @llvm.masked.compressstore.v8f64(<8 x double>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4f64(<4 x double>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2f64(<2 x double>, ptr, <2 x i1>) +declare void @llvm.masked.compressstore.v1f64(<1 x double>, ptr, <1 x i1>) + +declare void @llvm.masked.compressstore.v16f32(<16 x float>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8f32(<8 x float>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4f32(<4 x float>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2f32(<2 x float>, ptr, <2 x i1>) + +declare void @llvm.masked.compressstore.v8i64(<8 x i64>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4i64(<4 x i64>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2i64(<2 x i64>, ptr, <2 x i1>) +declare void @llvm.masked.compressstore.v1i64(<1 x i64>, ptr, <1 x i1>) + +declare void @llvm.masked.compressstore.v16i32(<16 x i32>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8i32(<8 x i32>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4i32(<4 x i32>, ptr, <4 x i1>) +declare void @llvm.masked.compressstore.v2i32(<2 x i32>, ptr, <2 x i1>) + +declare void @llvm.masked.compressstore.v32i16(<32 x i16>, ptr, <32 x i1>) +declare void @llvm.masked.compressstore.v16i16(<16 x i16>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8i16(<8 x i16>, ptr, <8 x i1>) +declare void @llvm.masked.compressstore.v4i16(<4 x i16>, ptr, <4 x i1>) + +declare void @llvm.masked.compressstore.v64i8(<64 x i8>, ptr, <64 x i1>) +declare void @llvm.masked.compressstore.v32i8(<32 x i8>, ptr, <32 x i1>) +declare void @llvm.masked.compressstore.v16i8(<16 x i8>, ptr, <16 x i1>) +declare void @llvm.masked.compressstore.v8i8(<8 x i8>, ptr, <8 x i1>) -- cgit v1.1 From c83f23d6abb6f8d693c643bc1b43f9b9e06bc537 Mon Sep 17 00:00:00 2001 From: Eli Friedman Date: Thu, 4 Apr 2024 11:25:44 -0700 Subject: [AArch64] Fix heuristics for folding "lsl" into load/store ops. (#86894) The existing heuristics were assuming that every core behaves like an Apple A7, where any extend/shift costs an extra micro-op... but in reality, nothing else behaves like that. 
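As a rough illustration of the decision the patch below installs (a standalone Python sketch of my own, not the actual AArch64ISelDAGToDAG C++; `size` is the access width in bytes, so the shift folded into the address is log2(size)):

  # Sketch only: reduced to the new slow-shift check; the real code also
  # considers whether the shift/add is needed by a non-address user anyway.
  def worth_folding_shift_into_addr(opt_for_size, has_one_use,
                                    has_addr_lsl_slow_14, size):
      if opt_for_size or has_one_use:
          return True   # folding cannot add instructions in these cases
      if has_addr_lsl_slow_14 and size in (2, 16):
          return False  # lsl #1 / lsl #4 cost an extra micro-op on these cores
      return True       # otherwise the shifted load is as cheap as unshifted
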
On some older Cortex designs, shifts by 1 or 4 cost extra, but all other shifts/extensions are free. On all other cores, as far as I can tell, all shifts/extensions for integer loads are free (i.e. the same cost as an unshifted load). To reflect this, this patch: - Enables aggressive folding of shifts into loads by default. - Removes the old AddrLSLFast feature, since it applies to everything except A7 (and even if you are explicitly targeting A7, we want to assume extensions are free because the code will almost always run on a newer core). - Adds a new feature AddrLSLSlow14 that applies specifically to the Cortex cores where shifts by 1 or 4 cost extra. I didn't add support for AddrLSLSlow14 on the GlobalISel side because it would require a bunch of refactoring to work correctly. Someone can pick this up as a followup. --- llvm/lib/Target/AArch64/AArch64.td | 53 +++++----- llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 29 +++--- llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 2 +- .../AArch64/GISel/AArch64InstructionSelector.cpp | 6 +- .../AArch64/GlobalISel/load-addressing-modes.mir | 12 +-- llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll | 112 +++++++-------------- .../AArch64/aarch64-split-and-bitmask-immediate.ll | 5 +- .../CodeGen/AArch64/arm64-addr-mode-folding.ll | 2 +- llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll | 20 ++-- .../CodeGen/AArch64/avoid-free-ext-promotion.ll | 11 +- llvm/test/CodeGen/AArch64/cheap-as-a-move.ll | 30 +++--- llvm/test/CodeGen/AArch64/extract-bits.ll | 5 +- .../CodeGen/AArch64/machine-licm-hoist-load.ll | 5 +- llvm/test/CodeGen/AArch64/sink-and-fold.ll | 4 +- 14 files changed, 119 insertions(+), 177 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 6425aa9..3af427d 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -391,9 +391,18 @@ def FeatureNoNegativeImmediates : SubtargetFeature<"no-neg-immediates", "equivalent when the immediate does " "not fit in the encoding.">; -def FeatureAddrLSLFast : SubtargetFeature< - "addr-lsl-fast", "HasAddrLSLFast", "true", - "Address operands with logical shift of up to 3 places are cheap">; +// Address operands with shift amount 2 or 3 are fast on all Arm chips except +// some old Apple cores (A7-A10?) which handle all shifts slowly. Cortex-A57 +// and derived designs through Cortex-X1 take an extra micro-op for shifts +// of 1 or 4. Other Arm chips handle all shifted operands at the same speed +// as unshifted operands. +// +// We don't try to model the behavior of the old Apple cores because new code +// targeting A7 is very unlikely to actually run on an A7. The Cortex cores +// are modeled by FeatureAddrLSLSlow14. 
+def FeatureAddrLSLSlow14 : SubtargetFeature< + "addr-lsl-slow-14", "HasAddrLSLSlow14", "true", + "Address operands with shift amount of 1 or 4 are slow">; def FeatureALULSLFast : SubtargetFeature< "alu-lsl-fast", "HasALULSLFast", "true", @@ -885,6 +894,7 @@ def TuneA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", FeatureBalanceFPOps, FeatureFuseAdrpAdd, FeatureFuseLiterals, + FeatureAddrLSLSlow14, FeaturePostRAScheduler, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -903,6 +913,7 @@ def TuneA72 : SubtargetFeature<"a72", "ARMProcFamily", "CortexA72", FeatureFuseAES, FeatureFuseAdrpAdd, FeatureFuseLiterals, + FeatureAddrLSLSlow14, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -910,6 +921,7 @@ def TuneA73 : SubtargetFeature<"a73", "ARMProcFamily", "CortexA73", "Cortex-A73 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -917,6 +929,7 @@ def TuneA75 : SubtargetFeature<"a75", "ARMProcFamily", "CortexA75", "Cortex-A75 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, + FeatureAddrLSLSlow14, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -924,7 +937,7 @@ def TuneA76 : SubtargetFeature<"a76", "ARMProcFamily", "CortexA76", "Cortex-A76 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -934,7 +947,7 @@ def TuneA77 : SubtargetFeature<"a77", "ARMProcFamily", "CortexA77", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeatureEnableSelectOptimize, FeaturePredictableSelectIsExpensive]>; @@ -944,7 +957,7 @@ def TuneA78 : SubtargetFeature<"a78", "ARMProcFamily", "CortexA78", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -956,7 +969,7 @@ def TuneA78AE : SubtargetFeature<"a78ae", "ARMProcFamily", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -968,7 +981,7 @@ def TuneA78C : SubtargetFeature<"a78c", "ARMProcFamily", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -979,7 +992,6 @@ def TuneA710 : SubtargetFeature<"a710", "ARMProcFamily", "CortexA710", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -990,7 +1002,6 @@ def TuneA715 : SubtargetFeature<"a715", "ARMProcFamily", "CortexA715", FeatureFuseAES, FeaturePostRAScheduler, FeatureCmpBccFusion, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureEnableSelectOptimize, @@ -1001,7 +1012,6 @@ def TuneA720 : SubtargetFeature<"a720", "ARMProcFamily", "CortexA720", FeatureFuseAES, FeaturePostRAScheduler, FeatureCmpBccFusion, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureEnableSelectOptimize, @@ -1012,7 +1022,6 @@ def TuneA720AE : SubtargetFeature<"a720ae", "ARMProcFamily", "CortexA720", FeatureFuseAES, FeaturePostRAScheduler, FeatureCmpBccFusion, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureEnableSelectOptimize, @@ 
-1028,7 +1037,7 @@ def TuneX1 : SubtargetFeature<"cortex-x1", "ARMProcFamily", "CortexX1", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1039,7 +1048,6 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", FeatureCmpBccFusion, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1047,7 +1055,6 @@ def TuneX2 : SubtargetFeature<"cortex-x2", "ARMProcFamily", "CortexX2", def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3", "Cortex-X3 ARM processors", [ - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureFuseAES, @@ -1057,7 +1064,6 @@ def TuneX3 : SubtargetFeature<"cortex-x3", "ARMProcFamily", "CortexX3", def TuneX4 : SubtargetFeature<"cortex-x4", "ARMProcFamily", "CortexX4", "Cortex-X4 ARM processors", [ - FeatureAddrLSLFast, FeatureALULSLFast, FeatureFuseAdrpAdd, FeatureFuseAES, @@ -1215,7 +1221,6 @@ def TuneExynosM3 : SubtargetFeature<"exynosm3", "ARMProcFamily", "ExynosM3", FeatureFuseAdrpAdd, FeatureFuseLiterals, FeatureStorePairSuppress, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive]>; @@ -1234,7 +1239,6 @@ def TuneExynosM4 : SubtargetFeature<"exynosm4", "ARMProcFamily", "ExynosM3", FeatureFuseAdrpAdd, FeatureFuseLiterals, FeatureStorePairSuppress, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureZCZeroing]>; @@ -1244,7 +1248,6 @@ def TuneKryo : SubtargetFeature<"kryo", "ARMProcFamily", "Kryo", FeaturePostRAScheduler, FeaturePredictableSelectIsExpensive, FeatureZCZeroing, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureStorePairSuppress]>; @@ -1254,7 +1257,6 @@ def TuneFalkor : SubtargetFeature<"falkor", "ARMProcFamily", "Falkor", FeaturePredictableSelectIsExpensive, FeatureZCZeroing, FeatureStorePairSuppress, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureSlowSTRQro]>; @@ -1268,7 +1270,7 @@ def TuneNeoverseN1 : SubtargetFeature<"neoversen1", "ARMProcFamily", "NeoverseN1 "Neoverse N1 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1278,7 +1280,6 @@ def TuneNeoverseN2 : SubtargetFeature<"neoversen2", "ARMProcFamily", "NeoverseN2 "Neoverse N2 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1288,7 +1289,6 @@ def TuneNeoverse512TVB : SubtargetFeature<"neoverse512tvb", "ARMProcFamily", "Ne "Neoverse 512-TVB ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1298,7 +1298,7 @@ def TuneNeoverseV1 : SubtargetFeature<"neoversev1", "ARMProcFamily", "NeoverseV1 "Neoverse V1 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, + FeatureAddrLSLSlow14, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1309,7 +1309,6 @@ def TuneNeoverseV2 : SubtargetFeature<"neoversev2", "ARMProcFamily", "NeoverseV2 "Neoverse V2 ARM processors", [ FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeaturePostRAScheduler, FeatureEnableSelectOptimize, @@ -1321,7 +1320,6 @@ def TuneSaphira : SubtargetFeature<"saphira", "ARMProcFamily", "Saphira", 
FeaturePredictableSelectIsExpensive, FeatureZCZeroing, FeatureStorePairSuppress, - FeatureAddrLSLFast, FeatureALULSLFast]>; def TuneThunderX2T99 : SubtargetFeature<"thunderx2t99", "ARMProcFamily", "ThunderX2T99", @@ -1381,7 +1379,6 @@ def TuneAmpere1 : SubtargetFeature<"ampere1", "ARMProcFamily", "Ampere1", FeaturePostRAScheduler, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureAggressiveFMA, FeatureArithmeticBccFusion, @@ -1397,7 +1394,6 @@ def TuneAmpere1A : SubtargetFeature<"ampere1a", "ARMProcFamily", "Ampere1A", FeaturePostRAScheduler, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureAggressiveFMA, FeatureArithmeticBccFusion, @@ -1414,7 +1410,6 @@ def TuneAmpere1B : SubtargetFeature<"ampere1b", "ARMProcFamily", "Ampere1B", FeaturePostRAScheduler, FeatureFuseAES, FeatureFuseAdrpAdd, - FeatureAddrLSLFast, FeatureALULSLFast, FeatureAggressiveFMA, FeatureArithmeticBccFusion, diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 163ed52..51bec36 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -462,7 +462,7 @@ private: SDValue &Offset, SDValue &SignExtend, SDValue &DoShift); bool isWorthFoldingALU(SDValue V, bool LSL = false) const; - bool isWorthFoldingAddr(SDValue V) const; + bool isWorthFoldingAddr(SDValue V, unsigned Size) const; bool SelectExtendedSHL(SDValue N, unsigned Size, bool WantExtend, SDValue &Offset, SDValue &SignExtend); @@ -674,17 +674,22 @@ static bool isWorthFoldingSHL(SDValue V) { /// Determine whether it is worth to fold V into an extended register addressing /// mode. -bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V) const { +bool AArch64DAGToDAGISel::isWorthFoldingAddr(SDValue V, unsigned Size) const { // Trivial if we are optimizing for code size or if there is only // one use of the value. if (CurDAG->shouldOptForSize() || V.hasOneUse()) return true; - // If a subtarget has a fastpath LSL we can fold a logical shift into - // the addressing mode and save a cycle. - if (Subtarget->hasAddrLSLFast() && V.getOpcode() == ISD::SHL && - isWorthFoldingSHL(V)) + + // If a subtarget has a slow shift, folding a shift into multiple loads + // costs additional micro-ops. + if (Subtarget->hasAddrLSLSlow14() && (Size == 2 || Size == 16)) + return false; + + // Check whether we're going to emit the address arithmetic anyway because + // it's used by a non-address operation. + if (V.getOpcode() == ISD::SHL && isWorthFoldingSHL(V)) return true; - if (Subtarget->hasAddrLSLFast() && V.getOpcode() == ISD::ADD) { + if (V.getOpcode() == ISD::ADD) { const SDValue LHS = V.getOperand(0); const SDValue RHS = V.getOperand(1); if (LHS.getOpcode() == ISD::SHL && isWorthFoldingSHL(LHS)) @@ -1203,7 +1208,7 @@ bool AArch64DAGToDAGISel::SelectExtendedSHL(SDValue N, unsigned Size, if (ShiftVal != 0 && ShiftVal != LegalShiftVal) return false; - return isWorthFoldingAddr(N); + return isWorthFoldingAddr(N, Size); } bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, @@ -1231,7 +1236,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, } // Remember if it is worth folding N when it produces extended register. - bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N); + bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size); // Try to match a shifted extend on the RHS. 
if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && @@ -1261,7 +1266,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, Offset = narrowIfNeeded(CurDAG, LHS.getOperand(0)); SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, MVT::i32); - if (isWorthFoldingAddr(LHS)) + if (isWorthFoldingAddr(LHS, Size)) return true; } @@ -1273,7 +1278,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeWRO(SDValue N, unsigned Size, Offset = narrowIfNeeded(CurDAG, RHS.getOperand(0)); SignExtend = CurDAG->getTargetConstant(Ext == AArch64_AM::SXTW, dl, MVT::i32); - if (isWorthFoldingAddr(RHS)) + if (isWorthFoldingAddr(RHS, Size)) return true; } @@ -1343,7 +1348,7 @@ bool AArch64DAGToDAGISel::SelectAddrModeXRO(SDValue N, unsigned Size, } // Remember if it is worth folding N when it produces extended register. - bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N); + bool IsExtendedRegisterWorthFolding = isWorthFoldingAddr(N, Size); // Try to match a shifted extend on the RHS. if (IsExtendedRegisterWorthFolding && RHS.getOpcode() == ISD::SHL && diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index d0c5e6b..22687b0 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -2993,7 +2993,7 @@ bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI, return false; Shift = AArch64_AM::getShiftValue(Shift); if (!OptSize) { - if ((Shift != 2 && Shift != 3) || !Subtarget.hasAddrLSLFast()) + if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14()) return false; if (avoidSlowSTRQ(MemI)) return false; diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp index a8f2c45..d4daf17 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp @@ -6907,10 +6907,8 @@ bool AArch64InstructionSelector::isWorthFoldingIntoExtendedReg( MI.getParent()->getParent()->getFunction().hasOptSize()) return true; - // It's better to avoid folding and recomputing shifts when we don't have a - // fastpath. - if (!STI.hasAddrLSLFast()) - return false; + // FIXME: Consider checking HasAddrLSLSlow14 and HasALULSLFast as + // appropriate. // We have a fastpath, so folding a shift in and potentially computing it // many times may be beneficial. Check if this is only used in memory ops. diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir index 499c08f..7921de6 100644 --- a/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir +++ b/llvm/test/CodeGen/AArch64/GlobalISel/load-addressing-modes.mir @@ -15,7 +15,7 @@ define void @mul_wrong_pow_2(ptr %addr) { ret void } define void @more_than_one_use_shl_1(ptr %addr) { ret void } define void @more_than_one_use_shl_2(ptr %addr) { ret void } - define void @more_than_one_use_shl_lsl_fast(ptr %addr) #1 { ret void } + define void @more_than_one_use_shl_lsl_fast(ptr %addr) { ret void } define void @more_than_one_use_shl_lsl_slow(ptr %addr) { ret void } define void @more_than_one_use_shl_minsize(ptr %addr) #0 { ret void } define void @ldrwrox(ptr %addr) { ret void } @@ -24,7 +24,6 @@ define void @ldbbrox(ptr %addr) { ret void } define void @ldrqrox(ptr %addr) { ret void } attributes #0 = { optsize } - attributes #1 = { "target-features"="+addr-lsl-fast" } ... 
--- @@ -478,11 +477,10 @@ body: | ; CHECK: liveins: $x0, $x1, $x2 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr64 = COPY $x0 - ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64 = COPY $x1 - ; CHECK-NEXT: [[ADDXrs:%[0-9]+]]:gpr64common = ADDXrs [[COPY1]], [[COPY]], 3 - ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr) - ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64 = LDRXui [[ADDXrs]], 0 :: (load (s64) from %ir.addr) - ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXui]], [[LDRXui1]] + ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64sp = COPY $x1 + ; CHECK-NEXT: [[LDRXroX:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[LDRXroX1:%[0-9]+]]:gpr64 = LDRXroX [[COPY1]], [[COPY]], 0, 1 :: (load (s64) from %ir.addr) + ; CHECK-NEXT: [[ADDXrr:%[0-9]+]]:gpr64 = ADDXrr [[LDRXroX]], [[LDRXroX1]] ; CHECK-NEXT: $x2 = COPY [[ADDXrr]] ; CHECK-NEXT: RET_ReallyLR implicit $x2 %0:gpr(s64) = COPY $x0 diff --git a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll index 59cd87f..022aaea 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-fold-lslfast.ll @@ -1,6 +1,6 @@ ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py -; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK0 -; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+addr-lsl-fast | FileCheck %s --check-prefixes=CHECK,CHECK3 +; RUN: llc < %s -mtriple=aarch64-linux-gnu -mattr=+addr-lsl-slow-14 | FileCheck %s --check-prefixes=CHECK,CHECK0 +; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK3 %struct.a = type [256 x i16] %struct.b = type [256 x i32] @@ -49,36 +49,20 @@ define i16 @halfword(ptr %ctx, i32 %xor72) nounwind { } define i32 @word(ptr %ctx, i32 %xor72) nounwind { -; CHECK0-LABEL: word: -; CHECK0: // %bb.0: -; CHECK0-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill -; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK0-NEXT: ubfx x8, x1, #9, #8 -; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK0-NEXT: mov x19, x0 -; CHECK0-NEXT: lsl x21, x8, #2 -; CHECK0-NEXT: ldr w20, [x0, x21] -; CHECK0-NEXT: bl foo -; CHECK0-NEXT: mov w0, w20 -; CHECK0-NEXT: str w20, [x19, x21] -; CHECK0-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK0-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK0-NEXT: ret -; -; CHECK3-LABEL: word: -; CHECK3: // %bb.0: -; CHECK3-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill -; CHECK3-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK3-NEXT: ubfx x21, x1, #9, #8 -; CHECK3-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK3-NEXT: mov x19, x0 -; CHECK3-NEXT: ldr w20, [x0, x21, lsl #2] -; CHECK3-NEXT: bl foo -; CHECK3-NEXT: mov w0, w20 -; CHECK3-NEXT: str w20, [x19, x21, lsl #2] -; CHECK3-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK3-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK3-NEXT: ret +; CHECK-LABEL: word: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x21, x1, #9, #8 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: ldr w20, [x0, x21, lsl #2] +; CHECK-NEXT: bl foo +; CHECK-NEXT: mov w0, w20 +; CHECK-NEXT: str w20, [x19, x21, lsl #2] +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -90,36 +74,20 @@ define i32 @word(ptr %ctx, i32 %xor72) nounwind { } define i64 @doubleword(ptr %ctx, i32 %xor72) nounwind { -; CHECK0-LABEL: doubleword: -; CHECK0: // %bb.0: -; CHECK0-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill -; CHECK0-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK0-NEXT: ubfx x8, x1, #9, #8 -; CHECK0-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK0-NEXT: mov x19, x0 -; CHECK0-NEXT: lsl x21, x8, #3 -; CHECK0-NEXT: ldr x20, [x0, x21] -; CHECK0-NEXT: bl foo -; CHECK0-NEXT: mov x0, x20 -; CHECK0-NEXT: str x20, [x19, x21] -; CHECK0-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK0-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK0-NEXT: ret -; -; CHECK3-LABEL: doubleword: -; CHECK3: // %bb.0: -; CHECK3-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill -; CHECK3-NEXT: // kill: def $w1 killed $w1 def $x1 -; CHECK3-NEXT: ubfx x21, x1, #9, #8 -; CHECK3-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill -; CHECK3-NEXT: mov x19, x0 -; CHECK3-NEXT: ldr x20, [x0, x21, lsl #3] -; CHECK3-NEXT: bl foo -; CHECK3-NEXT: mov x0, x20 -; CHECK3-NEXT: str x20, [x19, x21, lsl #3] -; CHECK3-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload -; CHECK3-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload -; CHECK3-NEXT: ret +; CHECK-LABEL: doubleword: +; CHECK: // %bb.0: +; CHECK-NEXT: stp x30, x21, [sp, #-32]! 
// 16-byte Folded Spill +; CHECK-NEXT: // kill: def $w1 killed $w1 def $x1 +; CHECK-NEXT: ubfx x21, x1, #9, #8 +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: mov x19, x0 +; CHECK-NEXT: ldr x20, [x0, x21, lsl #3] +; CHECK-NEXT: bl foo +; CHECK-NEXT: mov x0, x20 +; CHECK-NEXT: str x20, [x19, x21, lsl #3] +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload +; CHECK-NEXT: ret %shr81 = lshr i32 %xor72, 9 %conv82 = zext i32 %shr81 to i64 %idxprom83 = and i64 %conv82, 255 @@ -163,20 +131,12 @@ endbb: } define i64 @gep3(ptr %p, i64 %b) { -; CHECK0-LABEL: gep3: -; CHECK0: // %bb.0: -; CHECK0-NEXT: lsl x9, x1, #3 -; CHECK0-NEXT: mov x8, x0 -; CHECK0-NEXT: ldr x0, [x0, x9] -; CHECK0-NEXT: str x1, [x8, x9] -; CHECK0-NEXT: ret -; -; CHECK3-LABEL: gep3: -; CHECK3: // %bb.0: -; CHECK3-NEXT: mov x8, x0 -; CHECK3-NEXT: ldr x0, [x0, x1, lsl #3] -; CHECK3-NEXT: str x1, [x8, x1, lsl #3] -; CHECK3-NEXT: ret +; CHECK-LABEL: gep3: +; CHECK: // %bb.0: +; CHECK-NEXT: mov x8, x0 +; CHECK-NEXT: ldr x0, [x0, x1, lsl #3] +; CHECK-NEXT: str x1, [x8, x1, lsl #3] +; CHECK-NEXT: ret %g = getelementptr inbounds i64, ptr %p, i64 %b %l = load i64, ptr %g store i64 %b, ptr %g diff --git a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll index 573f921..e31c9a0 100644 --- a/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll +++ b/llvm/test/CodeGen/AArch64/aarch64-split-and-bitmask-immediate.ll @@ -134,9 +134,8 @@ define void @test8(i64 %a, ptr noalias %src, ptr noalias %dst, i64 %n) { ; CHECK-NEXT: b.hs .LBB7_1 ; CHECK-NEXT: // %bb.3: // %if.then ; CHECK-NEXT: // in Loop: Header=BB7_2 Depth=1 -; CHECK-NEXT: lsl x10, x8, #3 -; CHECK-NEXT: ldr x11, [x1, x10] -; CHECK-NEXT: str x11, [x2, x10] +; CHECK-NEXT: ldr x10, [x1, x8, lsl #3] +; CHECK-NEXT: str x10, [x2, x8, lsl #3] ; CHECK-NEXT: b .LBB7_1 ; CHECK-NEXT: .LBB7_4: // %exit ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll b/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll index d593272..6bcd2f0 100644 --- a/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll +++ b/llvm/test/CodeGen/AArch64/arm64-addr-mode-folding.ll @@ -125,7 +125,7 @@ return: ; preds = %if.end23, %if.then3 } ; CHECK: @test -; CHECK-NOT: , uxtw #2] +; CHECK: , uxtw #2] define i32 @test(ptr %array, i8 zeroext %c, i32 %arg) { entry: %conv = zext i8 %c to i32 diff --git a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll index 3542b26..5b055a4 100644 --- a/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll +++ b/llvm/test/CodeGen/AArch64/arm64-vector-ldst.ll @@ -201,11 +201,10 @@ define void @fct1_64x1(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_64x1: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:globalArray64x1 -; CHECK-NEXT: lsl x9, x1, #3 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray64x1] -; CHECK-NEXT: ldr d0, [x0, x9] +; CHECK-NEXT: ldr d0, [x0, x1, lsl #3] ; CHECK-NEXT: ldr x8, [x8] -; CHECK-NEXT: str d0, [x8, x9] +; CHECK-NEXT: str d0, [x8, x1, lsl #3] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <1 x i64>, ptr %array, i64 %offset @@ -238,11 +237,10 @@ define void @fct1_32x2(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_32x2: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:globalArray32x2 -; CHECK-NEXT: lsl x9, x1, #3 ; CHECK-NEXT: ldr x8, 
[x8, :got_lo12:globalArray32x2] -; CHECK-NEXT: ldr d0, [x0, x9] +; CHECK-NEXT: ldr d0, [x0, x1, lsl #3] ; CHECK-NEXT: ldr x8, [x8] -; CHECK-NEXT: str d0, [x8, x9] +; CHECK-NEXT: str d0, [x8, x1, lsl #3] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <2 x i32>, ptr %array, i64 %offset @@ -275,11 +273,10 @@ define void @fct1_16x4(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_16x4: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:globalArray16x4 -; CHECK-NEXT: lsl x9, x1, #3 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray16x4] -; CHECK-NEXT: ldr d0, [x0, x9] +; CHECK-NEXT: ldr d0, [x0, x1, lsl #3] ; CHECK-NEXT: ldr x8, [x8] -; CHECK-NEXT: str d0, [x8, x9] +; CHECK-NEXT: str d0, [x8, x1, lsl #3] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <4 x i16>, ptr %array, i64 %offset @@ -312,11 +309,10 @@ define void @fct1_8x8(ptr nocapture %array, i64 %offset) nounwind ssp { ; CHECK-LABEL: fct1_8x8: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: adrp x8, :got:globalArray8x8 -; CHECK-NEXT: lsl x9, x1, #3 ; CHECK-NEXT: ldr x8, [x8, :got_lo12:globalArray8x8] -; CHECK-NEXT: ldr d0, [x0, x9] +; CHECK-NEXT: ldr d0, [x0, x1, lsl #3] ; CHECK-NEXT: ldr x8, [x8] -; CHECK-NEXT: str d0, [x8, x9] +; CHECK-NEXT: str d0, [x8, x1, lsl #3] ; CHECK-NEXT: ret entry: %arrayidx = getelementptr inbounds <8 x i8>, ptr %array, i64 %offset diff --git a/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll b/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll index 8f19553..634d1b9 100644 --- a/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll +++ b/llvm/test/CodeGen/AArch64/avoid-free-ext-promotion.ll @@ -82,13 +82,12 @@ define void @avoid_promotion_2_and(ptr nocapture noundef %arg) { ; CHECK-NEXT: eor w10, w10, w11 ; CHECK-NEXT: ldur w11, [x8, #-24] ; CHECK-NEXT: and w10, w10, w14 -; CHECK-NEXT: ldp x15, x14, [x8, #-16] -; CHECK-NEXT: ubfiz x13, x10, #1, #32 +; CHECK-NEXT: ldp x14, x13, [x8, #-16] ; CHECK-NEXT: str w10, [x8] -; CHECK-NEXT: and w10, w11, w12 -; CHECK-NEXT: ldrh w11, [x14, x13] -; CHECK-NEXT: strh w11, [x15, w10, uxtw #1] -; CHECK-NEXT: strh w12, [x14, x13] +; CHECK-NEXT: and w11, w11, w12 +; CHECK-NEXT: ldrh w15, [x13, w10, uxtw #1] +; CHECK-NEXT: strh w15, [x14, w11, uxtw #1] +; CHECK-NEXT: strh w12, [x13, w10, uxtw #1] ; CHECK-NEXT: b LBB1_1 ; CHECK-NEXT: LBB1_4: ; %exit ; CHECK-NEXT: ret diff --git a/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll b/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll index b5c2104..50c70c5 100644 --- a/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll +++ b/llvm/test/CodeGen/AArch64/cheap-as-a-move.ll @@ -7,7 +7,7 @@ target triple = "aarch64-unknown-linux" define void @f0(ptr %a, i64 %n) { ; CHECK-LABEL: f0: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: stp x30, x23, [sp, #-48]! // 16-byte Folded Spill +; CHECK-NEXT: str x30, [sp, #-48]! 
// 8-byte Folded Spill ; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill ; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill ; CHECK-NEXT: .cfi_def_cfa_offset 48 @@ -15,7 +15,6 @@ define void @f0(ptr %a, i64 %n) { ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 ; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w23, -40 ; CHECK-NEXT: .cfi_offset w30, -48 ; CHECK-NEXT: mov x21, #1 // =0x1 ; CHECK-NEXT: mov x19, x1 @@ -27,18 +26,17 @@ define void @f0(ptr %a, i64 %n) { ; CHECK-NEXT: b.ge .LBB0_2 ; CHECK-NEXT: .LBB0_1: // %loop.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lsl x23, x22, #2 +; CHECK-NEXT: ldr w0, [x20, x22, lsl #2] ; CHECK-NEXT: mov x1, x21 -; CHECK-NEXT: ldr w0, [x20, x23] ; CHECK-NEXT: bl g -; CHECK-NEXT: str w0, [x20, x23] +; CHECK-NEXT: str w0, [x20, x22, lsl #2] ; CHECK-NEXT: add x22, x22, #1 ; CHECK-NEXT: cmp x22, x19 ; CHECK-NEXT: b.lt .LBB0_1 ; CHECK-NEXT: .LBB0_2: // %exit ; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload ; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldp x30, x23, [sp], #48 // 16-byte Folded Reload +; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload ; CHECK-NEXT: ret entry: br label %loop @@ -64,15 +62,13 @@ exit: define void @f1(ptr %a, i64 %n) { ; CHECK-LABEL: f1: ; CHECK: // %bb.0: // %entry -; CHECK-NEXT: str x30, [sp, #-48]! // 8-byte Folded Spill -; CHECK-NEXT: stp x22, x21, [sp, #16] // 16-byte Folded Spill -; CHECK-NEXT: stp x20, x19, [sp, #32] // 16-byte Folded Spill -; CHECK-NEXT: .cfi_def_cfa_offset 48 +; CHECK-NEXT: stp x30, x21, [sp, #-32]! // 16-byte Folded Spill +; CHECK-NEXT: stp x20, x19, [sp, #16] // 16-byte Folded Spill +; CHECK-NEXT: .cfi_def_cfa_offset 32 ; CHECK-NEXT: .cfi_offset w19, -8 ; CHECK-NEXT: .cfi_offset w20, -16 ; CHECK-NEXT: .cfi_offset w21, -24 -; CHECK-NEXT: .cfi_offset w22, -32 -; CHECK-NEXT: .cfi_offset w30, -48 +; CHECK-NEXT: .cfi_offset w30, -32 ; CHECK-NEXT: mov x19, x1 ; CHECK-NEXT: mov x20, x0 ; CHECK-NEXT: mov x21, xzr @@ -80,19 +76,17 @@ define void @f1(ptr %a, i64 %n) { ; CHECK-NEXT: b.ge .LBB1_2 ; CHECK-NEXT: .LBB1_1: // %loop.body ; CHECK-NEXT: // =>This Inner Loop Header: Depth=1 -; CHECK-NEXT: lsl x22, x21, #2 +; CHECK-NEXT: ldr w0, [x20, x21, lsl #2] ; CHECK-NEXT: mov x1, #1450704896 // =0x56780000 ; CHECK-NEXT: movk x1, #4660, lsl #48 -; CHECK-NEXT: ldr w0, [x20, x22] ; CHECK-NEXT: bl g -; CHECK-NEXT: str w0, [x20, x22] +; CHECK-NEXT: str w0, [x20, x21, lsl #2] ; CHECK-NEXT: add x21, x21, #1 ; CHECK-NEXT: cmp x21, x19 ; CHECK-NEXT: b.lt .LBB1_1 ; CHECK-NEXT: .LBB1_2: // %exit -; CHECK-NEXT: ldp x20, x19, [sp, #32] // 16-byte Folded Reload -; CHECK-NEXT: ldp x22, x21, [sp, #16] // 16-byte Folded Reload -; CHECK-NEXT: ldr x30, [sp], #48 // 8-byte Folded Reload +; CHECK-NEXT: ldp x20, x19, [sp, #16] // 16-byte Folded Reload +; CHECK-NEXT: ldp x30, x21, [sp], #32 // 16-byte Folded Reload ; CHECK-NEXT: ret entry: br label %loop diff --git a/llvm/test/CodeGen/AArch64/extract-bits.ll b/llvm/test/CodeGen/AArch64/extract-bits.ll index d4ea143..b87157a 100644 --- a/llvm/test/CodeGen/AArch64/extract-bits.ll +++ b/llvm/test/CodeGen/AArch64/extract-bits.ll @@ -972,10 +972,9 @@ define void @pr38938(ptr %a0, ptr %a1) nounwind { ; CHECK: // %bb.0: ; CHECK-NEXT: ldr x8, [x1] ; CHECK-NEXT: ubfx x8, x8, #21, #10 -; CHECK-NEXT: lsl x8, x8, #2 -; CHECK-NEXT: ldr w9, [x0, x8] +; CHECK-NEXT: ldr w9, [x0, x8, lsl #2] ; CHECK-NEXT: add w9, w9, #1 -; CHECK-NEXT: str w9, [x0, x8] +; CHECK-NEXT: str 
w9, [x0, x8, lsl #2] ; CHECK-NEXT: ret %tmp = load i64, ptr %a1, align 8 %tmp1 = lshr i64 %tmp, 21 diff --git a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll index 30123a3..e8dafd5 100644 --- a/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll +++ b/llvm/test/CodeGen/AArch64/machine-licm-hoist-load.ll @@ -223,10 +223,9 @@ define i64 @three_dimensional_middle(ptr %a, ptr %b, i64 %N, i64 %M, i64 %K) { ; CHECK-NEXT: // Parent Loop BB3_1 Depth=1 ; CHECK-NEXT: // => This Loop Header: Depth=2 ; CHECK-NEXT: // Child Loop BB3_3 Depth 3 -; CHECK-NEXT: lsl x12, x11, #3 +; CHECK-NEXT: ldr x13, [x1, x11, lsl #3] +; CHECK-NEXT: ldr x12, [x10, x11, lsl #3] ; CHECK-NEXT: mov x14, x4 -; CHECK-NEXT: ldr x13, [x1, x12] -; CHECK-NEXT: ldr x12, [x10, x12] ; CHECK-NEXT: ldr w13, [x13] ; CHECK-NEXT: .LBB3_3: // %for.body8 ; CHECK-NEXT: // Parent Loop BB3_1 Depth=1 diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll index 5200722..f65a08a 100644 --- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll +++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll @@ -100,7 +100,7 @@ exit: } ; Address calculation cheap enough on some cores. -define i32 @f3(i1 %c1, ptr %p, i64 %i) nounwind "target-features"="+alu-lsl-fast,+addr-lsl-fast" { +define i32 @f3(i1 %c1, ptr %p, i64 %i) nounwind "target-features"="+alu-lsl-fast" { ; CHECK-LABEL: f3: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: tbz w0, #0, .LBB3_2 @@ -130,7 +130,7 @@ exit: ret i32 %v } -define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast,+addr-lsl-fast" { +define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast" { ; CHECK-LABEL: f4: ; CHECK: // %bb.0: // %entry ; CHECK-NEXT: cmp x1, #1 -- cgit v1.1 From 5ae143da459a73d0f6fc796d42c3fabf3ab5f9e8 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 4 Apr 2024 11:02:45 -0700 Subject: [SLP]Add a test with the incorrect casting for external user, NFC. 
--- .../X86/external-user-instruction-minbitwidth.ll | 64 ++++++++++++++++++++++ 1 file changed, 64 insertions(+) create mode 100644 llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll new file mode 100644 index 0000000..9d3d602 --- /dev/null +++ b/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll @@ -0,0 +1,64 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 +; RUN: opt -S --passes=slp-vectorizer -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s + +@e = global i8 0 +@c = global i16 0 +@d = global i32 0 + +define i8 @test() { +; CHECK-LABEL: define i8 @test() { +; CHECK-NEXT: entry: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, ptr @e, align 1 +; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[TMP1:%.*]] = load i16, ptr @c, align 2 +; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[TMP1]] to i32 +; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 +; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i32> [[TMP3]] to <8 x i16> +; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[CONV1]], i32 0 +; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> zeroinitializer +; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16> +; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i16> [[TMP5]], [[TMP8]] +; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP9]]) +; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-NEXT: [[CONV4_30:%.*]] = trunc i32 [[TMP11]] to i8 +; CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP5]], i32 7 +; CHECK-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +; CHECK-NEXT: [[XOR_31:%.*]] = and i32 [[TMP13]], -2 +; CHECK-NEXT: store i32 [[XOR_31]], ptr @d, align 4 +; CHECK-NEXT: ret i8 [[CONV4_30]] +; +entry: + %0 = load i8, ptr @e, align 1 + %conv = sext i8 %0 to i32 + %1 = load i16, ptr @c, align 2 + %conv1 = zext i16 %1 to i32 + %or.16 = or i32 %conv, 1 + %add.16 = add nsw i32 %or.16, %conv1 + %or.18 = or i32 %conv, 1 + %add.18 = add nsw i32 %or.18, %conv1 + %conv4.181 = or i32 %add.16, %add.18 + %or.20 = or i32 %conv, 1 + %add.20 = add nsw i32 %or.20, %conv1 + %conv4.202 = or i32 %conv4.181, %add.20 + %or.22 = or i32 %conv, 1 + %add.22 = add nsw i32 %or.22, %conv1 + %conv4.223 = or i32 %conv4.202, %add.22 + %or.24 = or i32 %conv, 1 + %add.24 = add nsw i32 %or.24, %conv1 + %conv4.244 = or i32 %conv4.223, %add.24 + %or.26 = or i32 %conv, 1 + %add.26 = add nsw i32 %or.26, %conv1 + %conv4.265 = or i32 %conv4.244, %add.26 + %or.28 = or i32 %conv, 1 + %add.28 = add nsw i32 %or.28, %conv1 + %conv4.286 = or i32 %conv4.265, %add.28 + %or.30 = or i32 %conv, 32769 + %add.30 = add nsw i32 %or.30, %conv1 + %conv4.307 = or i32 %conv4.286, %add.30 + %conv4.30 = trunc i32 %conv4.307 to i8 + %xor.31 = and i32 %or.30, -2 + store i32 %xor.31, ptr @d, align 4 + ret i8 %conv4.30 +} -- cgit v1.1 From 8004ce236795d48063c636d1ce4818ebcb526b21 Mon Sep 17 00:00:00 2001 From: Joseph Huber Date: Thu, 4 Apr 2024 13:33:19 -0500 Subject: [libc] Move thread sync when closing port earlier Summary: This synchronization should be done before we 
handle the logic relating to closing the port. This isn't majorly important now but it would break if we ever decided to run a server on the GPU. --- libc/src/__support/RPC/rpc.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/libc/src/__support/RPC/rpc.h b/libc/src/__support/RPC/rpc.h index 5dcae51..05506c0 100644 --- a/libc/src/__support/RPC/rpc.h +++ b/libc/src/__support/RPC/rpc.h @@ -198,12 +198,9 @@ template struct Process { /// convergent, otherwise the compiler will sink the store and deadlock. [[clang::convergent]] LIBC_INLINE void unlock(uint64_t lane_mask, uint32_t index) { - // Do not move any writes past the unlock + // Do not move any writes past the unlock. atomic_thread_fence(cpp::MemoryOrder::RELEASE); - // Wait for other threads in the warp to finish using the lock - gpu::sync_lane(lane_mask); - // Use exactly one thread to clear the nth bit in the lock array Must // restrict to a single thread to avoid one thread dropping the lock, then // an unrelated warp claiming the lock, then a second thread in this warp @@ -331,6 +328,9 @@ public: LIBC_INLINE uint16_t get_index() const { return index; } LIBC_INLINE void close() { + // Wait for all lanes to finish using the port. + gpu::sync_lane(lane_mask); + // The server is passive, if it own the buffer when it closes we need to // give ownership back to the client. if (owns_buffer && T) -- cgit v1.1 From d6713ad80d6907210c629f22babaf12177fa329c Mon Sep 17 00:00:00 2001 From: Kevin Frei Date: Thu, 4 Apr 2024 11:43:55 -0700 Subject: Debuginfod Testing & fixes: 3rd times the charm? (#87676) I believe I've got the tests properly configured to only run on Linux x86(_64), as I don't have a Linux AArch64/Arm device to diagnose what's going wrong with the tests (I suspect there's some issue with generating `.note.gnu.build-id` sections...) The actual code fixes have now been reviewed 3 times: https://github.com/llvm/llvm-project/pull/79181 (moved shell tests to API tests), https://github.com/llvm/llvm-project/pull/85693 (Changed some of the testing infra), and https://github.com/llvm/llvm-project/pull/86812 (didn't get the tests configured quite right). The Debuginfod integration for symbol acquisition in LLDB now works with the `executable` and `debuginfo` Debuginfod network requests working properly for normal, `objcopy --only-keep-debug` stripped, split-dwarf, and `objcopy --only-keep-debug` stripped *plus* split-dwarf symbols/binaries. The reasons for the multiple attempts have been tests on platforms I don't have access to (Linux AArch64/Arm + MacOS x86_64). I believe I've got the tests properly disabled for everything except for Linux x86(_64) now. I've built & tested on MacOS AArch64 and Linux x86_64. 
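To make the acquisition flow concrete, here is a minimal sketch using the lldb Python API, in the spirit of the new tests (assumptions of mine: the debuginfod client honors the standard DEBUGINFOD_URLS variable, the server can answer 'executable'/'debuginfo' requests for the binary's GNU build-id, and the binary was linked with -Wl,--build-id):

  import os
  import lldb

  # Assumption: a debuginfod server (http(s), or a file:// tree laid out as
  # buildid/<uuid>/{executable,debuginfo}) that knows this binary's build-id.
  os.environ["DEBUGINFOD_URLS"] = "http://localhost:8002"

  dbg = lldb.SBDebugger.Create()
  target = dbg.CreateTarget("a.out")             # stripped binary
  bp = target.BreakpointCreateByName("func")
  loc = bp.GetLocationAtIndex(0)
  # With Debuginfod-provided symbols the breakpoint resolves to a file/line.
  print(loc.GetAddress().GetLineEntry())
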
--------- Co-authored-by: Kevin Frei --- .../Python/lldbsuite/test/make/Makefile.rules | 26 ++- .../Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp | 38 ++-- lldb/source/Plugins/SymbolLocator/CMakeLists.txt | 7 +- .../Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp | 29 +++- lldb/test/API/debuginfod/Normal/Makefile | 19 ++ lldb/test/API/debuginfod/Normal/TestDebuginfod.py | 183 +++++++++++++++++++ lldb/test/API/debuginfod/Normal/main.c | 7 + lldb/test/API/debuginfod/SplitDWARF/Makefile | 23 +++ .../API/debuginfod/SplitDWARF/TestDebuginfodDWP.py | 193 +++++++++++++++++++++ lldb/test/API/debuginfod/SplitDWARF/main.c | 7 + 10 files changed, 515 insertions(+), 17 deletions(-) create mode 100644 lldb/test/API/debuginfod/Normal/Makefile create mode 100644 lldb/test/API/debuginfod/Normal/TestDebuginfod.py create mode 100644 lldb/test/API/debuginfod/Normal/main.c create mode 100644 lldb/test/API/debuginfod/SplitDWARF/Makefile create mode 100644 lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py create mode 100644 lldb/test/API/debuginfod/SplitDWARF/main.c diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules index bfd249c..ee8793f 100644 --- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules +++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules @@ -51,7 +51,7 @@ LLDB_BASE_DIR := $(THIS_FILE_DIR)/../../../../../ # # GNUWin32 uname gives "windows32" or "server version windows32" while # some versions of MSYS uname return "MSYS_NT*", but most environments -# standardize on "Windows_NT", so we'll make it consistent here. +# standardize on "Windows_NT", so we'll make it consistent here. # When running tests from Visual Studio, the environment variable isn't # inherited all the way down to the process spawned for make. #---------------------------------------------------------------------- @@ -210,6 +210,12 @@ else ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES" DSYM = $(EXE).debug endif + + ifeq "$(MAKE_DWP)" "YES" + MAKE_DWO := YES + DWP_NAME = $(EXE).dwp + DYLIB_DWP_NAME = $(DYLIB_NAME).dwp + endif endif LIMIT_DEBUG_INFO_FLAGS = @@ -357,6 +363,7 @@ ifneq "$(OS)" "Darwin" OBJCOPY ?= $(call replace_cc_with,objcopy) ARCHIVER ?= $(call replace_cc_with,ar) + DWP ?= $(call replace_cc_with,dwp) override AR = $(ARCHIVER) endif @@ -527,6 +534,10 @@ ifneq "$(CXX)" "" endif endif +ifeq "$(GEN_GNU_BUILD_ID)" "YES" + LDFLAGS += -Wl,--build-id +endif + #---------------------------------------------------------------------- # DYLIB_ONLY variable can be used to skip the building of a.out. 
# See the sections below regarding dSYM file as well as the building of @@ -565,10 +576,17 @@ else endif else ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES" +ifeq "$(SAVE_FULL_DEBUG_BINARY)" "YES" + cp "$(EXE)" "$(EXE).unstripped" +endif $(OBJCOPY) --only-keep-debug "$(EXE)" "$(DSYM)" $(OBJCOPY) --strip-debug --add-gnu-debuglink="$(DSYM)" "$(EXE)" "$(EXE)" endif +ifeq "$(MAKE_DWP)" "YES" + $(DWP) -o "$(DWP_NAME)" $(DWOS) endif +endif + #---------------------------------------------------------------------- # Make the dylib @@ -610,9 +628,15 @@ endif else $(LD) $(DYLIB_OBJECTS) $(LDFLAGS) -shared -o "$(DYLIB_FILENAME)" ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES" + ifeq "$(SAVE_FULL_DEBUG_BINARY)" "YES" + cp "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME).unstripped" + endif $(OBJCOPY) --only-keep-debug "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME).debug" $(OBJCOPY) --strip-debug --add-gnu-debuglink="$(DYLIB_FILENAME).debug" "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME)" endif +ifeq "$(MAKE_DWP)" "YES" + $(DWP) -o $(DYLIB_DWP_FILE) $(DYLIB_DWOS) +endif endif #---------------------------------------------------------------------- diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index 49f13d2..dafdf24 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -4378,26 +4378,38 @@ const std::shared_ptr &SymbolFileDWARF::GetDwpSymbolFile() { FileSpecList search_paths = Target::GetDefaultDebugFileSearchPaths(); ModuleSpec module_spec; module_spec.GetFileSpec() = m_objfile_sp->GetFileSpec(); + FileSpec dwp_filespec; for (const auto &symfile : symfiles.files()) { module_spec.GetSymbolFileSpec() = FileSpec(symfile.GetPath() + ".dwp", symfile.GetPathStyle()); LLDB_LOG(log, "Searching for DWP using: \"{0}\"", module_spec.GetSymbolFileSpec()); - FileSpec dwp_filespec = + dwp_filespec = PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); if (FileSystem::Instance().Exists(dwp_filespec)) { - LLDB_LOG(log, "Found DWP file: \"{0}\"", dwp_filespec); - DataBufferSP dwp_file_data_sp; - lldb::offset_t dwp_file_data_offset = 0; - ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( - GetObjectFile()->GetModule(), &dwp_filespec, 0, - FileSystem::Instance().GetByteSize(dwp_filespec), dwp_file_data_sp, - dwp_file_data_offset); - if (dwp_obj_file) { - m_dwp_symfile = std::make_shared( - *this, dwp_obj_file, DIERef::k_file_index_mask); - break; - } + break; + } + } + if (!FileSystem::Instance().Exists(dwp_filespec)) { + LLDB_LOG(log, "No DWP file found locally"); + // Fill in the UUID for the module we're trying to match for, so we can + // find the correct DWP file, as the Debuginfod plugin uses *only* this + // data to correctly match the DWP file with the binary. 
+ module_spec.GetUUID() = m_objfile_sp->GetUUID(); + dwp_filespec = + PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); + } + if (FileSystem::Instance().Exists(dwp_filespec)) { + LLDB_LOG(log, "Found DWP file: \"{0}\"", dwp_filespec); + DataBufferSP dwp_file_data_sp; + lldb::offset_t dwp_file_data_offset = 0; + ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( + GetObjectFile()->GetModule(), &dwp_filespec, 0, + FileSystem::Instance().GetByteSize(dwp_filespec), dwp_file_data_sp, + dwp_file_data_offset); + if (dwp_obj_file) { + m_dwp_symfile = std::make_shared( + *this, dwp_obj_file, DIERef::k_file_index_mask); } } if (!m_dwp_symfile) { diff --git a/lldb/source/Plugins/SymbolLocator/CMakeLists.txt b/lldb/source/Plugins/SymbolLocator/CMakeLists.txt index ca96962..3367022 100644 --- a/lldb/source/Plugins/SymbolLocator/CMakeLists.txt +++ b/lldb/source/Plugins/SymbolLocator/CMakeLists.txt @@ -1,5 +1,10 @@ +# Order matters here: the first symbol locator prevents further searching. +# For DWARF binaries that are both stripped and split, the Default plugin +# will return the stripped binary when asked for the ObjectFile, which then +# prevents an unstripped binary from being requested from the Debuginfod +# provider. +add_subdirectory(Debuginfod) add_subdirectory(Default) if (CMAKE_SYSTEM_NAME MATCHES "Darwin") add_subdirectory(DebugSymbols) endif() -add_subdirectory(Debuginfod) diff --git a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp index b5fe35d..f296e65 100644 --- a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp +++ b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp @@ -44,6 +44,24 @@ llvm::StringRef SymbolVendorELF::GetPluginDescriptionStatic() { "executables."; } +// If this is needed elsewhere, it can be exported/moved. +static bool IsDwpSymbolFile(const lldb::ModuleSP &module_sp, + const FileSpec &file_spec) { + DataBufferSP dwp_file_data_sp; + lldb::offset_t dwp_file_data_offset = 0; + // Try to create an ObjectFile from the file_spec. + ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( + module_sp, &file_spec, 0, FileSystem::Instance().GetByteSize(file_spec), + dwp_file_data_sp, dwp_file_data_offset); + // The presence of a debug_cu_index section is the key identifying feature of + // a DWP file. Make sure we don't fill in the section list on dwp_obj_file + // (by calling GetSectionList(false)) as this function could be called before + // we may have all the symbol files collected and available. + return dwp_obj_file && ObjectFileELF::classof(dwp_obj_file.get()) && + dwp_obj_file->GetSectionList(false)->FindSectionByType( + eSectionTypeDWARFDebugCuIndex, false); +} + // CreateInstance // // Platforms can register a callback to use when creating symbol vendors to @@ -87,8 +105,15 @@ SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp, FileSpecList search_paths = Target::GetDefaultDebugFileSearchPaths(); FileSpec dsym_fspec = PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); - if (!dsym_fspec) - return nullptr; + if (!dsym_fspec || IsDwpSymbolFile(module_sp, dsym_fspec)) { + // If we have a stripped binary or if we got a DWP file, we should prefer + // symbols in the executable acquired through a plugin. 
+ ModuleSpec unstripped_spec = + PluginManager::LocateExecutableObjectFile(module_spec); + if (!unstripped_spec) + return nullptr; + dsym_fspec = unstripped_spec.GetFileSpec(); + } DataBufferSP dsym_file_data_sp; lldb::offset_t dsym_file_data_offset = 0; diff --git a/lldb/test/API/debuginfod/Normal/Makefile b/lldb/test/API/debuginfod/Normal/Makefile new file mode 100644 index 0000000..54bd7ad --- /dev/null +++ b/lldb/test/API/debuginfod/Normal/Makefile @@ -0,0 +1,19 @@ +C_SOURCES := main.c + +# For normal (non DWP) Debuginfod tests, we need: + +# * The full binary: a.out.unstripped +# Produced by Makefile.rules with SAVE_FULL_DEBUG_BINARY set to YES and +# SPLIT_DEBUG_SYMBOLS set to YES + +# * The stripped binary (a.out) +# Produced by Makefile.rules with SPLIT_DEBUG_SYMBOLS set to YES + +# * The 'only-keep-debug' binary (a.out.debug) +# Produced below + +SPLIT_DEBUG_SYMBOLS := YES +SAVE_FULL_DEBUG_BINARY := YES +GEN_GNU_BUILD_ID := YES + +include Makefile.rules diff --git a/lldb/test/API/debuginfod/Normal/TestDebuginfod.py b/lldb/test/API/debuginfod/Normal/TestDebuginfod.py new file mode 100644 index 0000000..f1be1e7 --- /dev/null +++ b/lldb/test/API/debuginfod/Normal/TestDebuginfod.py @@ -0,0 +1,183 @@ +import os +import shutil +import tempfile + +import lldb +from lldbsuite.test.decorators import * +import lldbsuite.test.lldbutil as lldbutil +from lldbsuite.test.lldbtest import * + + +""" +Test support for the DebugInfoD network symbol acquisition protocol. +This one is for simple / no split-dwarf scenarios. + +For no-split-dwarf scenarios, there are 2 variations: +1 - A stripped binary with it's corresponding unstripped binary: +2 - A stripped binary with a corresponding --only-keep-debug symbols file +""" + + +# It looks like Linux-AArch64 doesn't support build-id's on the LLDB builtbots +class DebugInfodTests(TestBase): + # No need to try every flavor of debug inf. + NO_DEBUG_INFO_TESTCASE = True + + @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) + def test_normal_no_symbols(self): + """ + Validate behavior with no symbols or symbol locator. + ('baseline negative' behavior) + """ + test_root = self.config_test(["a.out"]) + self.try_breakpoint(False) + + @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) + def test_normal_default(self): + """ + Validate behavior with symbols, but no symbol locator. + ('baseline positive' behavior) + """ + test_root = self.config_test(["a.out", "a.out.debug"]) + self.try_breakpoint(True) + + @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) + def test_debuginfod_symbols(self): + """ + Test behavior with the full binary available from Debuginfod as + 'debuginfo' from the plug-in. + """ + test_root = self.config_test(["a.out"], "a.out.unstripped") + self.try_breakpoint(True) + + @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) + def test_debuginfod_executable(self): + """ + Test behavior with the full binary available from Debuginfod as + 'executable' from the plug-in. + """ + test_root = self.config_test(["a.out"], None, "a.out.unstripped") + self.try_breakpoint(True) + + @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) + def test_debuginfod_okd_symbols(self): + """ + Test behavior with the 'only-keep-debug' symbols available from Debuginfod. 
+ """ + test_root = self.config_test(["a.out"], "a.out.debug") + self.try_breakpoint(True) + + def try_breakpoint(self, should_have_loc): + """ + This function creates a target from self.aout, sets a function-name + breakpoint, and checks to see if we have a file/line location, + as a way to validate that the symbols have been loaded. + should_have_loc specifies if we're testing that symbols have or + haven't been loaded. + """ + target = self.dbg.CreateTarget(self.aout) + self.assertTrue(target and target.IsValid(), "Target is valid") + + bp = target.BreakpointCreateByName("func") + self.assertTrue(bp and bp.IsValid(), "Breakpoint is valid") + self.assertEqual(bp.GetNumLocations(), 1) + + loc = bp.GetLocationAtIndex(0) + self.assertTrue(loc and loc.IsValid(), "Location is valid") + addr = loc.GetAddress() + self.assertTrue(addr and addr.IsValid(), "Loc address is valid") + line_entry = addr.GetLineEntry() + self.assertEqual( + should_have_loc, + line_entry != None and line_entry.IsValid(), + "Loc line entry is valid", + ) + if should_have_loc: + self.assertEqual(line_entry.GetLine(), 4) + self.assertEqual( + line_entry.GetFileSpec().GetFilename(), + self.main_source_file.GetFilename(), + ) + self.dbg.DeleteTarget(target) + shutil.rmtree(self.tmp_dir) + + def config_test(self, local_files, debuginfo=None, executable=None): + """ + Set up a test with local_files[] copied to a different location + so that we control which files are, or are not, found in the file system. + Also, create a stand-alone file-system 'hosted' debuginfod server with the + provided debuginfo and executable files (if they exist) + + Make the filesystem look like: + + /tmp//test/[local_files] + + /tmp//cache (for lldb to use as a temp cache) + + /tmp//buildid//executable -> + /tmp//buildid//debuginfo -> + Returns the /tmp/ path + """ + + self.build() + + uuid = self.getUUID("a.out") + if not uuid: + self.fail("Could not get UUID for a.out") + return + self.main_source_file = lldb.SBFileSpec("main.c") + self.tmp_dir = tempfile.mkdtemp() + test_dir = os.path.join(self.tmp_dir, "test") + os.makedirs(test_dir) + + self.aout = "" + # Copy the files used by the test: + for f in local_files: + shutil.copy(self.getBuildArtifact(f), test_dir) + # The first item is the binary to be used for the test + if self.aout == "": + self.aout = os.path.join(test_dir, f) + + use_debuginfod = debuginfo != None or executable != None + + # Populated the 'file://... 
mocked' Debuginfod server: + if use_debuginfod: + os.makedirs(os.path.join(self.tmp_dir, "cache")) + uuid_dir = os.path.join(self.tmp_dir, "buildid", uuid) + os.makedirs(uuid_dir) + if debuginfo: + shutil.copy( + self.getBuildArtifact(debuginfo), + os.path.join(uuid_dir, "debuginfo"), + ) + if executable: + shutil.copy( + self.getBuildArtifact(executable), + os.path.join(uuid_dir, "executable"), + ) + + # Configure LLDB for the test: + self.runCmd( + "settings set symbols.enable-external-lookup %s" + % str(use_debuginfod).lower() + ) + self.runCmd("settings clear plugin.symbol-locator.debuginfod.server-urls") + if use_debuginfod: + self.runCmd( + "settings set plugin.symbol-locator.debuginfod.cache-path %s/cache" + % self.tmp_dir + ) + self.runCmd( + "settings insert-before plugin.symbol-locator.debuginfod.server-urls 0 file://%s" + % self.tmp_dir + ) + + def getUUID(self, filename): + try: + target = self.dbg.CreateTarget(self.getBuildArtifact(filename)) + module = target.GetModuleAtIndex(0) + uuid = module.GetUUIDString().replace("-", "").lower() + self.dbg.DeleteTarget(target) + return uuid if len(uuid) == 40 else None + except: + return None diff --git a/lldb/test/API/debuginfod/Normal/main.c b/lldb/test/API/debuginfod/Normal/main.c new file mode 100644 index 0000000..4c71846 --- /dev/null +++ b/lldb/test/API/debuginfod/Normal/main.c @@ -0,0 +1,7 @@ +// This is a dump little pair of test files + +int func(int argc, const char *argv[]) { + return (argc + 1) * (argv[argc][0] + 2); +} + +int main(int argc, const char *argv[]) { return func(0, argv); } diff --git a/lldb/test/API/debuginfod/SplitDWARF/Makefile b/lldb/test/API/debuginfod/SplitDWARF/Makefile new file mode 100644 index 0000000..3ab9a96 --- /dev/null +++ b/lldb/test/API/debuginfod/SplitDWARF/Makefile @@ -0,0 +1,23 @@ +C_SOURCES := main.c + +# For split-dwarf Debuginfod tests, we need: + +# * A .DWP file (a.out.dwp) +# Produced by Makefile.rules with MAKE_DWP set to YES + +# * The "full" binary (missing things that live in .dwo's) (a.out.unstripped) +# Produced by Makefile.rules with SAVE_FULL_DEBUG_BINARY set to YES and +# SPLIT_DEBUG_SYMBOLS set to YES + +# * The stripped binary (a.out) +# Produced by Makefile.rules + +# * The 'only-keep-debug' binary (a.out.debug) +# Produced below + +MAKE_DWP := YES +SPLIT_DEBUG_SYMBOLS := YES +SAVE_FULL_DEBUG_BINARY := YES +GEN_GNU_BUILD_ID := YES + +include Makefile.rules diff --git a/lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py b/lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py new file mode 100644 index 0000000..fec2fa1 --- /dev/null +++ b/lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py @@ -0,0 +1,193 @@ +""" +Test support for the DebugInfoD network symbol acquisition protocol. +""" +import os +import shutil +import tempfile + +import lldb +from lldbsuite.test.decorators import * +import lldbsuite.test.lldbutil as lldbutil +from lldbsuite.test.lldbtest import * + + +""" +Test support for the DebugInfoD network symbol acquisition protocol. +This file is for split-dwarf (dwp) scenarios. + +1 - A split binary target with it's corresponding DWP file +2 - A stripped, split binary target with an unstripped binary and a DWP file +3 - A stripped, split binary target with an --only-keep-debug symbols file and a DWP file +""" + + +# It looks like Linux-AArch64 doesn't support build-id's on the LLDB builtbots +class DebugInfodDWPTests(TestBase): + # No need to try every flavor of debug inf. 
+ NO_DEBUG_INFO_TESTCASE = True + + @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) + def test_normal_stripped(self): + """ + Validate behavior with a stripped binary, no symbols or symbol locator. + """ + self.config_test(["a.out"]) + self.try_breakpoint(False) + + @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) + def test_normal_stripped_split_with_dwp(self): + """ + Validate behavior with symbols, but no symbol locator. + """ + self.config_test(["a.out", "a.out.debug", "a.out.dwp"]) + self.try_breakpoint(True) + + @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) + def test_normal_stripped_only_dwp(self): + """ + Validate behavior *with* dwp symbols only, but missing other symbols, + but no symbol locator. This shouldn't work: without the other symbols + DWO's appear mostly useless. + """ + self.config_test(["a.out", "a.out.dwp"]) + self.try_breakpoint(False) + + @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) + def test_debuginfod_dwp_from_service(self): + """ + Test behavior with the unstripped binary, and DWP from the service. + """ + self.config_test(["a.out.debug"], "a.out.dwp") + self.try_breakpoint(True) + + @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) + def test_debuginfod_both_symfiles_from_service(self): + """ + Test behavior with a stripped binary, with the unstripped binary and + dwp symbols from Debuginfod. + """ + self.config_test(["a.out"], "a.out.dwp", "a.out.unstripped") + self.try_breakpoint(True) + + @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) + def test_debuginfod_both_okd_symfiles_from_service(self): + """ + Test behavior with both the only-keep-debug symbols and the dwp symbols + from Debuginfod. + """ + self.config_test(["a.out"], "a.out.dwp", "a.out.debug") + self.try_breakpoint(True) + + def try_breakpoint(self, should_have_loc): + """ + This function creates a target from self.aout, sets a function-name + breakpoint, and checks to see if we have a file/line location, + as a way to validate that the symbols have been loaded. + should_have_loc specifies if we're testing that symbols have or + haven't been loaded. + """ + target = self.dbg.CreateTarget(self.aout) + self.assertTrue(target and target.IsValid(), "Target is valid") + + bp = target.BreakpointCreateByName("func") + self.assertTrue(bp and bp.IsValid(), "Breakpoint is valid") + self.assertEqual(bp.GetNumLocations(), 1) + + loc = bp.GetLocationAtIndex(0) + self.assertTrue(loc and loc.IsValid(), "Location is valid") + addr = loc.GetAddress() + self.assertTrue(addr and addr.IsValid(), "Loc address is valid") + line_entry = addr.GetLineEntry() + self.assertEqual( + should_have_loc, + line_entry != None and line_entry.IsValid(), + "Loc line entry is valid", + ) + if should_have_loc: + self.assertEqual(line_entry.GetLine(), 4) + self.assertEqual( + line_entry.GetFileSpec().GetFilename(), + self.main_source_file.GetFilename(), + ) + self.dbg.DeleteTarget(target) + shutil.rmtree(self.tmp_dir) + + def config_test(self, local_files, debuginfo=None, executable=None): + """ + Set up a test with local_files[] copied to a different location + so that we control which files are, or are not, found in the file system. 
+ Also, create a stand-alone file-system 'hosted' debuginfod server with the + provided debuginfo and executable files (if they exist) + + Make the filesystem look like: + + /tmp//test/[local_files] + + /tmp//cache (for lldb to use as a temp cache) + + /tmp//buildid//executable -> + /tmp//buildid//debuginfo -> + Returns the /tmp/ path + """ + + self.build() + + uuid = self.getUUID("a.out") + if not uuid: + self.fail("Could not get UUID for a.out") + return + self.main_source_file = lldb.SBFileSpec("main.c") + self.tmp_dir = tempfile.mkdtemp() + self.test_dir = os.path.join(self.tmp_dir, "test") + os.makedirs(self.test_dir) + + self.aout = "" + # Copy the files used by the test: + for f in local_files: + shutil.copy(self.getBuildArtifact(f), self.test_dir) + if self.aout == "": + self.aout = os.path.join(self.test_dir, f) + + use_debuginfod = debuginfo != None or executable != None + + # Populated the 'file://... mocked' Debuginfod server: + if use_debuginfod: + os.makedirs(os.path.join(self.tmp_dir, "cache")) + uuid_dir = os.path.join(self.tmp_dir, "buildid", uuid) + os.makedirs(uuid_dir) + if debuginfo: + shutil.copy( + self.getBuildArtifact(debuginfo), + os.path.join(uuid_dir, "debuginfo"), + ) + if executable: + shutil.copy( + self.getBuildArtifact(executable), + os.path.join(uuid_dir, "executable"), + ) + os.remove(self.getBuildArtifact("main.dwo")) + # Configure LLDB for the test: + self.runCmd( + "settings set symbols.enable-external-lookup %s" + % str(use_debuginfod).lower() + ) + self.runCmd("settings clear plugin.symbol-locator.debuginfod.server-urls") + if use_debuginfod: + self.runCmd( + "settings set plugin.symbol-locator.debuginfod.cache-path %s/cache" + % self.tmp_dir + ) + self.runCmd( + "settings insert-before plugin.symbol-locator.debuginfod.server-urls 0 file://%s" + % self.tmp_dir + ) + + def getUUID(self, filename): + try: + target = self.dbg.CreateTarget(self.getBuildArtifact(filename)) + module = target.GetModuleAtIndex(0) + uuid = module.GetUUIDString().replace("-", "").lower() + self.dbg.DeleteTarget(target) + return uuid if len(uuid) == 40 else None + except: + return None diff --git a/lldb/test/API/debuginfod/SplitDWARF/main.c b/lldb/test/API/debuginfod/SplitDWARF/main.c new file mode 100644 index 0000000..4c71846 --- /dev/null +++ b/lldb/test/API/debuginfod/SplitDWARF/main.c @@ -0,0 +1,7 @@ +// This is a dump little pair of test files + +int func(int argc, const char *argv[]) { + return (argc + 1) * (argv[argc][0] + 2); +} + +int main(int argc, const char *argv[]) { return func(0, argv); } -- cgit v1.1 From b798c2af8b8888f8db2a9ec293066fb72d893f0c Mon Sep 17 00:00:00 2001 From: Mark de Wever Date: Thu, 4 Apr 2024 20:48:23 +0200 Subject: [libc++][CI] Updates to Clang 19. (#85301) Since we have released Clang 16 is no longer actively supported. However the FreeBSD runner is still using this, so some tests still guard against Clang 16. 
--- .github/workflows/libcxx-build-and-test.yaml | 28 +++++++++++----------- libcxx/docs/index.rst | 2 +- .../diagnose_invalid_memory_order.verify.cpp | 2 +- .../no_unique_address.compile.pass.cpp | 2 +- .../range.split/no_unique_address.compile.pass.cpp | 2 +- .../no_unique_address.compile.pass.cpp | 2 +- 6 files changed, 19 insertions(+), 19 deletions(-) diff --git a/.github/workflows/libcxx-build-and-test.yaml b/.github/workflows/libcxx-build-and-test.yaml index 4a881ef..1e93677 100644 --- a/.github/workflows/libcxx-build-and-test.yaml +++ b/.github/workflows/libcxx-build-and-test.yaml @@ -38,11 +38,11 @@ env: # LLVM POST-BRANCH bump version # LLVM POST-BRANCH add compiler test for ToT - 1, e.g. "Clang 17" # LLVM RELEASE bump remove compiler ToT - 3, e.g. "Clang 15" - LLVM_HEAD_VERSION: "18" # Used compiler, update POST-BRANCH. - LLVM_PREVIOUS_VERSION: "17" - LLVM_OLDEST_VERSION: "16" + LLVM_HEAD_VERSION: "19" # Used compiler, update POST-BRANCH. + LLVM_PREVIOUS_VERSION: "18" + LLVM_OLDEST_VERSION: "17" GCC_STABLE_VERSION: "13" - LLVM_SYMBOLIZER_PATH: "/usr/bin/llvm-symbolizer-18" + LLVM_SYMBOLIZER_PATH: "/usr/bin/llvm-symbolizer-19" CLANG_CRASH_DIAGNOSTICS_DIR: "crash_diagnostics" @@ -59,8 +59,8 @@ jobs: 'generic-cxx26', 'generic-modules' ] - cc: [ 'clang-18' ] - cxx: [ 'clang++-18' ] + cc: [ 'clang-19' ] + cxx: [ 'clang++-19' ] clang_tidy: [ 'ON' ] include: - config: 'generic-gcc' @@ -100,8 +100,8 @@ jobs: 'generic-cxx20', 'generic-cxx23' ] - cc: [ 'clang-18' ] - cxx: [ 'clang++-18' ] + cc: [ 'clang-19' ] + cxx: [ 'clang++-19' ] clang_tidy: [ 'ON' ] include: - config: 'generic-gcc-cxx11' @@ -109,13 +109,13 @@ jobs: cxx: 'g++-13' clang_tidy: 'OFF' - config: 'generic-cxx23' - cc: 'clang-16' - cxx: 'clang++-16' - clang_tidy: 'OFF' - - config: 'generic-cxx23' cc: 'clang-17' cxx: 'clang++-17' clang_tidy: 'OFF' + - config: 'generic-cxx26' + cc: 'clang-18' + cxx: 'clang++-18' + clang_tidy: 'ON' steps: - uses: actions/checkout@v4 - name: ${{ matrix.config }} @@ -186,8 +186,8 @@ jobs: - name: ${{ matrix.config }} run: libcxx/utils/ci/run-buildbot ${{ matrix.config }} env: - CC: clang-18 - CXX: clang++-18 + CC: clang-19 + CXX: clang++-19 ENABLE_CLANG_TIDY: "OFF" - uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # v4.3.0 if: always() diff --git a/libcxx/docs/index.rst b/libcxx/docs/index.rst index db55c6f..743f992 100644 --- a/libcxx/docs/index.rst +++ b/libcxx/docs/index.rst @@ -134,7 +134,7 @@ velocity, libc++ drops support for older compilers as newer ones are released. 
============ =============== ========================== ===================== Compiler Versions Restrictions Support policy ============ =============== ========================== ===================== -Clang 16, 17, 18-git latest two stable releases per `LLVM's release page `_ and the development version +Clang 17, 18, 19-git latest two stable releases per `LLVM's release page `_ and the development version AppleClang 15 latest stable release per `Xcode's release page `_ Open XL 17.1 (AIX) latest stable release per `Open XL's documentation page `_ GCC 13 In C++11 or later only latest stable release per `GCC's release page `_ diff --git a/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp b/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp index defd43c..2790916 100644 --- a/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp +++ b/libcxx/test/libcxx/atomics/diagnose_invalid_memory_order.verify.cpp @@ -9,7 +9,7 @@ // This test fails with Clang <18 because diagnose_if doesn't emit all of the // diagnostics when -fdelayed-template-parsing is enabled, like it is in MSVC // mode. -// XFAIL: msvc && (clang-16 || clang-17) +// XFAIL: msvc && clang-17 // REQUIRES: diagnose-if-support diff --git a/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/no_unique_address.compile.pass.cpp b/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/no_unique_address.compile.pass.cpp index b411ce1..a0bfb7c 100644 --- a/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/no_unique_address.compile.pass.cpp +++ b/libcxx/test/libcxx/ranges/range.adaptors/range.lazy.split/no_unique_address.compile.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: msvc && (clang-16 || clang-17) +// XFAIL: msvc && clang-17 // class lazy_split_view { // _LIBCPP_NO_UNIQUE_ADDRESS _View __base_ = _View(); diff --git a/libcxx/test/libcxx/ranges/range.adaptors/range.split/no_unique_address.compile.pass.cpp b/libcxx/test/libcxx/ranges/range.adaptors/range.split/no_unique_address.compile.pass.cpp index 0d8bfbc..694cf1fd 100644 --- a/libcxx/test/libcxx/ranges/range.adaptors/range.split/no_unique_address.compile.pass.cpp +++ b/libcxx/test/libcxx/ranges/range.adaptors/range.split/no_unique_address.compile.pass.cpp @@ -7,7 +7,7 @@ //===----------------------------------------------------------------------===// // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: msvc && (clang-16 || clang-17) +// XFAIL: msvc && clang-17 // class split_view { // _LIBCPP_NO_UNIQUE_ADDRESS _View __base_ = _View(); diff --git a/libcxx/test/libcxx/ranges/range.factories/range.istream.view/no_unique_address.compile.pass.cpp b/libcxx/test/libcxx/ranges/range.factories/range.istream.view/no_unique_address.compile.pass.cpp index 8359d26..a77c4e4 100644 --- a/libcxx/test/libcxx/ranges/range.factories/range.istream.view/no_unique_address.compile.pass.cpp +++ b/libcxx/test/libcxx/ranges/range.factories/range.istream.view/no_unique_address.compile.pass.cpp @@ -8,7 +8,7 @@ // UNSUPPORTED: no-localization // UNSUPPORTED: c++03, c++11, c++14, c++17 -// XFAIL: msvc && (clang-16 || clang-17) +// XFAIL: msvc && clang-17 // Test the libc++ extension that the value stored in `std::ranges::istream_view` has been marked // as _LIBCPP_NO_UNIQUE_ADDRESS -- cgit v1.1 From eeaaf33fc296d52a28252ba0d6cfe187b7b3412f Mon Sep 17 00:00:00 2001 From: Jonathan Peyton Date: Thu, 4 Apr 2024 13:54:40 -0500 Subject: 
[OpenMP] Unsupport absolute KMP_HW_SUBSET test for s390x (#87555) --- openmp/runtime/test/affinity/kmp-abs-hw-subset.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/openmp/runtime/test/affinity/kmp-abs-hw-subset.c b/openmp/runtime/test/affinity/kmp-abs-hw-subset.c index 7b3493f..025a239 100644 --- a/openmp/runtime/test/affinity/kmp-abs-hw-subset.c +++ b/openmp/runtime/test/affinity/kmp-abs-hw-subset.c @@ -6,6 +6,12 @@ // RUN: env OMP_PLACES=threads %libomp-run 3 1 // RUN: env OMP_PLACES=threads %libomp-run 3 2 // REQUIRES: linux +// +// The test requires topologies with sockets, cores, threads layers where +// the socket layer contains multiple threads. +// The s390x architecture does not produce this topology and seems to have +// one thread per socket. +// UNSUPPORTED: s390x-target-arch #include #include -- cgit v1.1 From 8a0bfe490592de3df28d82c5dd69956e43c20f1d Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 4 Apr 2024 12:02:06 -0700 Subject: [SLP]Fix PR87630: wrong result for externally used vector value. Need to check that the externally used value can be represented with the BitWidth before applying it, otherwise need to keep wider type. --- llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp | 10 ++++++++++ .../X86/external-user-instruction-minbitwidth.ll | 12 ++++-------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp index 79d146a..bdd26ac 100644 --- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp +++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp @@ -14141,6 +14141,16 @@ bool BoUpSLP::collectValuesToDemote( })) return FinalAnalysis(); + if (!all_of(I->users(), + [=](User *U) { + return getTreeEntry(U) || + (UserIgnoreList && UserIgnoreList->contains(U)) || + (U->getType()->isSized() && + DL->getTypeSizeInBits(U->getType()) <= BitWidth); + }) && + !IsPotentiallyTruncated(I, BitWidth)) + return false; + unsigned Start = 0; unsigned End = I->getNumOperands(); diff --git a/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll b/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll index 9d3d602..84f7e21 100644 --- a/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll +++ b/llvm/test/Transforms/SLPVectorizer/X86/external-user-instruction-minbitwidth.ll @@ -14,17 +14,13 @@ define i8 @test() { ; CHECK-NEXT: [[CONV1:%.*]] = zext i16 [[TMP1]] to i32 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <8 x i32> poison, i32 [[CONV]], i32 0 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP4:%.*]] = trunc <8 x i32> [[TMP3]] to <8 x i16> -; CHECK-NEXT: [[TMP5:%.*]] = or <8 x i16> [[TMP4]], +; CHECK-NEXT: [[TMP4:%.*]] = or <8 x i32> [[TMP3]], ; CHECK-NEXT: [[TMP6:%.*]] = insertelement <8 x i32> poison, i32 [[CONV1]], i32 0 ; CHECK-NEXT: [[TMP7:%.*]] = shufflevector <8 x i32> [[TMP6]], <8 x i32> poison, <8 x i32> zeroinitializer -; CHECK-NEXT: [[TMP8:%.*]] = trunc <8 x i32> [[TMP7]] to <8 x i16> -; CHECK-NEXT: [[TMP9:%.*]] = add <8 x i16> [[TMP5]], [[TMP8]] -; CHECK-NEXT: [[TMP10:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP9]]) -; CHECK-NEXT: [[TMP11:%.*]] = sext i16 [[TMP10]] to i32 +; CHECK-NEXT: [[TMP8:%.*]] = add nsw <8 x i32> [[TMP4]], [[TMP7]] +; CHECK-NEXT: [[TMP11:%.*]] = call i32 @llvm.vector.reduce.or.v8i32(<8 x i32> [[TMP8]]) ; CHECK-NEXT: [[CONV4_30:%.*]] = trunc i32 [[TMP11]] to i8 -; 
CHECK-NEXT: [[TMP12:%.*]] = extractelement <8 x i16> [[TMP5]], i32 7 -; CHECK-NEXT: [[TMP13:%.*]] = sext i16 [[TMP12]] to i32 +; CHECK-NEXT: [[TMP13:%.*]] = extractelement <8 x i32> [[TMP4]], i32 7 ; CHECK-NEXT: [[XOR_31:%.*]] = and i32 [[TMP13]], -2 ; CHECK-NEXT: store i32 [[XOR_31]], ptr @d, align 4 ; CHECK-NEXT: ret i8 [[CONV4_30]] -- cgit v1.1 From 9a0ae081047d7088cdecfa86a8c90b721485e418 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 4 Apr 2024 12:21:50 -0700 Subject: [NFC][HWASAN] Simplify `selectiveInstrumentationShouldSkip` (#87670) --- .../Instrumentation/HWAddressSanitizer.cpp | 36 ++++++++++------------ 1 file changed, 16 insertions(+), 20 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index d0d349c..88e84ed 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -317,7 +317,7 @@ private: }; bool selectiveInstrumentationShouldSkip(Function &F, - FunctionAnalysisManager &FAM); + FunctionAnalysisManager &FAM) const; void initializeModule(); void createHwasanCtorComdat(); @@ -1500,28 +1500,24 @@ bool HWAddressSanitizer::instrumentStack(memtag::StackInfo &SInfo, } bool HWAddressSanitizer::selectiveInstrumentationShouldSkip( - Function &F, FunctionAnalysisManager &FAM) { + Function &F, FunctionAnalysisManager &FAM) const { if (ClRandomSkipRate.getNumOccurrences()) { std::bernoulli_distribution D(ClRandomSkipRate); - if (D(*Rng)) - return true; - } else { - auto &MAMProxy = FAM.getResult(F); - ProfileSummaryInfo *PSI = - MAMProxy.getCachedResult(*F.getParent()); - if (PSI && PSI->hasProfileSummary()) { - auto &BFI = FAM.getResult(F); - if ((ClHotPercentileCutoff.getNumOccurrences() && - ClHotPercentileCutoff >= 0) - ? PSI->isFunctionHotInCallGraphNthPercentile( - ClHotPercentileCutoff, &F, BFI) - : PSI->isFunctionHotInCallGraph(&F, BFI)) - return true; - } else { - ++NumNoProfileSummaryFuncs; - } + return (D(*Rng)); } - return false; + auto &MAMProxy = FAM.getResult(F); + ProfileSummaryInfo *PSI = + MAMProxy.getCachedResult(*F.getParent()); + if (!PSI || !PSI->hasProfileSummary()) { + ++NumNoProfileSummaryFuncs; + return false; + } + auto &BFI = FAM.getResult(F); + return ( + (ClHotPercentileCutoff.getNumOccurrences() && ClHotPercentileCutoff >= 0) + ? PSI->isFunctionHotInCallGraphNthPercentile(ClHotPercentileCutoff, + &F, BFI) + : PSI->isFunctionHotInCallGraph(&F, BFI)); } void HWAddressSanitizer::sanitizeFunction(Function &F, -- cgit v1.1 From c0c11e788a30e01128776bdd2f206c2a0d799a1f Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 4 Apr 2024 12:27:18 -0700 Subject: [NFC][HWASAN] Cleanup opt opt test (#87687) Main change is replacing DEFAULT with HOT99. I'll remove DEFAULT related functionality in the followup patches. 
--- .../HWAddressSanitizer/pgo-opt-out.ll | 24 +++++++++++----------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll b/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll index 28e43a9..e568f5b 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll @@ -1,23 +1,23 @@ ; RUN: opt < %s -passes='require,hwasan' -S -hwasan-selective-instrumentation=1 \ -; RUN: | FileCheck %s --check-prefix=DEFAULT +; RUN: -hwasan-percentile-cutoff-hot=700000 | FileCheck %s --check-prefix=HOT70 ; RUN: opt < %s -passes='require,hwasan' -S -hwasan-selective-instrumentation=1 \ -; RUN: -hwasan-percentile-cutoff-hot=700000 | FileCheck %s --check-prefix=HOT_RATE +; RUN: | FileCheck %s --check-prefix=HOT99 ; RUN: opt < %s -passes='require,hwasan' -S -hwasan-selective-instrumentation=1 \ -; RUN: -hwasan-random-skip-rate=0.0 | FileCheck %s --check-prefix=RANDOM_RATE_0 +; RUN: -hwasan-random-skip-rate=0.0 | FileCheck %s --check-prefix=RANDOM0 ; RUN: opt < %s -passes='require,hwasan' -S -hwasan-selective-instrumentation=1 \ -; RUN: -hwasan-random-skip-rate=1.0 | FileCheck %s --check-prefix=RANDOM_RATE_1 +; RUN: -hwasan-random-skip-rate=1.0 | FileCheck %s --check-prefix=RANDOM1 -; DEFAULT: @sanitized -; DEFAULT-NEXT: %x = alloca i8, i64 4 +; HOT70: @sanitized +; HOT70-NEXT: @__hwasan_tls -; HOT_RATE: @sanitized -; HOT_RATE-NEXT: @__hwasan_tls +; HOT99: @sanitized +; HOT99-NEXT: %x = alloca i8, i64 4 -; RANDOM_RATE_0: @sanitized -; RANDOM_RATE_0-NEXT: @__hwasan_tls +; RANDOM0: @sanitized +; RANDOM0-NEXT: @__hwasan_tls -; RANDOM_RATE_1: @sanitized -; RANDOM_RATE_1-NEXT: %x = alloca i8, i64 4 +; RANDOM1: @sanitized +; RANDOM1-NEXT: %x = alloca i8, i64 4 declare void @use(ptr) -- cgit v1.1 From beded9b9ceab19f81320c7cf5e3600a7745c8f05 Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Thu, 4 Apr 2024 12:28:10 -0700 Subject: [HWASan] Allow stack_history_size of 4096 (#86362) There is no reason to limit the minimum to two pages. --- compiler-rt/lib/hwasan/hwasan_thread_list.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/compiler-rt/lib/hwasan/hwasan_thread_list.h b/compiler-rt/lib/hwasan/hwasan_thread_list.h index d0eebd1..f36d278 100644 --- a/compiler-rt/lib/hwasan/hwasan_thread_list.h +++ b/compiler-rt/lib/hwasan/hwasan_thread_list.h @@ -18,7 +18,7 @@ // * Start of the shadow memory region is aligned to 2**kShadowBaseAlignment. // * All stack ring buffers are located within (2**kShadowBaseAlignment) // sized region below and adjacent to the shadow region. -// * Each ring buffer has a size of (2**N)*4096 where N is in [0, 8), and is +// * Each ring buffer has a size of (2**N)*4096 where N is in [0, 7), and is // aligned to twice its size. The value of N can be different for each buffer. 
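// A worked example (illustrative): with 64-bit uptr entries, a requested
// stack_history_size of 512 needs 512 * 8 = 4096 bytes, which now fits the
// smallest one-page buffer (shift == 0 in RingBufferSize() below); before
// this change the search started at shift == 1, so even that request was
// given a two-page, 8192-byte ring buffer.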
// // These constrains guarantee that, given an address A of any element of the @@ -55,7 +55,7 @@ static uptr RingBufferSize() { uptr desired_bytes = flags()->stack_history_size * sizeof(uptr); // FIXME: increase the limit to 8 once this bug is fixed: // https://bugs.llvm.org/show_bug.cgi?id=39030 - for (int shift = 1; shift < 7; ++shift) { + for (int shift = 0; shift < 7; ++shift) { uptr size = 4096 * (1ULL << shift); if (size >= desired_bytes) return size; -- cgit v1.1 From 7e87d03b45f3ca0f6d9c09e8e9090329cc84592e Mon Sep 17 00:00:00 2001 From: Keyi Zhang Date: Thu, 4 Apr 2024 12:32:47 -0700 Subject: [MLIR][CF] Fix cf.switch parsing with result numbers (#87658) This PR should fix the parsing bug reported in https://github.com/llvm/llvm-project/issues/87430. It allows using result number as the `cf.switch` operand. --- mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp | 4 ++-- mlir/test/Dialect/ControlFlow/ops.mlir | 13 +++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp index 5d11f8f6..1320db3 100644 --- a/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp +++ b/mlir/lib/Dialect/ControlFlow/IR/ControlFlowOps.cpp @@ -531,8 +531,8 @@ static ParseResult parseSwitchOpCases( failed(parser.parseSuccessor(destination))) return failure(); if (succeeded(parser.parseOptionalLParen())) { - if (failed(parser.parseOperandList(operands, OpAsmParser::Delimiter::None, - /*allowResultNumber=*/false)) || + if (failed(parser.parseOperandList(operands, + OpAsmParser::Delimiter::None)) || failed(parser.parseColonTypeList(operandTypes)) || failed(parser.parseRParen())) return failure(); diff --git a/mlir/test/Dialect/ControlFlow/ops.mlir b/mlir/test/Dialect/ControlFlow/ops.mlir index 8453c2b..c9317c7 100644 --- a/mlir/test/Dialect/ControlFlow/ops.mlir +++ b/mlir/test/Dialect/ControlFlow/ops.mlir @@ -38,3 +38,16 @@ func.func @switch_i64(%flag : i64, %caseOperand : i32) { ^bb3(%bb3arg : i32): return } + +// CHECK-LABEL: func @switch_result_number +func.func @switch_result_number(%arg0: i32) { + %0:2 = "test.op_with_two_results"() : () -> (i32, i32) + cf.switch %arg0 : i32, [ + default: ^bb2, + 0: ^bb1(%0#0 : i32) + ] + ^bb1(%1: i32): + return + ^bb2: + return +} -- cgit v1.1 From ce46b400d568994c45361c3061554bd2b2e81b8b Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 4 Apr 2024 12:34:41 -0700 Subject: [NFC][UBSAN] Similar to #87687 for UBSAN --- llvm/test/Transforms/RemoveTraps/remove-traps.ll | 128 +++++++++++------------ 1 file changed, 64 insertions(+), 64 deletions(-) diff --git a/llvm/test/Transforms/RemoveTraps/remove-traps.ll b/llvm/test/Transforms/RemoveTraps/remove-traps.ll index 71549e7..e3cca83 100644 --- a/llvm/test/Transforms/RemoveTraps/remove-traps.ll +++ b/llvm/test/Transforms/RemoveTraps/remove-traps.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s -passes='function(remove-traps)' -S | FileCheck %s --check-prefixes=NOPROFILE ; RUN: opt < %s -passes='function(remove-traps)' -remove-traps-random-rate=1 -S | FileCheck %s --check-prefixes=ALL -; RUN: opt < %s -passes='require,function(remove-traps)' -S | FileCheck %s --check-prefixes=HOT +; RUN: opt < %s -passes='require,function(remove-traps)' -S | FileCheck %s --check-prefixes=HOT99 ; RUN: opt < %s -passes='require,function(remove-traps)' -remove-traps-percentile-cutoff-hot=700000 -S | FileCheck %s --check-prefixes=HOT70 target 
triple = "x86_64-pc-linux-gnu" @@ -30,16 +30,16 @@ define dso_local noundef i32 @simple(ptr noundef readonly %0) { ; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 ; ALL-NEXT: ret i32 [[TMP5]] ; -; HOT-LABEL: define dso_local noundef i32 @simple( -; HOT-SAME: ptr noundef readonly [[TMP0:%.*]]) { -; HOT-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; HOT: 3: -; HOT-NEXT: tail call void @llvm.ubsantrap(i8 22) -; HOT-NEXT: unreachable -; HOT: 4: -; HOT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; HOT-NEXT: ret i32 [[TMP5]] +; HOT99-LABEL: define dso_local noundef i32 @simple( +; HOT99-SAME: ptr noundef readonly [[TMP0:%.*]]) { +; HOT99-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT99-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT99: 3: +; HOT99-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT99-NEXT: unreachable +; HOT99: 4: +; HOT99-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT99-NEXT: ret i32 [[TMP5]] ; ; HOT70-LABEL: define dso_local noundef i32 @simple( ; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) { @@ -87,15 +87,15 @@ define dso_local noundef i32 @hot(ptr noundef readonly %0) !prof !36 { ; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 ; ALL-NEXT: ret i32 [[TMP5]] ; -; HOT-LABEL: define dso_local noundef i32 @hot( -; HOT-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { -; HOT-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; HOT: 3: -; HOT-NEXT: unreachable -; HOT: 4: -; HOT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; HOT-NEXT: ret i32 [[TMP5]] +; HOT99-LABEL: define dso_local noundef i32 @hot( +; HOT99-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { +; HOT99-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT99-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT99: 3: +; HOT99-NEXT: unreachable +; HOT99: 4: +; HOT99-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT99-NEXT: ret i32 [[TMP5]] ; ; HOT70-LABEL: define dso_local noundef i32 @hot( ; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { @@ -142,15 +142,15 @@ define dso_local noundef i32 @veryHot(ptr noundef readonly %0) !prof !39 { ; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 ; ALL-NEXT: ret i32 [[TMP5]] ; -; HOT-LABEL: define dso_local noundef i32 @veryHot( -; HOT-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { -; HOT-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] -; HOT: 3: -; HOT-NEXT: unreachable -; HOT: 4: -; HOT-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 -; HOT-NEXT: ret i32 [[TMP5]] +; HOT99-LABEL: define dso_local noundef i32 @veryHot( +; HOT99-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { +; HOT99-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null +; HOT99-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT99: 3: +; HOT99-NEXT: unreachable +; HOT99: 4: +; HOT99-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 +; HOT99-NEXT: ret i32 [[TMP5]] ; ; HOT70-LABEL: define dso_local noundef i32 @veryHot( ; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { @@ -209,22 +209,22 @@ define dso_local noundef i32 @branchColdFnHot(i32 noundef %0, ptr noundef readon ; ALL-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] ; 
ALL-NEXT: ret i32 [[TMP10]] ; -; HOT-LABEL: define dso_local noundef i32 @branchColdFnHot( -; HOT-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { -; HOT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; HOT-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] -; HOT: 4: -; HOT-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; HOT-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; HOT: 6: -; HOT-NEXT: tail call void @llvm.ubsantrap(i8 22) -; HOT-NEXT: unreachable -; HOT: 7: -; HOT-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 -; HOT-NEXT: br label [[TMP9]] -; HOT: 9: -; HOT-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] -; HOT-NEXT: ret i32 [[TMP10]] +; HOT99-LABEL: define dso_local noundef i32 @branchColdFnHot( +; HOT99-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { +; HOT99-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT99-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] +; HOT99: 4: +; HOT99-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT99-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT99: 6: +; HOT99-NEXT: tail call void @llvm.ubsantrap(i8 22) +; HOT99-NEXT: unreachable +; HOT99: 7: +; HOT99-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT99-NEXT: br label [[TMP9]] +; HOT99: 9: +; HOT99-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT99-NEXT: ret i32 [[TMP10]] ; ; HOT70-LABEL: define dso_local noundef i32 @branchColdFnHot( ; HOT70-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF17]] { @@ -297,21 +297,21 @@ define dso_local noundef i32 @branchHotFnCold(i32 noundef %0, ptr noundef readon ; ALL-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] ; ALL-NEXT: ret i32 [[TMP10]] ; -; HOT-LABEL: define dso_local noundef i32 @branchHotFnCold( -; HOT-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { -; HOT-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 -; HOT-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] -; HOT: 4: -; HOT-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; HOT-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] -; HOT: 6: -; HOT-NEXT: unreachable -; HOT: 7: -; HOT-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 -; HOT-NEXT: br label [[TMP9]] -; HOT: 9: -; HOT-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] -; HOT-NEXT: ret i32 [[TMP10]] +; HOT99-LABEL: define dso_local noundef i32 @branchHotFnCold( +; HOT99-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { +; HOT99-NEXT: [[TMP3:%.*]] = icmp eq i32 [[TMP0]], 0 +; HOT99-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] +; HOT99: 4: +; HOT99-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null +; HOT99-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT99: 6: +; HOT99-NEXT: unreachable +; HOT99: 7: +; HOT99-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 +; HOT99-NEXT: br label [[TMP9]] +; HOT99: 9: +; HOT99-NEXT: [[TMP10:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ 0, [[TMP2:%.*]] ] +; HOT99-NEXT: ret i32 [[TMP10]] ; ; HOT70-LABEL: define dso_local noundef i32 @branchHotFnCold( ; HOT70-SAME: i32 noundef [[TMP0:%.*]], ptr noundef readonly [[TMP1:%.*]]) !prof [[PROF16]] { @@ -385,10 +385,10 @@ define dso_local noundef 
i32 @branchHotFnCold(i32 noundef %0, ptr noundef readon ; ALL: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} ; ALL: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} ;. -; HOT: [[PROF16]] = !{!"function_entry_count", i64 1000} -; HOT: [[PROF17]] = !{!"function_entry_count", i64 7000} -; HOT: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} -; HOT: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} +; HOT99: [[PROF16]] = !{!"function_entry_count", i64 1000} +; HOT99: [[PROF17]] = !{!"function_entry_count", i64 7000} +; HOT99: [[PROF18]] = !{!"branch_weights", i32 1000, i32 1} +; HOT99: [[PROF19]] = !{!"branch_weights", i32 1, i32 1000} ;. ; HOT70: [[PROF16]] = !{!"function_entry_count", i64 1000} ; HOT70: [[PROF17]] = !{!"function_entry_count", i64 7000} -- cgit v1.1 From dad065dc6e03725aeb60d703cbaccd175a2f1d53 Mon Sep 17 00:00:00 2001 From: Jeff Niu Date: Thu, 4 Apr 2024 22:39:07 +0300 Subject: [mlir][ods] Fix attribute setter gen when properties are on (#87688) ODS was still generating the old `Operation::setAttr` hooks for ODS methods for setting attributes, when the backing implementation of the attributes was changed to properties. No idea how this wasn't noticed until now. --- mlir/test/mlir-tblgen/op-properties.td | 21 ++++++++++++ mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp | 52 +++++++++++++++++++++-------- 2 files changed, 60 insertions(+), 13 deletions(-) create mode 100644 mlir/test/mlir-tblgen/op-properties.td diff --git a/mlir/test/mlir-tblgen/op-properties.td b/mlir/test/mlir-tblgen/op-properties.td new file mode 100644 index 0000000..a484f68 --- /dev/null +++ b/mlir/test/mlir-tblgen/op-properties.td @@ -0,0 +1,21 @@ +// RUN: mlir-tblgen -gen-op-defs -I %S/../../include %s | FileCheck %s + +include "mlir/IR/AttrTypeBase.td" +include "mlir/IR/EnumAttr.td" +include "mlir/IR/OpBase.td" + +def Test_Dialect : Dialect { + let name = "test"; + let cppNamespace = "foobar"; +} +class NS_Op traits = []> : + Op; + +def OpWithAttr : NS_Op<"op_with_attr">{ + let arguments = (ins AnyAttr:$attr, OptionalAttr:$optional); +} + +// CHECK: void OpWithAttr::setAttrAttr(::mlir::Attribute attr) +// CHECK-NEXT: getProperties().attr = attr +// CHECK: void OpWithAttr::setOptionalAttr(::mlir::Attribute attr) +// CHECK-NEXT: getProperties().optional = attr diff --git a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp index 3a69752..843760d 100644 --- a/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp +++ b/mlir/tools/mlir-tblgen/OpDefinitionsGen.cpp @@ -1804,23 +1804,36 @@ void OpEmitter::genAttrGetters() { } void OpEmitter::genAttrSetters() { + bool useProperties = op.getDialect().usePropertiesForAttributes(); + + // Generate the code to set an attribute. + auto emitSetAttr = [&](Method *method, StringRef getterName, + StringRef attrName, StringRef attrVar) { + if (useProperties) { + method->body() << formatv(" getProperties().{0} = {1};", attrName, + attrVar); + } else { + method->body() << formatv(" (*this)->setAttr({0}AttrName(), {1});", + getterName, attrVar); + } + }; + // Generate raw named setter type. This is a wrapper class that allows setting // to the attributes via setters instead of having to use the string interface // for better compile time verification. 
auto emitAttrWithStorageType = [&](StringRef setterName, StringRef getterName, - Attribute attr) { + StringRef attrName, Attribute attr) { auto *method = opClass.addMethod("void", setterName + "Attr", MethodParameter(attr.getStorageType(), "attr")); if (method) - method->body() << formatv(" (*this)->setAttr({0}AttrName(), attr);", - getterName); + emitSetAttr(method, getterName, attrName, "attr"); }; // Generate a setter that accepts the underlying C++ type as opposed to the // attribute type. auto emitAttrWithReturnType = [&](StringRef setterName, StringRef getterName, - Attribute attr) { + StringRef attrName, Attribute attr) { Attribute baseAttr = attr.getBaseAttr(); if (!canUseUnwrappedRawValue(baseAttr)) return; @@ -1849,9 +1862,8 @@ void OpEmitter::genAttrSetters() { // If the value isn't optional, just set it directly. if (!isOptional) { - method->body() << formatv( - " (*this)->setAttr({0}AttrName(), {1});", getterName, - constBuildAttrFromParam(attr, fctx, "attrValue")); + emitSetAttr(method, getterName, attrName, + constBuildAttrFromParam(attr, fctx, "attrValue")); return; } @@ -1862,13 +1874,25 @@ void OpEmitter::genAttrSetters() { // optional but not in the same way as the others (i.e. it uses bool over // std::optional<>). StringRef paramStr = isUnitAttr ? "attrValue" : "*attrValue"; - const char *optionalCodeBody = R"( + if (!useProperties) { + const char *optionalCodeBody = R"( if (attrValue) return (*this)->setAttr({0}AttrName(), {1}); (*this)->removeAttr({0}AttrName());)"; - method->body() << formatv( - optionalCodeBody, getterName, - constBuildAttrFromParam(baseAttr, fctx, paramStr)); + method->body() << formatv( + optionalCodeBody, getterName, + constBuildAttrFromParam(baseAttr, fctx, paramStr)); + } else { + const char *optionalCodeBody = R"( + auto &odsProp = getProperties().{0}; + if (attrValue) + odsProp = {1}; + else + odsProp = nullptr;)"; + method->body() << formatv( + optionalCodeBody, attrName, + constBuildAttrFromParam(baseAttr, fctx, paramStr)); + } }; for (const NamedAttribute &namedAttr : op.getAttributes()) { @@ -1876,8 +1900,10 @@ void OpEmitter::genAttrSetters() { continue; std::string setterName = op.getSetterName(namedAttr.name); std::string getterName = op.getGetterName(namedAttr.name); - emitAttrWithStorageType(setterName, getterName, namedAttr.attr); - emitAttrWithReturnType(setterName, getterName, namedAttr.attr); + emitAttrWithStorageType(setterName, getterName, namedAttr.name, + namedAttr.attr); + emitAttrWithReturnType(setterName, getterName, namedAttr.name, + namedAttr.attr); } } -- cgit v1.1 From f2a0dd3305beb0e69eb165c31c40214c16278697 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Valentin=20Clement=20=28=E3=83=90=E3=83=AC=E3=83=B3?= =?UTF-8?q?=E3=82=BF=E3=82=A4=E3=83=B3=20=E3=82=AF=E3=83=AC=E3=83=A1?= =?UTF-8?q?=E3=83=B3=29?= Date: Thu, 4 Apr 2024 12:48:08 -0700 Subject: [flang][cuda] Add restriction on assumed size device variable (#87664) According to https://docs.nvidia.com/hpc-sdk/compilers/cuda-fortran-prog-guide/#cfpg-var-qual-attr-device > A device array may be an explicit-shape array, an allocatable array, or an assumed-shape dummy array. Assumed size array are not supported. This patch adds an error for that case. 
--- flang/lib/Semantics/check-declarations.cpp | 5 +++++ flang/test/Semantics/cuf03.cuf | 3 ++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/flang/lib/Semantics/check-declarations.cpp b/flang/lib/Semantics/check-declarations.cpp index dec8fee..b2de377 100644 --- a/flang/lib/Semantics/check-declarations.cpp +++ b/flang/lib/Semantics/check-declarations.cpp @@ -948,6 +948,11 @@ void CheckHelper::CheckObjectEntity( "Component '%s' with ATTRIBUTES(DEVICE) must also be allocatable"_err_en_US, symbol.name()); } + if (IsAssumedSizeArray(symbol)) { + messages_.Say( + "Object '%s' with ATTRIBUTES(DEVICE) may not be assumed size"_err_en_US, + symbol.name()); + } break; case common::CUDADataAttr::Managed: if (!IsAutomatic(symbol) && !IsAllocatable(symbol) && diff --git a/flang/test/Semantics/cuf03.cuf b/flang/test/Semantics/cuf03.cuf index 41bfbb7..7384a10 100644 --- a/flang/test/Semantics/cuf03.cuf +++ b/flang/test/Semantics/cuf03.cuf @@ -51,7 +51,8 @@ module m contains attributes(device) subroutine devsubr(n,da) integer, intent(in) :: n - real, device :: da(*) ! ok + !ERROR: Object 'da' with ATTRIBUTES(DEVICE) may not be assumed size + real, device :: da(*) real, managed :: ma(n) ! ok !WARNING: Pointer 'dp' may not be associated in a device subprogram real, device, pointer :: dp -- cgit v1.1 From c91a0a28908ec48f3775cdacede66163eb6339ff Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 4 Apr 2024 12:51:23 -0700 Subject: [builtin][NFC] Remove ClangBuiltin<"__builtin_allow_ubsan_check"> (#87581) We don't need clang builtin for this one. It was copy pasted from `__builtin_allow_runtime_check` RFC: https://discourse.llvm.org/t/rfc-add-llvm-experimental-hot-intrinsic-or-llvm-hot/77641 --- llvm/include/llvm/IR/Intrinsics.td | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td index c04f4c5..f0723a6 100644 --- a/llvm/include/llvm/IR/Intrinsics.td +++ b/llvm/include/llvm/IR/Intrinsics.td @@ -1733,8 +1733,7 @@ def int_ubsantrap : Intrinsic<[], [llvm_i8_ty], // Return true if ubsan check is allowed. def int_allow_ubsan_check : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_i8_ty], - [IntrInaccessibleMemOnly, IntrWriteMem, ImmArg>, NoUndef]>, - ClangBuiltin<"__builtin_allow_ubsan_check">; + [IntrInaccessibleMemOnly, IntrWriteMem, ImmArg>, NoUndef]>; // Return true if runtime check is allowed. def int_allow_runtime_check : DefaultAttrsIntrinsic<[llvm_i1_ty], [llvm_metadata_ty], -- cgit v1.1 From dfaa144d0ca15839d1d11af472a4b7e2d2c6b7ec Mon Sep 17 00:00:00 2001 From: Florian Mayer Date: Thu, 4 Apr 2024 12:55:05 -0700 Subject: [NFC] [HWASan] clarify FIXME comment (#87689) --- compiler-rt/lib/hwasan/hwasan_thread_list.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/compiler-rt/lib/hwasan/hwasan_thread_list.h b/compiler-rt/lib/hwasan/hwasan_thread_list.h index f36d278..369a1c3 100644 --- a/compiler-rt/lib/hwasan/hwasan_thread_list.h +++ b/compiler-rt/lib/hwasan/hwasan_thread_list.h @@ -55,6 +55,9 @@ static uptr RingBufferSize() { uptr desired_bytes = flags()->stack_history_size * sizeof(uptr); // FIXME: increase the limit to 8 once this bug is fixed: // https://bugs.llvm.org/show_bug.cgi?id=39030 + // Note that we *cannot* do that on Android, as the runtime will indefinitely + // have to support code that is compiled with ashr, which only works with + // shifts up to 6. 
for (int shift = 0; shift < 7; ++shift) { uptr size = 4096 * (1ULL << shift); if (size >= desired_bytes) -- cgit v1.1 From df69a305253f1d1b4a4066055a07101a4cc03e55 Mon Sep 17 00:00:00 2001 From: Ian Anderson Date: Thu, 4 Apr 2024 13:01:49 -0700 Subject: [Headers] Don't declare unreachable() from stddef.h in C++ (#86748) Even if __need_unreachable is set, stddef.h should not declare unreachable() in C++ because it conflicts with the declaration in \. --- clang/lib/Headers/__stddef_unreachable.h | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/clang/lib/Headers/__stddef_unreachable.h b/clang/lib/Headers/__stddef_unreachable.h index 518580c..61df43e 100644 --- a/clang/lib/Headers/__stddef_unreachable.h +++ b/clang/lib/Headers/__stddef_unreachable.h @@ -7,6 +7,8 @@ *===-----------------------------------------------------------------------=== */ +#ifndef __cplusplus + /* * When -fbuiltin-headers-in-system-modules is set this is a non-modular header * and needs to behave as if it was textual. @@ -15,3 +17,5 @@ (__has_feature(modules) && !__building_module(_Builtin_stddef)) #define unreachable() __builtin_unreachable() #endif + +#endif -- cgit v1.1 From 74373c1bef3d35b03c9dc6186229abf74556b256 Mon Sep 17 00:00:00 2001 From: Victor Campos Date: Thu, 4 Apr 2024 21:29:21 +0100 Subject: Revert "[ARM][Thumb2] Mark BTI-clearing instructions as scheduling region boundaries" (#87699) Reverts llvm/llvm-project#79173 The testcase fails in non-asserts builds. --- llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 19 --- llvm/lib/Target/ARM/Thumb2InstrInfo.h | 4 - llvm/test/CodeGen/ARM/misched-branch-targets.mir | 166 ----------------------- 3 files changed, 189 deletions(-) delete mode 100644 llvm/test/CodeGen/ARM/misched-branch-targets.mir diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp index fc2834c..083f25f 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -286,25 +286,6 @@ MachineInstr *Thumb2InstrInfo::commuteInstructionImpl(MachineInstr &MI, return ARMBaseInstrInfo::commuteInstructionImpl(MI, NewMI, OpIdx1, OpIdx2); } -bool Thumb2InstrInfo::isSchedulingBoundary(const MachineInstr &MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const { - // BTI clearing instructions shall not take part in scheduling regions as - // they must stay in their intended place. Although PAC isn't BTI clearing, - // it can be transformed into PACBTI after the pre-RA Machine Scheduling - // has taken place, so its movement must also be restricted. 
- switch (MI.getOpcode()) { - case ARM::t2BTI: - case ARM::t2PAC: - case ARM::t2PACBTI: - case ARM::t2SG: - return true; - default: - break; - } - return ARMBaseInstrInfo::isSchedulingBoundary(MI, MBB, MF); -} - void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, const DebugLoc &dl, Register DestReg, diff --git a/llvm/lib/Target/ARM/Thumb2InstrInfo.h b/llvm/lib/Target/ARM/Thumb2InstrInfo.h index 8915da8..4bb412f 100644 --- a/llvm/lib/Target/ARM/Thumb2InstrInfo.h +++ b/llvm/lib/Target/ARM/Thumb2InstrInfo.h @@ -68,10 +68,6 @@ public: unsigned OpIdx1, unsigned OpIdx2) const override; - bool isSchedulingBoundary(const MachineInstr &MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const override; - private: void expandLoadStackGuard(MachineBasicBlock::iterator MI) const override; }; diff --git a/llvm/test/CodeGen/ARM/misched-branch-targets.mir b/llvm/test/CodeGen/ARM/misched-branch-targets.mir deleted file mode 100644 index b071fbd..0000000 --- a/llvm/test/CodeGen/ARM/misched-branch-targets.mir +++ /dev/null @@ -1,166 +0,0 @@ -# RUN: llc -o - -run-pass=machine-scheduler -misched=shuffle %s | FileCheck %s -# RUN: llc -o - -run-pass=postmisched %s | FileCheck %s - ---- | - target datalayout = "e-m:e-p:32:32-Fi8-i64:64-v128:64:128-a:0:32-n32-S64" - target triple = "thumbv8.1m.main-arm-none-eabi" - - define i32 @foo_bti() #0 { - entry: - ret i32 0 - } - - define i32 @foo_pac() #0 { - entry: - ret i32 0 - } - - define i32 @foo_pacbti() #0 { - entry: - ret i32 0 - } - - define i32 @foo_setjmp() #0 { - entry: - ret i32 0 - if.then: - ret i32 0 - } - - define i32 @foo_sg() #0 { - entry: - ret i32 0 - } - - declare i32 @setjmp(ptr noundef) #1 - declare void @longjmp(ptr noundef, i32 noundef) #2 - - attributes #0 = { "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" } - attributes #1 = { nounwind returns_twice "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" } - attributes #2 = { noreturn nounwind "frame-pointer"="all" "target-cpu"="cortex-m55" "target-features"="+armv8.1-m.main" } - -... ---- -name: foo_bti -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r0 - - t2BTI - renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg - tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 - -... - -# CHECK-LABEL: name: foo_bti -# CHECK: body: -# CHECK-NEXT: bb.0.entry: -# CHECK-NEXT: liveins: $r0 -# CHECK-NEXT: {{^ +$}} -# CHECK-NEXT: t2BTI - ---- -name: foo_pac -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r0, $lr, $r12 - - frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp - renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg - $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr - $r7 = frame-setup tMOVr killed $sp, 14 /* CC::al */, $noreg - early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg - $r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg - $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr - t2AUT implicit $r12, implicit $lr, implicit $sp - tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - -... 
- -# CHECK-LABEL: name: foo_pac -# CHECK: body: -# CHECK-NEXT: bb.0.entry: -# CHECK-NEXT: liveins: $r0, $lr, $r12 -# CHECK-NEXT: {{^ +$}} -# CHECK-NEXT: frame-setup t2PAC implicit-def $r12, implicit $lr, implicit $sp - ---- -name: foo_pacbti -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r0, $lr, $r12 - - frame-setup t2PACBTI implicit-def $r12, implicit $lr, implicit $sp - renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg - $sp = frame-setup t2STMDB_UPD $sp, 14 /* CC::al */, $noreg, killed $r7, killed $lr - $r7 = frame-setup tMOVr killed $sp, 14 /* CC::al */, $noreg - early-clobber $sp = frame-setup t2STR_PRE killed $r12, $sp, -4, 14 /* CC::al */, $noreg - $r12, $sp = frame-destroy t2LDR_POST $sp, 4, 14 /* CC::al */, $noreg - $sp = frame-destroy t2LDMIA_UPD $sp, 14 /* CC::al */, $noreg, def $r7, def $lr - t2AUT implicit $r12, implicit $lr, implicit $sp - tBX_RET 14 /* CC::al */, $noreg, implicit $r0 - -... - -# CHECK-LABEL: name: foo_pacbti -# CHECK: body: -# CHECK-NEXT: bb.0.entry: -# CHECK-NEXT: liveins: $r0, $lr, $r12 -# CHECK-NEXT: {{^ +$}} -# CHECK-NEXT: frame-setup t2PACBTI implicit-def $r12, implicit $lr, implicit $sp - ---- -name: foo_setjmp -tracksRegLiveness: true -body: | - bb.0.entry: - successors: %bb.1 - liveins: $lr - - frame-setup tPUSH 14 /* CC::al */, $noreg, $r7, killed $lr, implicit-def $sp, implicit $sp - $r7 = frame-setup tMOVr $sp, 14 /* CC::al */, $noreg - $sp = frame-setup tSUBspi $sp, 40, 14 /* CC::al */, $noreg - renamable $r0 = tMOVr $sp, 14 /* CC::al */, $noreg - tBL 14 /* CC::al */, $noreg, @setjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0 - t2BTI - renamable $r2 = nsw t2ADDri $r0, 3, 14 /* CC::al */, $noreg, $noreg - tCMPi8 killed renamable $r0, 0, 14 /* CC::al */, $noreg, implicit-def $cpsr - t2IT 0, 2, implicit-def $itstate - renamable $r0 = tMOVi8 $noreg, 0, 0 /* CC::eq */, $cpsr, implicit $itstate - $sp = frame-destroy tADDspi $sp, 40, 0 /* CC::eq */, $cpsr, implicit $itstate - frame-destroy tPOP_RET 0 /* CC::eq */, killed $cpsr, def $r7, def $pc, implicit killed $r0, implicit $sp, implicit killed $itstate - - bb.1.if.then: - renamable $r0 = tMOVr $sp, 14 /* CC::al */, $noreg - renamable $r1, dead $cpsr = tMOVi8 1, 14 /* CC::al */, $noreg - tBL 14 /* CC::al */, $noreg, @longjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit killed $r1, implicit-def $sp - -... - -# CHECK-LABEL: name: foo_setjmp -# CHECK: body: -# CHECK: tBL 14 /* CC::al */, $noreg, @setjmp, csr_aapcs, implicit-def dead $lr, implicit $sp, implicit killed $r0, implicit-def $sp, implicit-def $r0 -# CHECK-NEXT: t2BTI - ---- -name: foo_sg -tracksRegLiveness: true -body: | - bb.0.entry: - liveins: $r0 - - t2SG 14 /* CC::al */, $noreg - renamable $r0, dead $cpsr = nsw tADDi8 killed renamable $r0, 1, 14 /* CC::al */, $noreg - tBX_RET 14 /* CC::al */, $noreg, implicit killed $r0 - -... - -# CHECK-LABEL: name: foo_sg -# CHECK: body: -# CHECK-NEXT: bb.0.entry: -# CHECK-NEXT: liveins: $r0 -# CHECK-NEXT: {{^ +$}} -# CHECK-NEXT: t2SG -- cgit v1.1 From fd2a5c46d8450f0f37a468d4e5f06a9662abae2d Mon Sep 17 00:00:00 2001 From: Kazu Hirata Date: Thu, 4 Apr 2024 13:36:56 -0700 Subject: [memprof] Introduce writeMemProf (NFC) (#87698) This patch refactors the serialization of MemProf data to a switch statement style: switch (Version) { case Version0: return ...; case Version1: return ...; } just like IndexedMemProfRecord::serialize. 
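For readers unfamiliar with the layout, a minimal self-contained sketch of this kind
of per-version dispatch follows; the names and the stdio-based "stream" are
illustrative stand-ins only, not the actual InstrProfWriter helpers:

  // Sketch only: per-version writers share small helpers, and a switch dispatches
  // on the requested version (hypothetical names, plain stdio in place of ProfOStream).
  #include <cstdint>
  #include <cstdio>

  enum IndexedVersion : std::uint64_t { Version0, Version1, Version2 };

  static bool writeSchema(std::FILE *OS) { return std::fputs("schema ", OS) >= 0; }
  static bool writeRecords(std::FILE *OS) { return std::fputs("records ", OS) >= 0; }

  static bool writeMemProfV0(std::FILE *OS) {
    return writeSchema(OS) && writeRecords(OS);             // no leading version word
  }
  static bool writeMemProfV1(std::FILE *OS) {
    return std::fputs("v1 ", OS) >= 0 && writeSchema(OS) && writeRecords(OS);
  }

  static bool writeMemProf(std::FILE *OS, IndexedVersion V) {
    switch (V) {
    case Version0: return writeMemProfV0(OS);
    case Version1: return writeMemProfV1(OS);
    case Version2: break;                                   // not implemented yet
    }
    return false;                                           // unsupported version
  }

  int main() { return writeMemProf(stdout, Version1) ? 0 : 1; }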
A reasonable amount of code is shared and factored out to helper functions between writeMemProfV0 and writeMemProfV1 to the extent that doens't hamper readability. --- llvm/lib/ProfileData/InstrProfWriter.cpp | 218 ++++++++++++++++++++----------- 1 file changed, 142 insertions(+), 76 deletions(-) diff --git a/llvm/lib/ProfileData/InstrProfWriter.cpp b/llvm/lib/ProfileData/InstrProfWriter.cpp index 96ab729..72d77d5 100644 --- a/llvm/lib/ProfileData/InstrProfWriter.cpp +++ b/llvm/lib/ProfileData/InstrProfWriter.cpp @@ -414,6 +414,144 @@ static void setSummary(IndexedInstrProf::Summary *TheSummary, TheSummary->setEntry(I, Res[I]); } +// Serialize Schema. +static void writeMemProfSchema(ProfOStream &OS, + const memprof::MemProfSchema &Schema) { + OS.write(static_cast(Schema.size())); + for (const auto Id : Schema) + OS.write(static_cast(Id)); +} + +// Serialize MemProfRecordData. Return RecordTableOffset. +static uint64_t writeMemProfRecords( + ProfOStream &OS, + llvm::MapVector + &MemProfRecordData, + memprof::MemProfSchema *Schema) { + auto RecordWriter = + std::make_unique(memprof::Version1); + RecordWriter->Schema = Schema; + OnDiskChainedHashTableGenerator + RecordTableGenerator; + for (auto &I : MemProfRecordData) { + // Insert the key (func hash) and value (memprof record). + RecordTableGenerator.insert(I.first, I.second, *RecordWriter.get()); + } + // Release the memory of this MapVector as it is no longer needed. + MemProfRecordData.clear(); + + // The call to Emit invokes RecordWriterTrait::EmitData which destructs + // the memprof record copies owned by the RecordTableGenerator. This works + // because the RecordTableGenerator is not used after this point. + return RecordTableGenerator.Emit(OS.OS, *RecordWriter); +} + +// Serialize MemProfFrameData. Return FrameTableOffset. +static uint64_t writeMemProfFrames( + ProfOStream &OS, + llvm::MapVector &MemProfFrameData) { + auto FrameWriter = std::make_unique(); + OnDiskChainedHashTableGenerator + FrameTableGenerator; + for (auto &I : MemProfFrameData) { + // Insert the key (frame id) and value (frame contents). + FrameTableGenerator.insert(I.first, I.second); + } + // Release the memory of this MapVector as it is no longer needed. + MemProfFrameData.clear(); + + return FrameTableGenerator.Emit(OS.OS, *FrameWriter); +} + +static Error writeMemProfV0( + ProfOStream &OS, + llvm::MapVector + &MemProfRecordData, + llvm::MapVector &MemProfFrameData) { + uint64_t HeaderUpdatePos = OS.tell(); + OS.write(0ULL); // Reserve space for the memprof record table offset. + OS.write(0ULL); // Reserve space for the memprof frame payload offset. + OS.write(0ULL); // Reserve space for the memprof frame table offset. + + auto Schema = memprof::PortableMemInfoBlock::getSchema(); + writeMemProfSchema(OS, Schema); + + uint64_t RecordTableOffset = + writeMemProfRecords(OS, MemProfRecordData, &Schema); + + uint64_t FramePayloadOffset = OS.tell(); + uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData); + + uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset}; + OS.patch({{HeaderUpdatePos, Header, std::size(Header)}}); + + return Error::success(); +} + +static Error writeMemProfV1( + ProfOStream &OS, + llvm::MapVector + &MemProfRecordData, + llvm::MapVector &MemProfFrameData) { + OS.write(memprof::Version0); + uint64_t HeaderUpdatePos = OS.tell(); + OS.write(0ULL); // Reserve space for the memprof record table offset. + OS.write(0ULL); // Reserve space for the memprof frame payload offset. 
+ OS.write(0ULL); // Reserve space for the memprof frame table offset. + + auto Schema = memprof::PortableMemInfoBlock::getSchema(); + writeMemProfSchema(OS, Schema); + + uint64_t RecordTableOffset = + writeMemProfRecords(OS, MemProfRecordData, &Schema); + + uint64_t FramePayloadOffset = OS.tell(); + uint64_t FrameTableOffset = writeMemProfFrames(OS, MemProfFrameData); + + uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, FrameTableOffset}; + OS.patch({{HeaderUpdatePos, Header, std::size(Header)}}); + + return Error::success(); +} + +// The MemProf profile data includes a simple schema +// with the format described below followed by the hashtable: +// uint64_t Version +// uint64_t RecordTableOffset = RecordTableGenerator.Emit +// uint64_t FramePayloadOffset = Stream offset before emitting the frame table +// uint64_t FrameTableOffset = FrameTableGenerator.Emit +// uint64_t Num schema entries +// uint64_t Schema entry 0 +// uint64_t Schema entry 1 +// .... +// uint64_t Schema entry N - 1 +// OnDiskChainedHashTable MemProfRecordData +// OnDiskChainedHashTable MemProfFrameData +static Error writeMemProf( + ProfOStream &OS, + llvm::MapVector + &MemProfRecordData, + llvm::MapVector &MemProfFrameData, + memprof::IndexedVersion MemProfVersionRequested) { + + switch (MemProfVersionRequested) { + case memprof::Version0: + return writeMemProfV0(OS, MemProfRecordData, MemProfFrameData); + case memprof::Version1: + return writeMemProfV1(OS, MemProfRecordData, MemProfFrameData); + case memprof::Version2: + // TODO: Implement. Fall through to the error handling below for now. + break; + } + + return make_error( + instrprof_error::unsupported_version, + formatv("MemProf version {} not supported; " + "requires version between {} and {}, inclusive", + MemProfVersionRequested, memprof::MinimumSupportedVersion, + memprof::MaximumSupportedVersion)); +} + Error InstrProfWriter::writeImpl(ProfOStream &OS) { using namespace IndexedInstrProf; using namespace support; @@ -517,85 +655,13 @@ Error InstrProfWriter::writeImpl(ProfOStream &OS) { // Write the hash table. uint64_t HashTableStart = Generator.Emit(OS.OS, *InfoObj); - // Write the MemProf profile data if we have it. This includes a simple schema - // with the format described below followed by the hashtable: - // uint64_t Version - // uint64_t RecordTableOffset = RecordTableGenerator.Emit - // uint64_t FramePayloadOffset = Stream offset before emitting the frame table - // uint64_t FrameTableOffset = FrameTableGenerator.Emit - // uint64_t Num schema entries - // uint64_t Schema entry 0 - // uint64_t Schema entry 1 - // .... - // uint64_t Schema entry N - 1 - // OnDiskChainedHashTable MemProfRecordData - // OnDiskChainedHashTable MemProfFrameData + // Write the MemProf profile data if we have it. uint64_t MemProfSectionStart = 0; if (static_cast(ProfileKind & InstrProfKind::MemProf)) { - if (MemProfVersionRequested < memprof::MinimumSupportedVersion || - MemProfVersionRequested > memprof::MaximumSupportedVersion) { - return make_error( - instrprof_error::unsupported_version, - formatv("MemProf version {} not supported; " - "requires version between {} and {}, inclusive", - MemProfVersionRequested, memprof::MinimumSupportedVersion, - memprof::MaximumSupportedVersion)); - } - MemProfSectionStart = OS.tell(); - - if (MemProfVersionRequested >= memprof::Version1) - OS.write(MemProfVersionRequested); - - OS.write(0ULL); // Reserve space for the memprof record table offset. - OS.write(0ULL); // Reserve space for the memprof frame payload offset. 
- OS.write(0ULL); // Reserve space for the memprof frame table offset. - - auto Schema = memprof::PortableMemInfoBlock::getSchema(); - OS.write(static_cast(Schema.size())); - for (const auto Id : Schema) { - OS.write(static_cast(Id)); - } - - auto RecordWriter = - std::make_unique(memprof::Version1); - RecordWriter->Schema = &Schema; - OnDiskChainedHashTableGenerator - RecordTableGenerator; - for (auto &I : MemProfRecordData) { - // Insert the key (func hash) and value (memprof record). - RecordTableGenerator.insert(I.first, I.second, *RecordWriter.get()); - } - // Release the memory of this MapVector as it is no longer needed. - MemProfRecordData.clear(); - - // The call to Emit invokes RecordWriterTrait::EmitData which destructs - // the memprof record copies owned by the RecordTableGenerator. This works - // because the RecordTableGenerator is not used after this point. - uint64_t RecordTableOffset = - RecordTableGenerator.Emit(OS.OS, *RecordWriter); - - uint64_t FramePayloadOffset = OS.tell(); - - auto FrameWriter = std::make_unique(); - OnDiskChainedHashTableGenerator - FrameTableGenerator; - for (auto &I : MemProfFrameData) { - // Insert the key (frame id) and value (frame contents). - FrameTableGenerator.insert(I.first, I.second); - } - // Release the memory of this MapVector as it is no longer needed. - MemProfFrameData.clear(); - - uint64_t FrameTableOffset = FrameTableGenerator.Emit(OS.OS, *FrameWriter); - - uint64_t Header[] = {RecordTableOffset, FramePayloadOffset, - FrameTableOffset}; - uint64_t HeaderUpdatePos = MemProfSectionStart; - if (MemProfVersionRequested >= memprof::Version1) - // The updates go just after the version field. - HeaderUpdatePos += sizeof(uint64_t); - OS.patch({{HeaderUpdatePos, Header, std::size(Header)}}); + if (auto E = writeMemProf(OS, MemProfRecordData, MemProfFrameData, + MemProfVersionRequested)) + return E; } // BinaryIdSection has two parts: -- cgit v1.1 From 5aeb604c7ce417eea110f9803a6c5cb1cdbc5372 Mon Sep 17 00:00:00 2001 From: MaheshRavishankar <1663364+MaheshRavishankar@users.noreply.github.com> Date: Thu, 4 Apr 2024 13:44:24 -0700 Subject: [mlir][SCF] Modernize `coalesceLoops` method to handle `scf.for` loops with iter_args (#87019) As part of this extension this change also does some general cleanup 1) Make all the methods take `RewriterBase` as arguments instead of creating their own builders that tend to crash when used within pattern rewrites 2) Split `coalesePerfectlyNestedLoops` into two separate methods, one for `scf.for` and other for `affine.for`. The templatization didnt seem to be buying much there. Also general clean up of tests. 
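As a point of reference for the delinearization logic the updated utility emits, here
is a small standalone illustration (plain C++, not MLIR; all names are made up) of how
the single induction variable of a coalesced loop maps back to the original indices
through a chain of remainders and divisions, mirroring the remsi/divsi sequence built
by the delinearizeInductionVariable helper in this patch:

  // Coalesce three loops with trip counts (ni, nj, nk) into one loop of
  // ni * nj * nk iterations, then recover (i, j, k) from the single IV.
  #include <cstdio>

  int main() {
    const int ni = 2, nj = 3, nk = 4;           // per-loop trip counts
    for (int iv = 0; iv < ni * nj * nk; ++iv) { // the coalesced loop
      int k = iv % nk;                          // innermost index
      int rest = iv / nk;                       // "running quotient"
      int j = rest % nj;
      int i = rest / nj;                        // outermost index
      std::printf("%d %d %d\n", i, j, k);
    }
    return 0;
  }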
--- mlir/include/mlir/Dialect/Affine/LoopUtils.h | 49 +-- mlir/include/mlir/Dialect/SCF/Utils/Utils.h | 7 +- mlir/include/mlir/IR/PatternMatch.h | 3 + .../Dialect/Affine/Transforms/LoopCoalescing.cpp | 4 +- mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp | 48 +++ .../Dialect/SCF/TransformOps/SCFTransformOps.cpp | 4 +- .../SCF/Transforms/ParallelLoopCollapsing.cpp | 4 +- mlir/lib/Dialect/SCF/Utils/Utils.cpp | 353 ++++++++++++++------- mlir/lib/IR/PatternMatch.cpp | 9 + mlir/test/Dialect/Affine/loop-coalescing.mlir | 71 ++--- mlir/test/Dialect/SCF/transform-op-coalesce.mlir | 211 +++++++++++- mlir/test/Transforms/parallel-loop-collapsing.mlir | 32 +- .../single-parallel-loop-collapsing.mlir | 32 +- 13 files changed, 587 insertions(+), 240 deletions(-) diff --git a/mlir/include/mlir/Dialect/Affine/LoopUtils.h b/mlir/include/mlir/Dialect/Affine/LoopUtils.h index 723a262..d143954 100644 --- a/mlir/include/mlir/Dialect/Affine/LoopUtils.h +++ b/mlir/include/mlir/Dialect/Affine/LoopUtils.h @@ -299,53 +299,8 @@ LogicalResult separateFullTiles(MutableArrayRef nest, SmallVectorImpl *fullTileNest = nullptr); -/// Walk either an scf.for or an affine.for to find a band to coalesce. -template -LogicalResult coalescePerfectlyNestedLoops(LoopOpTy op) { - LogicalResult result(failure()); - SmallVector loops; - getPerfectlyNestedLoops(loops, op); - - // Look for a band of loops that can be coalesced, i.e. perfectly nested - // loops with bounds defined above some loop. - // 1. For each loop, find above which parent loop its operands are - // defined. - SmallVector operandsDefinedAbove(loops.size()); - for (unsigned i = 0, e = loops.size(); i < e; ++i) { - operandsDefinedAbove[i] = i; - for (unsigned j = 0; j < i; ++j) { - if (areValuesDefinedAbove(loops[i].getOperands(), loops[j].getRegion())) { - operandsDefinedAbove[i] = j; - break; - } - } - } - - // 2. Identify bands of loops such that the operands of all of them are - // defined above the first loop in the band. Traverse the nest bottom-up - // so that modifications don't invalidate the inner loops. - for (unsigned end = loops.size(); end > 0; --end) { - unsigned start = 0; - for (; start < end - 1; ++start) { - auto maxPos = - *std::max_element(std::next(operandsDefinedAbove.begin(), start), - std::next(operandsDefinedAbove.begin(), end)); - if (maxPos > start) - continue; - assert(maxPos == start && - "expected loop bounds to be known at the start of the band"); - auto band = llvm::MutableArrayRef(loops.data() + start, end - start); - if (succeeded(coalesceLoops(band))) - result = success(); - break; - } - // If a band was found and transformed, keep looking at the loops above - // the outermost transformed loop. - if (start != end - 1) - end = start + 1; - } - return result; -} +/// Walk an affine.for to find a band to coalesce. +LogicalResult coalescePerfectlyNestedAffineLoops(AffineForOp op); } // namespace affine } // namespace mlir diff --git a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h index 883d11b..bc09cc7 100644 --- a/mlir/include/mlir/Dialect/SCF/Utils/Utils.h +++ b/mlir/include/mlir/Dialect/SCF/Utils/Utils.h @@ -100,11 +100,16 @@ getSCFMinMaxExpr(Value value, SmallVectorImpl &dims, /// `loops` contains a list of perfectly nested loops with bounds and steps /// independent of any loop induction variable involved in the nest. 
LogicalResult coalesceLoops(MutableArrayRef loops); +LogicalResult coalesceLoops(RewriterBase &rewriter, + MutableArrayRef); + +/// Walk an affine.for to find a band to coalesce. +LogicalResult coalescePerfectlyNestedSCFForLoops(scf::ForOp op); /// Take the ParallelLoop and for each set of dimension indices, combine them /// into a single dimension. combinedDimensions must contain each index into /// loops exactly once. -void collapseParallelLoops(scf::ParallelOp loops, +void collapseParallelLoops(RewriterBase &rewriter, scf::ParallelOp loops, ArrayRef> combinedDimensions); /// Unrolls this for operation by the specified unroll factor. Returns failure diff --git a/mlir/include/mlir/IR/PatternMatch.h b/mlir/include/mlir/IR/PatternMatch.h index 15b1c38..2562301 100644 --- a/mlir/include/mlir/IR/PatternMatch.h +++ b/mlir/include/mlir/IR/PatternMatch.h @@ -15,6 +15,7 @@ #include "llvm/Support/TypeName.h" #include +using llvm::SmallPtrSetImpl; namespace mlir { class PatternRewriter; @@ -704,6 +705,8 @@ public: return user != exceptedUser; }); } + void replaceAllUsesExcept(Value from, Value to, + const SmallPtrSetImpl &preservedUsers); /// Used to notify the listener that the IR failed to be rewritten because of /// a match failure, and provide a callback to populate a diagnostic with the diff --git a/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp b/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp index 1dc69ab..05c7707 100644 --- a/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp +++ b/mlir/lib/Dialect/Affine/Transforms/LoopCoalescing.cpp @@ -39,9 +39,9 @@ struct LoopCoalescingPass func::FuncOp func = getOperation(); func.walk([](Operation *op) { if (auto scfForOp = dyn_cast(op)) - (void)coalescePerfectlyNestedLoops(scfForOp); + (void)coalescePerfectlyNestedSCFForLoops(scfForOp); else if (auto affineForOp = dyn_cast(op)) - (void)coalescePerfectlyNestedLoops(affineForOp); + (void)coalescePerfectlyNestedAffineLoops(affineForOp); }); } }; diff --git a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp index af59973..268050a 100644 --- a/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp +++ b/mlir/lib/Dialect/Affine/Utils/LoopUtils.cpp @@ -2765,3 +2765,51 @@ mlir::affine::separateFullTiles(MutableArrayRef inputNest, return success(); } + +LogicalResult affine::coalescePerfectlyNestedAffineLoops(AffineForOp op) { + LogicalResult result(failure()); + SmallVector loops; + getPerfectlyNestedLoops(loops, op); + if (loops.size() <= 1) + return success(); + + // Look for a band of loops that can be coalesced, i.e. perfectly nested + // loops with bounds defined above some loop. + // 1. For each loop, find above which parent loop its operands are + // defined. + SmallVector operandsDefinedAbove(loops.size()); + for (unsigned i = 0, e = loops.size(); i < e; ++i) { + operandsDefinedAbove[i] = i; + for (unsigned j = 0; j < i; ++j) { + if (areValuesDefinedAbove(loops[i].getOperands(), loops[j].getRegion())) { + operandsDefinedAbove[i] = j; + break; + } + } + } + + // 2. Identify bands of loops such that the operands of all of them are + // defined above the first loop in the band. Traverse the nest bottom-up + // so that modifications don't invalidate the inner loops. 
+ for (unsigned end = loops.size(); end > 0; --end) { + unsigned start = 0; + for (; start < end - 1; ++start) { + auto maxPos = + *std::max_element(std::next(operandsDefinedAbove.begin(), start), + std::next(operandsDefinedAbove.begin(), end)); + if (maxPos > start) + continue; + assert(maxPos == start && + "expected loop bounds to be known at the start of the band"); + auto band = llvm::MutableArrayRef(loops.data() + start, end - start); + if (succeeded(coalesceLoops(band))) + result = success(); + break; + } + // If a band was found and transformed, keep looking at the loops above + // the outermost transformed loop. + if (start != end - 1) + end = start + 1; + } + return result; +} diff --git a/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp b/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp index c091841..7e4faf8 100644 --- a/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp +++ b/mlir/lib/Dialect/SCF/TransformOps/SCFTransformOps.cpp @@ -332,9 +332,9 @@ transform::LoopCoalesceOp::applyToOne(transform::TransformRewriter &rewriter, transform::TransformState &state) { LogicalResult result(failure()); if (scf::ForOp scfForOp = dyn_cast(op)) - result = coalescePerfectlyNestedLoops(scfForOp); + result = coalescePerfectlyNestedSCFForLoops(scfForOp); else if (AffineForOp affineForOp = dyn_cast(op)) - result = coalescePerfectlyNestedLoops(affineForOp); + result = coalescePerfectlyNestedAffineLoops(affineForOp); results.push_back(op); if (failed(result)) { diff --git a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp index a69df02..6ba7020 100644 --- a/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp +++ b/mlir/lib/Dialect/SCF/Transforms/ParallelLoopCollapsing.cpp @@ -28,6 +28,7 @@ namespace { struct TestSCFParallelLoopCollapsing : public impl::TestSCFParallelLoopCollapsingBase< TestSCFParallelLoopCollapsing> { + void runOnOperation() override { Operation *module = getOperation(); @@ -88,6 +89,7 @@ struct TestSCFParallelLoopCollapsing // Only apply the transformation on parallel loops where the specified // transformation is valid, but do NOT early abort in the case of invalid // loops. + IRRewriter rewriter(&getContext()); module->walk([&](scf::ParallelOp op) { if (flattenedCombinedLoops.size() != op.getNumLoops()) { op.emitOpError("has ") @@ -97,7 +99,7 @@ struct TestSCFParallelLoopCollapsing << flattenedCombinedLoops.size() << " iter args."; return; } - collapseParallelLoops(op, combinedLoops); + collapseParallelLoops(rewriter, op, combinedLoops); }); } }; diff --git a/mlir/lib/Dialect/SCF/Utils/Utils.cpp b/mlir/lib/Dialect/SCF/Utils/Utils.cpp index 914aeb4..9279081 100644 --- a/mlir/lib/Dialect/SCF/Utils/Utils.cpp +++ b/mlir/lib/Dialect/SCF/Utils/Utils.cpp @@ -13,6 +13,7 @@ #include "mlir/Dialect/SCF/Utils/Utils.h" #include "mlir/Analysis/SliceAnalysis.h" #include "mlir/Dialect/Arith/IR/Arith.h" +#include "mlir/Dialect/Arith/Utils/Utils.h" #include "mlir/Dialect/Func/IR/FuncOps.h" #include "mlir/Dialect/SCF/IR/SCF.h" #include "mlir/IR/BuiltinOps.h" @@ -472,18 +473,23 @@ LogicalResult mlir::loopUnrollByFactor( return success(); } -/// Return the new lower bound, upper bound, and step in that order. Insert any -/// additional bounds calculations before the given builder and any additional -/// conversion back to the original loop induction value inside the given Block. 
-static LoopParams normalizeLoop(OpBuilder &boundsBuilder, - OpBuilder &insideLoopBuilder, Location loc, - Value lowerBound, Value upperBound, Value step, - Value inductionVar) { +/// Transform a loop with a strictly positive step +/// for %i = %lb to %ub step %s +/// into a 0-based loop with step 1 +/// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 { +/// %i = %ii * %s + %lb +/// Insert the induction variable remapping in the body of `inner`, which is +/// expected to be either `loop` or another loop perfectly nested under `loop`. +/// Insert the definition of new bounds immediate before `outer`, which is +/// expected to be either `loop` or its parent in the loop nest. +static LoopParams emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, + Value lb, Value ub, Value step) { + // For non-index types, generate `arith` instructions // Check if the loop is already known to have a constant zero lower bound or // a constant one step. bool isZeroBased = false; - if (auto ubCst = getConstantIntValue(lowerBound)) - isZeroBased = ubCst.value() == 0; + if (auto lbCst = getConstantIntValue(lb)) + isZeroBased = lbCst.value() == 0; bool isStepOne = false; if (auto stepCst = getConstantIntValue(step)) @@ -493,62 +499,90 @@ static LoopParams normalizeLoop(OpBuilder &boundsBuilder, // assuming the step is strictly positive. Update the bounds and the step // of the loop to go from 0 to the number of iterations, if necessary. if (isZeroBased && isStepOne) - return {/*lowerBound=*/lowerBound, /*upperBound=*/upperBound, - /*step=*/step}; + return {lb, ub, step}; - Value diff = boundsBuilder.create(loc, upperBound, lowerBound); + Value diff = isZeroBased ? ub : rewriter.create(loc, ub, lb); Value newUpperBound = - boundsBuilder.create(loc, diff, step); - - Value newLowerBound = - isZeroBased ? lowerBound - : boundsBuilder.create( - loc, boundsBuilder.getZeroAttr(lowerBound.getType())); - Value newStep = - isStepOne ? step - : boundsBuilder.create( - loc, boundsBuilder.getIntegerAttr(step.getType(), 1)); - - // Insert code computing the value of the original loop induction variable - // from the "normalized" one. - Value scaled = - isStepOne - ? inductionVar - : insideLoopBuilder.create(loc, inductionVar, step); - Value shifted = - isZeroBased - ? scaled - : insideLoopBuilder.create(loc, scaled, lowerBound); - - SmallPtrSet preserve{scaled.getDefiningOp(), - shifted.getDefiningOp()}; - inductionVar.replaceAllUsesExcept(shifted, preserve); - return {/*lowerBound=*/newLowerBound, /*upperBound=*/newUpperBound, - /*step=*/newStep}; + isStepOne ? diff : rewriter.create(loc, diff, step); + + Value newLowerBound = isZeroBased + ? lb + : rewriter.create( + loc, rewriter.getZeroAttr(lb.getType())); + Value newStep = isStepOne + ? step + : rewriter.create( + loc, rewriter.getIntegerAttr(step.getType(), 1)); + + return {newLowerBound, newUpperBound, newStep}; } -/// Transform a loop with a strictly positive step -/// for %i = %lb to %ub step %s -/// into a 0-based loop with step 1 -/// for %ii = 0 to ceildiv(%ub - %lb, %s) step 1 { -/// %i = %ii * %s + %lb -/// Insert the induction variable remapping in the body of `inner`, which is -/// expected to be either `loop` or another loop perfectly nested under `loop`. -/// Insert the definition of new bounds immediate before `outer`, which is -/// expected to be either `loop` or its parent in the loop nest. 
-static void normalizeLoop(scf::ForOp loop, scf::ForOp outer, scf::ForOp inner) { - OpBuilder builder(outer); - OpBuilder innerBuilder = OpBuilder::atBlockBegin(inner.getBody()); - auto loopPieces = normalizeLoop(builder, innerBuilder, loop.getLoc(), - loop.getLowerBound(), loop.getUpperBound(), - loop.getStep(), loop.getInductionVar()); - - loop.setLowerBound(loopPieces.lowerBound); - loop.setUpperBound(loopPieces.upperBound); - loop.setStep(loopPieces.step); +/// Get back the original induction variable values after loop normalization +static void denormalizeInductionVariable(RewriterBase &rewriter, Location loc, + Value normalizedIv, Value origLb, + Value origStep) { + Value denormalizedIv; + SmallPtrSet preserve; + bool isStepOne = isConstantIntValue(origStep, 1); + bool isZeroBased = isConstantIntValue(origLb, 0); + + Value scaled = normalizedIv; + if (!isStepOne) { + scaled = rewriter.create(loc, normalizedIv, origStep); + preserve.insert(scaled.getDefiningOp()); + } + denormalizedIv = scaled; + if (!isZeroBased) { + denormalizedIv = rewriter.create(loc, scaled, origLb); + preserve.insert(denormalizedIv.getDefiningOp()); + } + + rewriter.replaceAllUsesExcept(normalizedIv, denormalizedIv, preserve); } -LogicalResult mlir::coalesceLoops(MutableArrayRef loops) { +/// Helper function to multiply a sequence of values. +static Value getProductOfIntsOrIndexes(RewriterBase &rewriter, Location loc, + ArrayRef values) { + assert(!values.empty() && "unexpected empty list"); + Value productOf = values.front(); + for (auto v : values.drop_front()) { + productOf = rewriter.create(loc, productOf, v); + } + return productOf; +} + +/// For each original loop, the value of the +/// induction variable can be obtained by dividing the induction variable of +/// the linearized loop by the total number of iterations of the loops nested +/// in it modulo the number of iterations in this loop (remove the values +/// related to the outer loops): +/// iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i. +/// Compute these iteratively from the innermost loop by creating a "running +/// quotient" of division by the range. +static std::pair, SmallPtrSet> +delinearizeInductionVariable(RewriterBase &rewriter, Location loc, + Value linearizedIv, ArrayRef ubs) { + Value previous = linearizedIv; + SmallVector delinearizedIvs(ubs.size()); + SmallPtrSet preservedUsers; + for (unsigned i = 0, e = ubs.size(); i < e; ++i) { + unsigned idx = ubs.size() - i - 1; + if (i != 0) { + previous = rewriter.create(loc, previous, ubs[idx + 1]); + preservedUsers.insert(previous.getDefiningOp()); + } + Value iv = previous; + if (i != e - 1) { + iv = rewriter.create(loc, previous, ubs[idx]); + preservedUsers.insert(iv.getDefiningOp()); + } + delinearizedIvs[idx] = iv; + } + return {delinearizedIvs, preservedUsers}; +} + +LogicalResult mlir::coalesceLoops(RewriterBase &rewriter, + MutableArrayRef loops) { if (loops.size() < 2) return failure(); @@ -557,57 +591,148 @@ LogicalResult mlir::coalesceLoops(MutableArrayRef loops) { // 1. Make sure all loops iterate from 0 to upperBound with step 1. This // allows the following code to assume upperBound is the number of iterations. 
- for (auto loop : loops) - normalizeLoop(loop, outermost, innermost); + for (auto loop : loops) { + OpBuilder::InsertionGuard g(rewriter); + rewriter.setInsertionPoint(outermost); + Value lb = loop.getLowerBound(); + Value ub = loop.getUpperBound(); + Value step = loop.getStep(); + auto newLoopParams = + emitNormalizedLoopBounds(rewriter, loop.getLoc(), lb, ub, step); + + rewriter.modifyOpInPlace(loop, [&]() { + loop.setLowerBound(newLoopParams.lowerBound); + loop.setUpperBound(newLoopParams.upperBound); + loop.setStep(newLoopParams.step); + }); + + rewriter.setInsertionPointToStart(innermost.getBody()); + denormalizeInductionVariable(rewriter, loop.getLoc(), + loop.getInductionVar(), lb, step); + } // 2. Emit code computing the upper bound of the coalesced loop as product // of the number of iterations of all loops. - OpBuilder builder(outermost); + OpBuilder::InsertionGuard g(rewriter); + rewriter.setInsertionPoint(outermost); Location loc = outermost.getLoc(); - Value upperBound = outermost.getUpperBound(); - for (auto loop : loops.drop_front()) - upperBound = - builder.create(loc, upperBound, loop.getUpperBound()); + SmallVector upperBounds = llvm::map_to_vector( + loops, [](auto loop) { return loop.getUpperBound(); }); + Value upperBound = getProductOfIntsOrIndexes(rewriter, loc, upperBounds); outermost.setUpperBound(upperBound); - builder.setInsertionPointToStart(outermost.getBody()); - - // 3. Remap induction variables. For each original loop, the value of the - // induction variable can be obtained by dividing the induction variable of - // the linearized loop by the total number of iterations of the loops nested - // in it modulo the number of iterations in this loop (remove the values - // related to the outer loops): - // iv_i = floordiv(iv_linear, product-of-loop-ranges-until-i) mod range_i. - // Compute these iteratively from the innermost loop by creating a "running - // quotient" of division by the range. - Value previous = outermost.getInductionVar(); + rewriter.setInsertionPointToStart(innermost.getBody()); + auto [delinearizeIvs, preservedUsers] = delinearizeInductionVariable( + rewriter, loc, outermost.getInductionVar(), upperBounds); + rewriter.replaceAllUsesExcept(outermost.getInductionVar(), delinearizeIvs[0], + preservedUsers); + + for (int i = loops.size() - 1; i > 0; --i) { + auto outerLoop = loops[i - 1]; + auto innerLoop = loops[i]; + + Operation *innerTerminator = innerLoop.getBody()->getTerminator(); + auto yieldedVals = llvm::to_vector(innerTerminator->getOperands()); + rewriter.eraseOp(innerTerminator); + + SmallVector innerBlockArgs; + innerBlockArgs.push_back(delinearizeIvs[i]); + llvm::append_range(innerBlockArgs, outerLoop.getRegionIterArgs()); + rewriter.inlineBlockBefore(innerLoop.getBody(), outerLoop.getBody(), + Block::iterator(innerLoop), innerBlockArgs); + rewriter.replaceOp(innerLoop, yieldedVals); + } + return success(); +} + +LogicalResult mlir::coalesceLoops(MutableArrayRef loops) { + if (loops.empty()) { + return failure(); + } + IRRewriter rewriter(loops.front().getContext()); + return coalesceLoops(rewriter, loops); +} + +LogicalResult mlir::coalescePerfectlyNestedSCFForLoops(scf::ForOp op) { + LogicalResult result(failure()); + SmallVector loops; + getPerfectlyNestedLoops(loops, op); + + // Look for a band of loops that can be coalesced, i.e. perfectly nested + // loops with bounds defined above some loop. + + // 1. For each loop, find above which parent loop its bounds operands are + // defined. 
+ SmallVector operandsDefinedAbove(loops.size()); for (unsigned i = 0, e = loops.size(); i < e; ++i) { - unsigned idx = loops.size() - i - 1; - if (i != 0) - previous = builder.create(loc, previous, - loops[idx + 1].getUpperBound()); - - Value iv = (i == e - 1) ? previous - : builder.create( - loc, previous, loops[idx].getUpperBound()); - replaceAllUsesInRegionWith(loops[idx].getInductionVar(), iv, - loops.back().getRegion()); + operandsDefinedAbove[i] = i; + for (unsigned j = 0; j < i; ++j) { + SmallVector boundsOperands = {loops[i].getLowerBound(), + loops[i].getUpperBound(), + loops[i].getStep()}; + if (areValuesDefinedAbove(boundsOperands, loops[j].getRegion())) { + operandsDefinedAbove[i] = j; + break; + } + } } - // 4. Move the operations from the innermost just above the second-outermost - // loop, delete the extra terminator and the second-outermost loop. - scf::ForOp second = loops[1]; - innermost.getBody()->back().erase(); - outermost.getBody()->getOperations().splice( - Block::iterator(second.getOperation()), - innermost.getBody()->getOperations()); - second.erase(); - return success(); + // 2. For each inner loop check that the iter_args for the immediately outer + // loop are the init for the immediately inner loop and that the yields of the + // return of the inner loop is the yield for the immediately outer loop. Keep + // track of where the chain starts from for each loop. + SmallVector iterArgChainStart(loops.size()); + iterArgChainStart[0] = 0; + for (unsigned i = 1, e = loops.size(); i < e; ++i) { + // By default set the start of the chain to itself. + iterArgChainStart[i] = i; + auto outerloop = loops[i - 1]; + auto innerLoop = loops[i]; + if (outerloop.getNumRegionIterArgs() != innerLoop.getNumRegionIterArgs()) { + continue; + } + if (!llvm::equal(outerloop.getRegionIterArgs(), innerLoop.getInitArgs())) { + continue; + } + auto outerloopTerminator = outerloop.getBody()->getTerminator(); + if (!llvm::equal(outerloopTerminator->getOperands(), + innerLoop.getResults())) { + continue; + } + iterArgChainStart[i] = iterArgChainStart[i - 1]; + } + + // 3. Identify bands of loops such that the operands of all of them are + // defined above the first loop in the band. Traverse the nest bottom-up + // so that modifications don't invalidate the inner loops. + for (unsigned end = loops.size(); end > 0; --end) { + unsigned start = 0; + for (; start < end - 1; ++start) { + auto maxPos = + *std::max_element(std::next(operandsDefinedAbove.begin(), start), + std::next(operandsDefinedAbove.begin(), end)); + if (maxPos > start) + continue; + if (iterArgChainStart[end - 1] > start) + continue; + auto band = llvm::MutableArrayRef(loops.data() + start, end - start); + if (succeeded(coalesceLoops(band))) + result = success(); + break; + } + // If a band was found and transformed, keep looking at the loops above + // the outermost transformed loop. + if (start != end - 1) + end = start + 1; + } + return result; } void mlir::collapseParallelLoops( - scf::ParallelOp loops, ArrayRef> combinedDimensions) { - OpBuilder outsideBuilder(loops); + RewriterBase &rewriter, scf::ParallelOp loops, + ArrayRef> combinedDimensions) { + OpBuilder::InsertionGuard g(rewriter); + rewriter.setInsertionPoint(loops); Location loc = loops.getLoc(); // Presort combined dimensions. 
@@ -619,25 +744,29 @@ void mlir::collapseParallelLoops( SmallVector normalizedLowerBounds, normalizedSteps, normalizedUpperBounds; for (unsigned i = 0, e = loops.getNumLoops(); i < e; ++i) { - OpBuilder insideLoopBuilder = OpBuilder::atBlockBegin(loops.getBody()); - auto resultBounds = - normalizeLoop(outsideBuilder, insideLoopBuilder, loc, - loops.getLowerBound()[i], loops.getUpperBound()[i], - loops.getStep()[i], loops.getBody()->getArgument(i)); - - normalizedLowerBounds.push_back(resultBounds.lowerBound); - normalizedUpperBounds.push_back(resultBounds.upperBound); - normalizedSteps.push_back(resultBounds.step); + OpBuilder::InsertionGuard g2(rewriter); + rewriter.setInsertionPoint(loops); + Value lb = loops.getLowerBound()[i]; + Value ub = loops.getUpperBound()[i]; + Value step = loops.getStep()[i]; + auto newLoopParams = emitNormalizedLoopBounds(rewriter, loc, lb, ub, step); + normalizedLowerBounds.push_back(newLoopParams.lowerBound); + normalizedUpperBounds.push_back(newLoopParams.upperBound); + normalizedSteps.push_back(newLoopParams.step); + + rewriter.setInsertionPointToStart(loops.getBody()); + denormalizeInductionVariable(rewriter, loc, loops.getInductionVars()[i], lb, + step); } // Combine iteration spaces. SmallVector lowerBounds, upperBounds, steps; - auto cst0 = outsideBuilder.create(loc, 0); - auto cst1 = outsideBuilder.create(loc, 1); + auto cst0 = rewriter.create(loc, 0); + auto cst1 = rewriter.create(loc, 1); for (auto &sortedDimension : sortedDimensions) { - Value newUpperBound = outsideBuilder.create(loc, 1); + Value newUpperBound = rewriter.create(loc, 1); for (auto idx : sortedDimension) { - newUpperBound = outsideBuilder.create( + newUpperBound = rewriter.create( loc, newUpperBound, normalizedUpperBounds[idx]); } lowerBounds.push_back(cst0); @@ -651,7 +780,7 @@ void mlir::collapseParallelLoops( // value. The remainders then determine based on that range, which iteration // of the original induction value this represents. This is a normalized value // that is un-normalized already by the previous logic. 
- auto newPloop = outsideBuilder.create( + auto newPloop = rewriter.create( loc, lowerBounds, upperBounds, steps, [&](OpBuilder &insideBuilder, Location, ValueRange ploopIVs) { for (unsigned i = 0, e = combinedDimensions.size(); i < e; ++i) { diff --git a/mlir/lib/IR/PatternMatch.cpp b/mlir/lib/IR/PatternMatch.cpp index 5944a0e..286f47c 100644 --- a/mlir/lib/IR/PatternMatch.cpp +++ b/mlir/lib/IR/PatternMatch.cpp @@ -11,6 +11,7 @@ #include "mlir/IR/IRMapping.h" #include "mlir/IR/Iterators.h" #include "mlir/IR/RegionKindInterface.h" +#include "llvm/ADT/SmallPtrSet.h" using namespace mlir; @@ -250,6 +251,14 @@ void RewriterBase::finalizeOpModification(Operation *op) { rewriteListener->notifyOperationModified(op); } +void RewriterBase::replaceAllUsesExcept( + Value from, Value to, const SmallPtrSetImpl &preservedUsers) { + return replaceUsesWithIf(from, to, [&](OpOperand &use) { + Operation *user = use.getOwner(); + return !preservedUsers.contains(user); + }); +} + void RewriterBase::replaceUsesWithIf(Value from, Value to, function_ref functor, bool *allUsesReplaced) { diff --git a/mlir/test/Dialect/Affine/loop-coalescing.mlir b/mlir/test/Dialect/Affine/loop-coalescing.mlir index 9c17fb2..ae0adf5 100644 --- a/mlir/test/Dialect/Affine/loop-coalescing.mlir +++ b/mlir/test/Dialect/Affine/loop-coalescing.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt -split-input-file -allow-unregistered-dialect -affine-loop-coalescing %s | FileCheck %s +// RUN: mlir-opt -split-input-file -allow-unregistered-dialect -affine-loop-coalescing --cse %s | FileCheck %s // CHECK-LABEL: @one_3d_nest func.func @one_3d_nest() { @@ -239,19 +239,15 @@ func.func @coalesce_affine_for(%arg0: memref) { } return } -// CHECK: %[[T0:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK: %[[T1:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK: %[[T2:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK-DAG: %[[T3:.*]] = affine.apply #[[IDENTITY]]()[%[[T0]]] -// CHECK-DAG: %[[T4:.*]] = affine.apply #[[IDENTITY]]()[%[[T1]]] -// CHECK-DAG: %[[T5:.*]] = affine.apply #[[PRODUCT]](%[[T3]])[%[[T4]]] -// CHECK-DAG: %[[T6:.*]] = affine.apply #[[IDENTITY]]()[%[[T2]]] -// CHECK-DAG: %[[T7:.*]] = affine.apply #[[PRODUCT]](%[[T5]])[%[[T6]]] -// CHECK: affine.for %[[IV:.*]] = 0 to %[[T7]] -// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T6]]] -// CHECK-DAG: %[[T9:.*]] = affine.apply #[[FLOOR]](%[[IV]])[%[[T6]]] -// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T9]])[%[[T4]]] -// CHECK-DAG: %[[I:.*]] = affine.apply #[[FLOOR]](%[[T9]])[%[[T4]]] +// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref +// CHECK-DAG: %[[T0:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]] +// CHECK-DAG: %[[T1:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T0]]] +// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T1]])[%[[T0]]] +// CHECK: affine.for %[[IV:.*]] = 0 to %[[T2]] +// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T0]]] +// CHECK-DAG: %[[T9:.*]] = affine.apply #[[FLOOR]](%[[IV]])[%[[T0]]] +// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T9]])[%[[T0]]] +// CHECK-DAG: %[[I:.*]] = affine.apply #[[FLOOR]](%[[T9]])[%[[T0]]] // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) // CHECK-NEXT: } // CHECK-NEXT: return @@ -277,18 +273,16 @@ func.func @coalesce_affine_for(%arg0: memref) { } return } -// CHECK: %[[T0:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK: %[[T1:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK-DAG: %[[T2:.*]] = affine.apply #[[IDENTITY]]()[%[[T0]]] -// CHECK-DAG: %[[T3:.*]] = 
affine.apply #[[IDENTITY]]()[%[[T1]]] -// CHECK-DAG: %[[T4:.*]] = affine.apply #[[PRODUCT]](%[[T2]])[%[[T3]]] -// CHECK-DAG: %[[T5:.*]] = affine.apply #[[SIXTY_FOUR]]() -// CHECK-DAG: %[[T6:.*]] = affine.apply #[[PRODUCT]](%[[T4]])[%[[T5]]] -// CHECK: affine.for %[[IV:.*]] = 0 to %[[T6]] -// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T5]]] -// CHECK-DAG: %[[T8:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T5]]] -// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T8]])[%[[T3]]] -// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T8]])[%[[T3]]] +// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref +// CHECK-DAG: %[[T0:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]] +// CHECK-DAG: %[[T1:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T0]]] +// CHECK-DAG: %[[T2:.*]] = affine.apply #[[SIXTY_FOUR]]() +// CHECK-DAG: %[[T3:.*]] = affine.apply #[[PRODUCT]](%[[T1]])[%[[T2]]] +// CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]] +// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T2]]] +// CHECK-DAG: %[[T5:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T2]]] +// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T5]])[%[[T0]]] +// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T5]])[%[[T0]]] // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) // CHECK-NEXT: } // CHECK-NEXT: return @@ -316,19 +310,16 @@ func.func @coalesce_affine_for(%arg0: memref) { } return } -// CHECK: %[[T0:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK: %[[T1:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK: %[[T2:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref -// CHECK-DAG: %[[T3:.*]] = affine.min #[[MAP0]]()[%[[T0]]] -// CHECK-DAG: %[[T4:.*]] = affine.apply #[[IDENTITY]]()[%[[T1]]] -// CHECK-DAG: %[[T5:.*]] = affine.apply #[[PRODUCT]](%[[T3]])[%[[T4]]] -// CHECK-DAG: %[[T6:.*]] = affine.apply #[[IDENTITY]]()[%[[T2]]] -// CHECK-DAG: %[[T7:.*]] = affine.apply #[[PRODUCT]](%[[T5]])[%[[T6]]] -// CHECK: affine.for %[[IV:.*]] = 0 to %[[T7]] -// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T6]]] -// CHECK-DAG: %[[T9:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T6]]] -// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T9]])[%[[T4]]] -// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T9]])[%[[T4]]] +// CHECK: %[[DIM:.*]] = memref.dim %arg{{.*}}, %c{{.*}} : memref +// CHECK-DAG: %[[T0:.*]] = affine.min #[[MAP0]]()[%[[DIM]]] +// CHECK-DAG: %[[T1:.*]] = affine.apply #[[IDENTITY]]()[%[[DIM]]] +// CHECK-DAG: %[[T2:.*]] = affine.apply #[[PRODUCT]](%[[T0]])[%[[T1]]] +// CHECK-DAG: %[[T3:.*]] = affine.apply #[[PRODUCT]](%[[T2]])[%[[T1]]] +// CHECK: affine.for %[[IV:.*]] = 0 to %[[T3]] +// CHECK-DAG: %[[K:.*]] = affine.apply #[[MOD]](%[[IV]])[%[[T1]]] +// CHECK-DAG: %[[T5:.*]] = affine.apply #[[DIV]](%[[IV]])[%[[T1]]] +// CHECK-DAG: %[[J:.*]] = affine.apply #[[MOD]](%[[T5]])[%[[T1]]] +// CHECK-DAG: %[[I:.*]] = affine.apply #[[DIV]](%[[T5]])[%[[T1]]] // CHECK-NEXT: "test.foo"(%[[I]], %[[J]], %[[K]]) // CHECK-NEXT: } // CHECK-NEXT: return @@ -342,12 +333,14 @@ func.func @coalesce_affine_for(%arg0: memref) { func.func @test_loops_do_not_get_coalesced() { affine.for %i = 0 to 7 { affine.for %j = #map0(%i) to min #map1(%i) { + "use"(%i, %j) : (index, index) -> () } } return } // CHECK: affine.for %[[IV0:.*]] = 0 to 7 // CHECK-NEXT: affine.for %[[IV1:.*]] = #[[MAP0]](%[[IV0]]) to min #[[MAP1]](%[[IV0]]) +// CHECK-NEXT: "use"(%[[IV0]], %[[IV1]]) // CHECK-NEXT: } // CHECK-NEXT: } // CHECK-NEXT: return diff --git a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir 
index 2d59331..4dc3e4e 100644 --- a/mlir/test/Dialect/SCF/transform-op-coalesce.mlir +++ b/mlir/test/Dialect/SCF/transform-op-coalesce.mlir @@ -1,4 +1,4 @@ -// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics | FileCheck %s +// RUN: mlir-opt %s -transform-interpreter -split-input-file -verify-diagnostics -allow-unregistered-dialect --cse | FileCheck %s func.func @coalesce_inner() { %c0 = arith.constant 0 : index @@ -14,7 +14,7 @@ func.func @coalesce_inner() { scf.for %k = %i to %j step %c1 { // Inner loop must have been removed. scf.for %l = %i to %j step %c1 { - arith.addi %i, %j : index + "use"(%i, %j) : (index, index) -> () } } {coalesce} } @@ -33,13 +33,19 @@ module attributes {transform.with_named_sequence} { // ----- +// CHECK-DAG: #[[MAP:.+]] = affine_map<() -> (64)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<(d0)[s0] -> (d0 * s0)> +// CHECK-DAG: #[[MAP2:.+]] = affine_map<(d0)[s0] -> (d0 mod s0)> +// CHECK-DAG: #[[MAP3:.+]] = affine_map<(d0)[s0] -> (d0 floordiv s0)> func.func @coalesce_outer(%arg1: memref<64x64xf32, 1>, %arg2: memref<64x64xf32, 1>, %arg3: memref<64x64xf32, 1>) attributes {} { + // CHECK: %[[T0:.+]] = affine.apply #[[MAP]]() + // CHECK: %[[UB:.+]] = affine.apply #[[MAP1]](%[[T0]])[%[[T0]]] // CHECK: affine.for %[[IV1:.+]] = 0 to %[[UB:.+]] { // CHECK-NOT: affine.for %[[IV2:.+]] affine.for %arg4 = 0 to 64 { affine.for %arg5 = 0 to 64 { - // CHECK: %[[IDX0:.+]] = affine.apply #[[MAP0:.+]](%[[IV1]])[%{{.+}}] - // CHECK: %[[IDX1:.+]] = affine.apply #[[MAP1:.+]](%[[IV1]])[%{{.+}}] + // CHECK: %[[IDX0:.+]] = affine.apply #[[MAP2]](%[[IV1]])[%{{.+}}] + // CHECK: %[[IDX1:.+]] = affine.apply #[[MAP3]](%[[IV1]])[%{{.+}}] // CHECK-NEXT: %{{.+}} = affine.load %{{.+}}[%[[IDX1]], %[[IDX0]]] : memref<64x64xf32, 1> %0 = affine.load %arg1[%arg4, %arg5] : memref<64x64xf32, 1> %1 = affine.load %arg2[%arg4, %arg5] : memref<64x64xf32, 1> @@ -96,3 +102,200 @@ module attributes {transform.with_named_sequence} { transform.yield } } + +// ----- + +func.func @tensor_loops(%arg0 : tensor, %lb0 : index, %ub0 : index, %step0 : index, + %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> tensor { + %0 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg1 = %arg0) -> tensor { + %1 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg2 = %arg1) -> tensor { + %2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg3 = %arg2) -> tensor { + %3 = "use"(%arg3, %i, %j, %k) : (tensor, index, index, index) -> (tensor) + scf.yield %3 : tensor + } + scf.yield %2 : tensor + } + scf.yield %1 : tensor + } {coalesce} + return %0 : tensor +} +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for"> + %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">) + transform.yield + } +} +// CHECK: func.func @tensor_loops( +// CHECK-SAME: %[[ARG0:.+]]: tensor +// CHECK-SAME: %[[LB0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[LB1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[LB2:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index +// 
CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index +// CHECK: %[[NEWUB0_DIFF:.+]] = arith.subi %[[UB0]], %[[LB0]] +// CHECK-DAG: %[[NEWUB0:.+]] = arith.ceildivsi %[[NEWUB0_DIFF]], %[[STEP0]] +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 +// CHECK: %[[NEWUB1_DIFF:.+]] = arith.subi %[[UB1]], %[[LB1]] +// CHECK-DAG: %[[NEWUB1:.+]] = arith.ceildivsi %[[NEWUB1_DIFF]], %[[STEP1]] +// CHECK: %[[NEWUB2_DIFF:.+]] = arith.subi %[[UB2]], %[[LB2]] +// CHECK-DAG: %[[NEWUB2:.+]] = arith.ceildivsi %[[NEWUB2_DIFF]], %[[STEP2]] +// CHECK: %[[PROD1:.+]] = arith.muli %[[NEWUB0]], %[[NEWUB1]] +// CHECK: %[[NEWUB:.+]] = arith.muli %[[PROD1]], %[[NEWUB2]] +// CHECK: %[[RESULT:.+]] = scf.for %[[IV:[a-zA-Z0-9]+]] = %[[C0]] to %[[NEWUB]] step %[[C1]] iter_args(%[[ITER_ARG:.+]] = %[[ARG0]]) +// CHECK: %[[IV2:.+]] = arith.remsi %[[IV]], %[[NEWUB2]] +// CHECK: %[[PREVIOUS:.+]] = arith.divsi %[[IV]], %[[NEWUB2]] +// CHECK: %[[IV1:.+]] = arith.remsi %[[PREVIOUS]], %[[NEWUB1]] +// CHECK: %[[IV0:.+]] = arith.divsi %[[PREVIOUS]], %[[NEWUB1]] +// CHECK: %[[K_STEP:.+]] = arith.muli %[[IV2]], %[[STEP2]] +// CHECK: %[[K:.+]] = arith.addi %[[K_STEP]], %[[LB2]] +// CHECK: %[[J_STEP:.+]] = arith.muli %[[IV1]], %[[STEP1]] +// CHECK: %[[J:.+]] = arith.addi %[[J_STEP]], %[[LB1]] +// CHECK: %[[I_STEP:.+]] = arith.muli %[[IV0]], %[[STEP0]] +// CHECK: %[[I:.+]] = arith.addi %[[I_STEP]], %[[LB0]] +// CHECK: %[[USE:.+]] = "use"(%[[ITER_ARG]], %[[I]], %[[J]], %[[K]]) +// CHECK: scf.yield %[[USE]] +// CHECK: return %[[RESULT]] + +// ----- + +// Coalesce only first two loops, but not the last since the iter_args dont line up +func.func @tensor_loops_first_two(%arg0 : tensor, %arg1 : tensor, %lb0 : index, %ub0 : index, %step0 : index, + %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> (tensor, tensor) { + %0:2 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (tensor, tensor) { + %1:2 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (tensor, tensor) { + %2:2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg6 = %arg5, %arg7 = %arg4) -> (tensor, tensor) { + %3:2 = "use"(%arg3, %i, %j, %k) : (tensor, index, index, index) -> (tensor, tensor) + scf.yield %3#0, %3#1 : tensor, tensor + } + scf.yield %2#0, %2#1 : tensor, tensor + } + scf.yield %1#0, %1#1 : tensor, tensor + } {coalesce} + return %0#0, %0#1 : tensor, tensor +} +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for"> + %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">) + transform.yield + } +} +// CHECK: func.func @tensor_loops_first_two( +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[LB0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[LB1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[LB2:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index +// CHECK: scf.for +// CHECK: arith.remsi +// CHECK: arith.divsi +// 
CHECK: scf.for %{{[a-zA-Z0-9]+}} = %[[LB2]] to %[[UB2]] step %[[STEP2]] +// CHECK-NOT: scf.for +// CHECK: transform.named_sequence + +// ----- + +// Coalesce only first two loops, but not the last since the yields dont match up +func.func @tensor_loops_first_two_2(%arg0 : tensor, %arg1 : tensor, %lb0 : index, %ub0 : index, %step0 : index, + %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> (tensor, tensor) { + %0:2 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (tensor, tensor) { + %1:2 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (tensor, tensor) { + %2:2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg6 = %arg4, %arg7 = %arg5) -> (tensor, tensor) { + %3:2 = "use"(%arg3, %i, %j, %k) : (tensor, index, index, index) -> (tensor, tensor) + scf.yield %3#0, %3#1 : tensor, tensor + } + scf.yield %2#1, %2#0 : tensor, tensor + } + scf.yield %1#0, %1#1 : tensor, tensor + } {coalesce} + return %0#0, %0#1 : tensor, tensor +} +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for"> + %2 = transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">) + transform.yield + } +} +// CHECK: func.func @tensor_loops_first_two_2( +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[LB0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[LB1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[LB2:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index +// CHECK: scf.for +// CHECK: arith.remsi +// CHECK: arith.divsi +// CHECK: scf.for %{{[a-zA-Z0-9]+}} = %[[LB2]] to %[[UB2]] step %[[STEP2]] +// CHECK-NOT: scf.for +// CHECK: transform.named_sequence + +// ----- + +// Coalesce only last two loops, but not the first since the yields dont match up +func.func @tensor_loops_last_two(%arg0 : tensor, %arg1 : tensor, %lb0 : index, %ub0 : index, %step0 : index, + %lb1 : index, %ub1 : index, %step1 : index, %lb2 : index, %ub2 : index, %step2 : index) -> (tensor, tensor) { + %0:2 = scf.for %i = %lb0 to %ub0 step %step0 iter_args(%arg2 = %arg0, %arg3 = %arg1) -> (tensor, tensor) { + %1:2 = scf.for %j = %lb1 to %ub1 step %step1 iter_args(%arg4 = %arg2, %arg5 = %arg3) -> (tensor, tensor) { + %2:2 = scf.for %k = %lb2 to %ub2 step %step2 iter_args(%arg6 = %arg4, %arg7 = %arg5) -> (tensor, tensor) { + %3:2 = "use"(%arg3, %i, %j, %k) : (tensor, index, index, index) -> (tensor, tensor) + scf.yield %3#0, %3#1 : tensor, tensor + } + scf.yield %2#0, %2#1 : tensor, tensor + } + scf.yield %1#1, %1#0 : tensor, tensor + } {coalesce} + return %0#0, %0#1 : tensor, tensor +} +module attributes {transform.with_named_sequence} { + transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) { + %0 = transform.structured.match ops{["scf.for"]} attributes {coalesce} in %arg1 : (!transform.any_op) -> !transform.any_op + %1 = transform.cast %0 : !transform.any_op to !transform.op<"scf.for"> + %2 = 
transform.loop.coalesce %1 : (!transform.op<"scf.for">) -> (!transform.op<"scf.for">) + transform.yield + } +} +// CHECK: func.func @tensor_loops_last_two( +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[ARG0:[a-zA-Z0-9]+]]: tensor +// CHECK-SAME: %[[LB0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP0:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[LB1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP1:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[LB2:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[UB2:[a-zA-Z0-9_]+]]: index +// CHECK-SAME: %[[STEP2:[a-zA-Z0-9_]+]]: index +// CHECK: scf.for %{{[a-zA-Z0-9]+}} = %[[LB0]] to %[[UB0]] step %[[STEP0]] +// CHECK: arith.subi +// CHECK: arith.ceildivsi +// CHECK: arith.subi +// CHECK: arith.ceildivsi +// CHECK: scf.for +// CHECK: arith.remsi +// CHECK: arith.divsi +// CHECK-NOT: scf.for +// CHECK: transform.named_sequence + diff --git a/mlir/test/Transforms/parallel-loop-collapsing.mlir b/mlir/test/Transforms/parallel-loop-collapsing.mlir index 660d7edb..d1c23d5 100644 --- a/mlir/test/Transforms/parallel-loop-collapsing.mlir +++ b/mlir/test/Transforms/parallel-loop-collapsing.mlir @@ -1,6 +1,6 @@ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline='builtin.module(func.func(test-scf-parallel-loop-collapsing{collapsed-indices-0=0,3 collapsed-indices-1=1,4 collapsed-indices-2=2}, canonicalize))' | FileCheck %s -// CHECK-LABEL: func @parallel_many_dims() { +// CHECK: func @parallel_many_dims() { func.func @parallel_many_dims() { %c0 = arith.constant 0 : index %c1 = arith.constant 1 : index @@ -28,19 +28,19 @@ func.func @parallel_many_dims() { return } -// CHECK-DAG: [[C12:%.*]] = arith.constant 12 : index -// CHECK-DAG: [[C10:%.*]] = arith.constant 10 : index -// CHECK-DAG: [[C9:%.*]] = arith.constant 9 : index -// CHECK-DAG: [[C6:%.*]] = arith.constant 6 : index -// CHECK-DAG: [[C4:%.*]] = arith.constant 4 : index -// CHECK-DAG: [[C3:%.*]] = arith.constant 3 : index -// CHECK-DAG: [[C2:%.*]] = arith.constant 2 : index -// CHECK-DAG: [[C1:%.*]] = arith.constant 1 : index -// CHECK-DAG: [[C0:%.*]] = arith.constant 0 : index -// CHECK: scf.parallel ([[NEW_I0:%.*]]) = ([[C0]]) to ([[C4]]) step ([[C1]]) { -// CHECK: [[V0:%.*]] = arith.remsi [[NEW_I0]], [[C2]] : index -// CHECK: [[I0:%.*]] = arith.divsi [[NEW_I0]], [[C2]] : index -// CHECK: [[V2:%.*]] = arith.muli [[V0]], [[C10]] : index -// CHECK: [[I3:%.*]] = arith.addi [[V2]], [[C9]] : index -// CHECK: "magic.op"([[I0]], [[C3]], [[C6]], [[I3]], [[C12]]) : (index, index, index, index, index) -> index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index +// CHECK-DAG: %[[C12:.*]] = arith.constant 12 : index +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C9:.*]] = arith.constant 9 : index +// CHECK-DAG: %[[C10:.*]] = arith.constant 10 : index +// CHECK-DAG: %[[C2:.*]] = arith.constant 2 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK: scf.parallel (%[[NEW_I0:.*]]) = (%[[C0]]) to (%[[C4]]) step (%[[C1]]) { +// CHECK: %[[V0:.*]] = arith.remsi %[[NEW_I0]], %[[C2]] : index +// CHECK: %[[I0:.*]] = arith.divsi %[[NEW_I0]], %[[C2]] : index +// CHECK: %[[V2:.*]] = arith.muli %[[V0]], %[[C10]] +// CHECK: %[[I3:.*]] = arith.addi %[[V2]], %[[C9]] +// CHECK: "magic.op"(%[[I0]], %[[C3]], %[[C6]], %[[I3]], %[[C12]]) : (index, index, index, index, index) -> index // 
CHECK: scf.reduce diff --git a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir index 542786b..4eed61a 100644 --- a/mlir/test/Transforms/single-parallel-loop-collapsing.mlir +++ b/mlir/test/Transforms/single-parallel-loop-collapsing.mlir @@ -13,22 +13,22 @@ func.func @collapse_to_single() { return } -// CHECK-LABEL: func @collapse_to_single() { -// CHECK-DAG: [[C18:%.*]] = arith.constant 18 : index -// CHECK-DAG: [[C6:%.*]] = arith.constant 6 : index -// CHECK-DAG: [[C3:%.*]] = arith.constant 3 : index -// CHECK-DAG: [[C7:%.*]] = arith.constant 7 : index -// CHECK-DAG: [[C4:%.*]] = arith.constant 4 : index -// CHECK-DAG: [[C1:%.*]] = arith.constant 1 : index -// CHECK-DAG: [[C0:%.*]] = arith.constant 0 : index -// CHECK: scf.parallel ([[NEW_I:%.*]]) = ([[C0]]) to ([[C18]]) step ([[C1]]) { -// CHECK: [[I0_COUNT:%.*]] = arith.remsi [[NEW_I]], [[C6]] : index -// CHECK: [[I1_COUNT:%.*]] = arith.divsi [[NEW_I]], [[C6]] : index -// CHECK: [[V0:%.*]] = arith.muli [[I0_COUNT]], [[C4]] : index -// CHECK: [[I1:%.*]] = arith.addi [[V0]], [[C7]] : index -// CHECK: [[V1:%.*]] = arith.muli [[I1_COUNT]], [[C3]] : index -// CHECK: [[I0:%.*]] = arith.addi [[V1]], [[C3]] : index -// CHECK: "magic.op"([[I0]], [[I1]]) : (index, index) -> index +// CHECK: func @collapse_to_single() { +// CHECK-DAG: %[[C1:.*]] = arith.constant 1 : index +// CHECK-DAG: %[[C0:.*]] = arith.constant 0 : index +// CHECK-DAG: %[[C3:.*]] = arith.constant 3 : index +// CHECK-DAG: %[[C7:.*]] = arith.constant 7 : index +// CHECK-DAG: %[[C4:.*]] = arith.constant 4 : index +// CHECK-DAG: %[[C6:.*]] = arith.constant 6 : index +// CHECK-DAG: %[[C18:.*]] = arith.constant 18 : index +// CHECK: scf.parallel (%[[NEW_I:.*]]) = (%[[C0]]) to (%[[C18]]) step (%[[C1]]) { +// CHECK: %[[I0_COUNT:.*]] = arith.remsi %[[NEW_I]], %[[C6]] : index +// CHECK: %[[I1_COUNT:.*]] = arith.divsi %[[NEW_I]], %[[C6]] : index +// CHECK: %[[V0:.*]] = arith.muli %[[I0_COUNT]], %[[C4]] +// CHECK: %[[I1:.*]] = arith.addi %[[V0]], %[[C7]] +// CHECK: %[[V1:.*]] = arith.muli %[[I1_COUNT]], %[[C3]] +// CHECK: %[[I0:.*]] = arith.addi %[[V1]], %[[C3]] +// CHECK: "magic.op"(%[[I0]], %[[I1]]) : (index, index) -> index // CHECK: scf.reduce // CHECK-NEXT: } // CHECK-NEXT: return -- cgit v1.1 From f5960c168dfe17c7599acea0a7d94a26545f4777 Mon Sep 17 00:00:00 2001 From: Nikolas Klauser Date: Thu, 4 Apr 2024 23:02:19 +0200 Subject: [libc++][NFC] Make __desugars_to a variable template and rename the header to desugars_to.h (#87337) This improves compile times and memory usage slightly and removes some boilerplate. 
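To illustrate the two forms, here is a minimal standalone sketch (illustrative names and `constexpr` usage, not the libc++ code itself) of replacing a class-template trait with a variable template; callers drop the `::value` and the compiler no longer has to instantiate a class for each query:

```cpp
#include <functional>
#include <type_traits>

struct equal_tag {};

// Old style: a class template specialized to derive from true_type.
template <class Tag, class Op, class T, class U>
struct desugars_to : std::false_type {};

template <class T, class U>
struct desugars_to<equal_tag, std::equal_to<>, T, U> : std::true_type {};

// New style: a variable template with the same specialization pattern.
template <class Tag, class Op, class T, class U>
inline constexpr bool desugars_to_v = false;

template <class T, class U>
inline constexpr bool desugars_to_v<equal_tag, std::equal_to<>, T, U> = true;

static_assert(desugars_to<equal_tag, std::equal_to<>, int, int>::value);
static_assert(desugars_to_v<equal_tag, std::equal_to<>, int, int>);
```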
--- libcxx/include/CMakeLists.txt | 2 +- libcxx/include/__algorithm/comp.h | 5 ++- libcxx/include/__algorithm/equal.h | 7 ++-- libcxx/include/__algorithm/mismatch.h | 4 +-- .../pstl_backends/cpu_backends/transform_reduce.h | 8 ++--- libcxx/include/__functional/operations.h | 11 +++--- libcxx/include/__functional/ranges_operations.h | 5 ++- libcxx/include/__numeric/pstl_transform_reduce.h | 2 +- libcxx/include/__type_traits/desugars_to.h | 39 +++++++++++++++++++++ libcxx/include/__type_traits/operation_traits.h | 40 ---------------------- libcxx/include/libcxx.imp | 2 +- libcxx/include/module.modulemap | 2 +- 12 files changed, 61 insertions(+), 66 deletions(-) create mode 100644 libcxx/include/__type_traits/desugars_to.h delete mode 100644 libcxx/include/__type_traits/operation_traits.h diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt index db39803..097a41d 100644 --- a/libcxx/include/CMakeLists.txt +++ b/libcxx/include/CMakeLists.txt @@ -738,6 +738,7 @@ set(files __type_traits/datasizeof.h __type_traits/decay.h __type_traits/dependent_type.h + __type_traits/desugars_to.h __type_traits/disjunction.h __type_traits/enable_if.h __type_traits/extent.h @@ -822,7 +823,6 @@ set(files __type_traits/nat.h __type_traits/negation.h __type_traits/noexcept_move_assign_container.h - __type_traits/operation_traits.h __type_traits/promote.h __type_traits/rank.h __type_traits/remove_all_extents.h diff --git a/libcxx/include/__algorithm/comp.h b/libcxx/include/__algorithm/comp.h index 3902f75..a089375 100644 --- a/libcxx/include/__algorithm/comp.h +++ b/libcxx/include/__algorithm/comp.h @@ -10,8 +10,7 @@ #define _LIBCPP___ALGORITHM_COMP_H #include <__config> -#include <__type_traits/integral_constant.h> -#include <__type_traits/operation_traits.h> +#include <__type_traits/desugars_to.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) # pragma GCC system_header @@ -27,7 +26,7 @@ struct __equal_to { }; template -struct __desugars_to<__equal_tag, __equal_to, _Tp, _Up> : true_type {}; +inline const bool __desugars_to_v<__equal_tag, __equal_to, _Tp, _Up> = true; // The definition is required because __less is part of the ABI, but it's empty // because all comparisons should be transparent. 
diff --git a/libcxx/include/__algorithm/equal.h b/libcxx/include/__algorithm/equal.h index c76a16b..1341d9e 100644 --- a/libcxx/include/__algorithm/equal.h +++ b/libcxx/include/__algorithm/equal.h @@ -18,12 +18,11 @@ #include <__iterator/distance.h> #include <__iterator/iterator_traits.h> #include <__string/constexpr_c_functions.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/enable_if.h> -#include <__type_traits/integral_constant.h> #include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_equality_comparable.h> #include <__type_traits/is_volatile.h> -#include <__type_traits/operation_traits.h> #include <__utility/move.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -47,7 +46,7 @@ _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 boo template ::value && !is_volatile<_Tp>::value && + __enable_if_t<__desugars_to_v<__equal_tag, _BinaryPredicate, _Tp, _Up> && !is_volatile<_Tp>::value && !is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value, int> = 0> _LIBCPP_NODISCARD inline _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 bool @@ -87,7 +86,7 @@ template ::value && __is_identity<_Proj1>::value && + __enable_if_t<__desugars_to_v<__equal_tag, _Pred, _Tp, _Up> && __is_identity<_Proj1>::value && __is_identity<_Proj2>::value && !is_volatile<_Tp>::value && !is_volatile<_Up>::value && __libcpp_is_trivially_equality_comparable<_Tp, _Up>::value, int> = 0> diff --git a/libcxx/include/__algorithm/mismatch.h b/libcxx/include/__algorithm/mismatch.h index 8abb273..4ada29e 100644 --- a/libcxx/include/__algorithm/mismatch.h +++ b/libcxx/include/__algorithm/mismatch.h @@ -16,11 +16,11 @@ #include <__algorithm/unwrap_iter.h> #include <__config> #include <__functional/identity.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/invoke.h> #include <__type_traits/is_constant_evaluated.h> #include <__type_traits/is_equality_comparable.h> #include <__type_traits/is_integral.h> -#include <__type_traits/operation_traits.h> #include <__utility/move.h> #include <__utility/pair.h> #include <__utility/unreachable.h> @@ -59,7 +59,7 @@ template ::value && __desugars_to<__equal_tag, _Pred, _Tp, _Tp>::value && + __enable_if_t::value && __desugars_to_v<__equal_tag, _Pred, _Tp, _Tp> && __is_identity<_Proj1>::value && __is_identity<_Proj2>::value, int> = 0> _LIBCPP_NODISCARD _LIBCPP_HIDE_FROM_ABI _LIBCPP_CONSTEXPR_SINCE_CXX20 pair<_Tp*, _Tp*> diff --git a/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h b/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h index 14a0d76..376abd3 100644 --- a/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h +++ b/libcxx/include/__algorithm/pstl_backends/cpu_backends/transform_reduce.h @@ -14,9 +14,9 @@ #include <__iterator/concepts.h> #include <__iterator/iterator_traits.h> #include <__numeric/transform_reduce.h> +#include <__type_traits/desugars_to.h> #include <__type_traits/is_arithmetic.h> #include <__type_traits/is_execution_policy.h> -#include <__type_traits/operation_traits.h> #include <__utility/move.h> #include #include @@ -37,7 +37,7 @@ template , - __enable_if_t<__desugars_to<__plus_tag, _BinaryOperation, _Tp, _UnaryResult>::value && is_arithmetic_v<_Tp> && + __enable_if_t<__desugars_to_v<__plus_tag, _BinaryOperation, _Tp, _UnaryResult> && is_arithmetic_v<_Tp> && is_arithmetic_v<_UnaryResult>, int> = 0> _LIBCPP_HIDE_FROM_ABI _Tp @@ -53,8 +53,8 @@ template , - __enable_if_t::value && - 
is_arithmetic_v<_Tp> && is_arithmetic_v<_UnaryResult>), + __enable_if_t && is_arithmetic_v<_Tp> && + is_arithmetic_v<_UnaryResult>), int> = 0> _LIBCPP_HIDE_FROM_ABI _Tp __simd_transform_reduce(_Size __n, _Tp __init, _BinaryOperation __binary_op, _UnaryOperation __f) noexcept { diff --git a/libcxx/include/__functional/operations.h b/libcxx/include/__functional/operations.h index 7ddc006..9aa28e4 100644 --- a/libcxx/include/__functional/operations.h +++ b/libcxx/include/__functional/operations.h @@ -13,8 +13,7 @@ #include <__config> #include <__functional/binary_function.h> #include <__functional/unary_function.h> -#include <__type_traits/integral_constant.h> -#include <__type_traits/operation_traits.h> +#include <__type_traits/desugars_to.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -41,10 +40,10 @@ _LIBCPP_CTAD_SUPPORTED_FOR_TYPE(plus); // The non-transparent std::plus specialization is only equivalent to a raw plus // operator when we don't perform an implicit conversion when calling it. template -struct __desugars_to<__plus_tag, plus<_Tp>, _Tp, _Tp> : true_type {}; +inline const bool __desugars_to_v<__plus_tag, plus<_Tp>, _Tp, _Tp> = true; template -struct __desugars_to<__plus_tag, plus, _Tp, _Up> : true_type {}; +inline const bool __desugars_to_v<__plus_tag, plus, _Tp, _Up> = true; #if _LIBCPP_STD_VER >= 14 template <> @@ -315,11 +314,11 @@ struct _LIBCPP_TEMPLATE_VIS equal_to { // The non-transparent std::equal_to specialization is only equivalent to a raw equality // comparison when we don't perform an implicit conversion when calling it. template -struct __desugars_to<__equal_tag, equal_to<_Tp>, _Tp, _Tp> : true_type {}; +inline const bool __desugars_to_v<__equal_tag, equal_to<_Tp>, _Tp, _Tp> = true; // In the transparent case, we do not enforce that template -struct __desugars_to<__equal_tag, equal_to, _Tp, _Up> : true_type {}; +inline const bool __desugars_to_v<__equal_tag, equal_to, _Tp, _Up> = true; #if _LIBCPP_STD_VER >= 14 template diff --git a/libcxx/include/__functional/ranges_operations.h b/libcxx/include/__functional/ranges_operations.h index 38b2801..a9dffaf 100644 --- a/libcxx/include/__functional/ranges_operations.h +++ b/libcxx/include/__functional/ranges_operations.h @@ -13,8 +13,7 @@ #include <__concepts/equality_comparable.h> #include <__concepts/totally_ordered.h> #include <__config> -#include <__type_traits/integral_constant.h> -#include <__type_traits/operation_traits.h> +#include <__type_traits/desugars_to.h> #include <__utility/forward.h> #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) @@ -98,7 +97,7 @@ struct greater_equal { // For ranges we do not require that the types on each side of the equality // operator are of the same type template -struct __desugars_to<__equal_tag, ranges::equal_to, _Tp, _Up> : true_type {}; +inline const bool __desugars_to_v<__equal_tag, ranges::equal_to, _Tp, _Up> = true; #endif // _LIBCPP_STD_VER >= 20 diff --git a/libcxx/include/__numeric/pstl_transform_reduce.h b/libcxx/include/__numeric/pstl_transform_reduce.h index 2f412d4..07ecf0d 100644 --- a/libcxx/include/__numeric/pstl_transform_reduce.h +++ b/libcxx/include/__numeric/pstl_transform_reduce.h @@ -87,7 +87,7 @@ _LIBCPP_HIDE_FROM_ABI _Tp transform_reduce( } // This overload doesn't get a customization point because it's trivial to detect (through e.g. 
-// __desugars_to) when specializing the more general variant, which should always be preferred +// __desugars_to_v) when specializing the more general variant, which should always be preferred template + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +// Tags to represent the canonical operations +struct __equal_tag {}; +struct __plus_tag {}; + +// This class template is used to determine whether an operation "desugars" +// (or boils down) to a given canonical operation. +// +// For example, `std::equal_to<>`, our internal `std::__equal_to` helper and +// `ranges::equal_to` are all just fancy ways of representing a transparent +// equality operation, so they all desugar to `__equal_tag`. +// +// This is useful to optimize some functions in cases where we know e.g. the +// predicate being passed is actually going to call a builtin operator, or has +// some specific semantics. +template +inline const bool __desugars_to_v = false; + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___TYPE_TRAITS_DESUGARS_TO_H diff --git a/libcxx/include/__type_traits/operation_traits.h b/libcxx/include/__type_traits/operation_traits.h deleted file mode 100644 index ef6e716..0000000 --- a/libcxx/include/__type_traits/operation_traits.h +++ /dev/null @@ -1,40 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. -// See https://llvm.org/LICENSE.txt for license information. -// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception -// -//===----------------------------------------------------------------------===// - -#ifndef _LIBCPP___TYPE_TRAITS_OPERATION_TRAITS_H -#define _LIBCPP___TYPE_TRAITS_OPERATION_TRAITS_H - -#include <__config> -#include <__type_traits/integral_constant.h> - -#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) -# pragma GCC system_header -#endif - -_LIBCPP_BEGIN_NAMESPACE_STD - -// Tags to represent the canonical operations -struct __equal_tag {}; -struct __plus_tag {}; - -// This class template is used to determine whether an operation "desugars" -// (or boils down) to a given canonical operation. -// -// For example, `std::equal_to<>`, our internal `std::__equal_to` helper and -// `ranges::equal_to` are all just fancy ways of representing a transparent -// equality operation, so they all desugar to `__equal_tag`. -// -// This is useful to optimize some functions in cases where we know e.g. the -// predicate being passed is actually going to call a builtin operator, or has -// some specific semantics. 
-template -struct __desugars_to : false_type {}; - -_LIBCPP_END_NAMESPACE_STD - -#endif // _LIBCPP___TYPE_TRAITS_OPERATION_TRAITS_H diff --git a/libcxx/include/libcxx.imp b/libcxx/include/libcxx.imp index 2cb1fa5..607f63e 100644 --- a/libcxx/include/libcxx.imp +++ b/libcxx/include/libcxx.imp @@ -734,6 +734,7 @@ { include: [ "<__type_traits/datasizeof.h>", "private", "", "public" ] }, { include: [ "<__type_traits/decay.h>", "private", "", "public" ] }, { include: [ "<__type_traits/dependent_type.h>", "private", "", "public" ] }, + { include: [ "<__type_traits/desugars_to.h>", "private", "", "public" ] }, { include: [ "<__type_traits/disjunction.h>", "private", "", "public" ] }, { include: [ "<__type_traits/enable_if.h>", "private", "", "public" ] }, { include: [ "<__type_traits/extent.h>", "private", "", "public" ] }, @@ -818,7 +819,6 @@ { include: [ "<__type_traits/nat.h>", "private", "", "public" ] }, { include: [ "<__type_traits/negation.h>", "private", "", "public" ] }, { include: [ "<__type_traits/noexcept_move_assign_container.h>", "private", "", "public" ] }, - { include: [ "<__type_traits/operation_traits.h>", "private", "", "public" ] }, { include: [ "<__type_traits/promote.h>", "private", "", "public" ] }, { include: [ "<__type_traits/rank.h>", "private", "", "public" ] }, { include: [ "<__type_traits/remove_all_extents.h>", "private", "", "public" ] }, diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 6d4dcc2..ed45a1b 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -1867,6 +1867,7 @@ module std_private_type_traits_decay [system export std_private_type_traits_add_pointer } module std_private_type_traits_dependent_type [system] { header "__type_traits/dependent_type.h" } +module std_private_type_traits_desugars_to [system] { header "__type_traits/desugars_to.h" } module std_private_type_traits_disjunction [system] { header "__type_traits/disjunction.h" } module std_private_type_traits_enable_if [system] { header "__type_traits/enable_if.h" } module std_private_type_traits_extent [system] { header "__type_traits/extent.h" } @@ -2017,7 +2018,6 @@ module std_private_type_traits_maybe_const [system module std_private_type_traits_nat [system] { header "__type_traits/nat.h" } module std_private_type_traits_negation [system] { header "__type_traits/negation.h" } module std_private_type_traits_noexcept_move_assign_container [system] { header "__type_traits/noexcept_move_assign_container.h" } -module std_private_type_traits_operation_traits [system] { header "__type_traits/operation_traits.h" } module std_private_type_traits_promote [system] { header "__type_traits/promote.h" } module std_private_type_traits_rank [system] { header "__type_traits/rank.h" } module std_private_type_traits_remove_all_extents [system] { header "__type_traits/remove_all_extents.h" } -- cgit v1.1 From 864d2531df8078a5bb49d24383d7219595d23690 Mon Sep 17 00:00:00 2001 From: Slava Zakharin Date: Thu, 4 Apr 2024 14:23:40 -0700 Subject: [flang] Added windows-include.h wrapper to resolve name conflicts. (#87650) The header file includes windows.h in a mean-and-lean way to avoid bringing in names that may conflict with Flang code. 
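As a consumer-side sketch of why such a wrapper helps (illustrative code, not part of the patch; it assumes the flang include path is on the compiler's search path), a file that includes the wrapper instead of <windows.h> keeps names like `std::min`/`std::max` usable because `NOMINMAX` and `WIN32_LEAN_AND_MEAN` are applied consistently in one place:

```cpp
#ifdef _WIN32
#include "flang/Common/windows-include.h" // wraps <windows.h> with NOMINMAX etc.
#else
#include <unistd.h>
#endif

#include <algorithm>

int clampToByte(int v) {
  // Without NOMINMAX, <windows.h> defines min/max function-like macros that
  // would break these std::min/std::max calls.
  return std::min(std::max(v, 0), 255);
}
```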
--- flang/include/flang/Common/windows-include.h | 25 +++++++++++++++++++++++++ flang/runtime/command.cpp | 4 +--- flang/runtime/execute.cpp | 4 +--- flang/runtime/file.cpp | 3 +-- flang/runtime/lock.h | 4 +--- 5 files changed, 29 insertions(+), 11 deletions(-) create mode 100644 flang/include/flang/Common/windows-include.h diff --git a/flang/include/flang/Common/windows-include.h b/flang/include/flang/Common/windows-include.h new file mode 100644 index 0000000..75ef497 --- /dev/null +++ b/flang/include/flang/Common/windows-include.h @@ -0,0 +1,25 @@ +//===-- include/flang/Common/windows-include.h ------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Wrapper around windows.h that works around the name conflicts. +// +//===----------------------------------------------------------------------===// + +#ifndef FORTRAN_COMMON_WINDOWS_INCLUDE_H_ +#define FORTRAN_COMMON_WINDOWS_INCLUDE_H_ + +#ifdef _WIN32 + +#define WIN32_LEAN_AND_MEAN +#define NOMINMAX + +#include + +#endif // _WIN32 + +#endif // FORTRAN_COMMON_WINDOWS_INCLUDE_H_ diff --git a/flang/runtime/command.cpp b/flang/runtime/command.cpp index fabfe60..b573c5d 100644 --- a/flang/runtime/command.cpp +++ b/flang/runtime/command.cpp @@ -16,9 +16,7 @@ #include #ifdef _WIN32 -#define WIN32_LEAN_AND_MEAN -#define NOMINMAX -#include +#include "flang/Common/windows-include.h" // On Windows GetCurrentProcessId returns a DWORD aka uint32_t #include diff --git a/flang/runtime/execute.cpp b/flang/runtime/execute.cpp index c84930c..0f5bc50 100644 --- a/flang/runtime/execute.cpp +++ b/flang/runtime/execute.cpp @@ -16,9 +16,7 @@ #include #include #ifdef _WIN32 -#define LEAN_AND_MEAN -#define NOMINMAX -#include +#include "flang/Common/windows-include.h" #else #include #include diff --git a/flang/runtime/file.cpp b/flang/runtime/file.cpp index 67764f1..acd5d33d 100644 --- a/flang/runtime/file.cpp +++ b/flang/runtime/file.cpp @@ -17,9 +17,8 @@ #include #include #ifdef _WIN32 -#define NOMINMAX +#include "flang/Common/windows-include.h" #include -#include #else #include #endif diff --git a/flang/runtime/lock.h b/flang/runtime/lock.h index 9f27a82..46ca287 100644 --- a/flang/runtime/lock.h +++ b/flang/runtime/lock.h @@ -25,9 +25,7 @@ #if USE_PTHREADS #include #elif defined(_WIN32) -// Do not define macros for "min" and "max" -#define NOMINMAX -#include +#include "flang/Common/windows-include.h" #else #include #endif -- cgit v1.1 From 03f54725c3f2b26bfef052b9f4b5deee749e5369 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 4 Apr 2024 14:25:53 -0700 Subject: [HWASAN][UBSAN] Don't use default `profile-summary-cutoff-hot` (#87691) Default cutoff is not usefull here. Decision to enable or not sanitizer causes more significant performance impact, than a typical optimizations which rely on `profile-summary-cutoff-hot`. 
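A rough sketch of the idea behind a hot-percentile cutoff (hypothetical names, not the pass code; LLVM expresses these cutoffs on a parts-per-million-style scale, so 990000 roughly means the hottest counts covering 99% of execution):

```cpp
#include <cstdint>

// Hypothetical stand-in for the profile summary: the minimum execution count
// a function needs in order to be considered hot at the requested cutoff. A
// real summary derives this from the profile's count histogram; a constant
// keeps the sketch self-contained.
static uint64_t hotCountThresholdFor(uint32_t cutoff) {
  return cutoff >= 990000 ? 100000 : 1000;
}

// Hot functions are left uninstrumented: the cost of sanitizing them
// outweighs whatever the default -profile-summary-cutoff-hot-driven
// optimizations would gain or lose.
static bool skipSanitizer(uint32_t cutoff, uint64_t functionEntryCount) {
  return functionEntryCount >= hotCountThresholdFor(cutoff);
}

int main() {
  return skipSanitizer(990000, 250000) && !skipSanitizer(990000, 50) ? 0 : 1;
}
```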
--- llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 14 ++++---------- llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp | 13 +++---------- .../test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll | 2 +- llvm/test/Transforms/RemoveTraps/remove-traps.ll | 2 +- 4 files changed, 9 insertions(+), 22 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 88e84ed..8562e2e 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -187,10 +187,8 @@ static cl::opt cl::desc("Use selective instrumentation"), cl::Hidden, cl::init(false)); -static cl::opt ClHotPercentileCutoff( - "hwasan-percentile-cutoff-hot", cl::init(0), - cl::desc("Alternative hot percentile cuttoff." - "By default `-profile-summary-cutoff-hot` is used.")); +static cl::opt ClHotPercentileCutoff("hwasan-percentile-cutoff-hot", + cl::desc("Hot percentile cuttoff.")); static cl::opt ClRandomSkipRate("hwasan-random-skip-rate", cl::init(0), @@ -1512,12 +1510,8 @@ bool HWAddressSanitizer::selectiveInstrumentationShouldSkip( ++NumNoProfileSummaryFuncs; return false; } - auto &BFI = FAM.getResult(F); - return ( - (ClHotPercentileCutoff.getNumOccurrences() && ClHotPercentileCutoff >= 0) - ? PSI->isFunctionHotInCallGraphNthPercentile(ClHotPercentileCutoff, - &F, BFI) - : PSI->isFunctionHotInCallGraph(&F, BFI)); + return PSI->isFunctionHotInCallGraphNthPercentile( + ClHotPercentileCutoff, &F, FAM.getResult(F)); } void HWAddressSanitizer::sanitizeFunction(Function &F, diff --git a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp index d87f748..6bcbccd 100644 --- a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp +++ b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp @@ -22,10 +22,8 @@ using namespace llvm; #define DEBUG_TYPE "remove-traps" -static cl::opt HotPercentileCutoff( - "remove-traps-percentile-cutoff-hot", cl::init(0), - cl::desc("Alternative hot percentile cuttoff. By default " - "`-profile-summary-cutoff-hot` is used.")); +static cl::opt HotPercentileCutoff("remove-traps-percentile-cutoff-hot", + cl::desc("Hot percentile cuttoff.")); static cl::opt RandomRate("remove-traps-random-rate", cl::init(0.0), @@ -64,12 +62,7 @@ static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, uint64_t Count = 0; for (const auto *PR : predecessors(&BB)) Count += BFI.getBlockProfileCount(PR).value_or(0); - - IsHot = - HotPercentileCutoff.getNumOccurrences() - ? 
(HotPercentileCutoff > 0 && - PSI->isHotCountNthPercentile(HotPercentileCutoff, Count)) - : PSI->isHotCount(Count); + IsHot = PSI->isHotCountNthPercentile(HotPercentileCutoff, Count); } if (ShouldRemove(IsHot)) { diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll b/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll index e568f5b..da9bff8 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll @@ -1,7 +1,7 @@ ; RUN: opt < %s -passes='require,hwasan' -S -hwasan-selective-instrumentation=1 \ ; RUN: -hwasan-percentile-cutoff-hot=700000 | FileCheck %s --check-prefix=HOT70 ; RUN: opt < %s -passes='require,hwasan' -S -hwasan-selective-instrumentation=1 \ -; RUN: | FileCheck %s --check-prefix=HOT99 +; RUN: -hwasan-percentile-cutoff-hot=990000 | FileCheck %s --check-prefix=HOT99 ; RUN: opt < %s -passes='require,hwasan' -S -hwasan-selective-instrumentation=1 \ ; RUN: -hwasan-random-skip-rate=0.0 | FileCheck %s --check-prefix=RANDOM0 ; RUN: opt < %s -passes='require,hwasan' -S -hwasan-selective-instrumentation=1 \ diff --git a/llvm/test/Transforms/RemoveTraps/remove-traps.ll b/llvm/test/Transforms/RemoveTraps/remove-traps.ll index e3cca83..4853149 100644 --- a/llvm/test/Transforms/RemoveTraps/remove-traps.ll +++ b/llvm/test/Transforms/RemoveTraps/remove-traps.ll @@ -1,7 +1,7 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4 ; RUN: opt < %s -passes='function(remove-traps)' -S | FileCheck %s --check-prefixes=NOPROFILE ; RUN: opt < %s -passes='function(remove-traps)' -remove-traps-random-rate=1 -S | FileCheck %s --check-prefixes=ALL -; RUN: opt < %s -passes='require,function(remove-traps)' -S | FileCheck %s --check-prefixes=HOT99 +; RUN: opt < %s -passes='require,function(remove-traps)' -remove-traps-percentile-cutoff-hot=990000 -S | FileCheck %s --check-prefixes=HOT99 ; RUN: opt < %s -passes='require,function(remove-traps)' -remove-traps-percentile-cutoff-hot=700000 -S | FileCheck %s --check-prefixes=HOT70 target triple = "x86_64-pc-linux-gnu" -- cgit v1.1 From 5264c22ef15a308bad420cd85051ed6c4c175bc7 Mon Sep 17 00:00:00 2001 From: Michael Jones Date: Thu, 4 Apr 2024 14:28:56 -0700 Subject: [libc] Temporary math macros fix (#87681) Downstream's having some issues due to math-macros.h issues. These will be fixed properly soon. See https://github.com/llvm/llvm-project/issues/87683 for tracking this tech debt. --- libc/cmake/modules/LLVMLibCCompileOptionRules.cmake | 7 +++++++ libc/include/llvm-libc-macros/math-macros.h | 11 +++++++++++ utils/bazel/llvm-project-overlay/libc/BUILD.bazel | 3 +-- 3 files changed, 19 insertions(+), 2 deletions(-) diff --git a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake index 40a1cfd..5b3a10d 100644 --- a/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake +++ b/libc/cmake/modules/LLVMLibCCompileOptionRules.cmake @@ -43,6 +43,7 @@ function(_get_common_compile_options output_var flags) list(APPEND compile_options "-fpie") if(LLVM_LIBC_FULL_BUILD) + list(APPEND compile_options "-DLIBC_FULL_BUILD") # Only add -ffreestanding flag in full build mode. list(APPEND compile_options "-ffreestanding") endif() @@ -126,6 +127,7 @@ function(_get_common_test_compile_options output_var c_test flags) list(APPEND compile_options "-fpie") if(LLVM_LIBC_FULL_BUILD) + list(APPEND compile_options "-DLIBC_FULL_BUILD") # Only add -ffreestanding flag in full build mode. 
list(APPEND compile_options "-ffreestanding") list(APPEND compile_options "-fno-exceptions") @@ -178,5 +180,10 @@ function(_get_hermetic_test_compile_options output_var flags) -Wno-multi-gpu --cuda-path=${LIBC_CUDA_ROOT} -nogpulib -march=${LIBC_GPU_TARGET_ARCHITECTURE} -fno-use-cxa-atexit) endif() + + if(LLVM_LIBC_FULL_BUILD) + list(APPEND compile_options "-DLIBC_FULL_BUILD") + endif() + set(${output_var} ${compile_options} PARENT_SCOPE) endfunction() diff --git a/libc/include/llvm-libc-macros/math-macros.h b/libc/include/llvm-libc-macros/math-macros.h index 1497e32..6046ea9 100644 --- a/libc/include/llvm-libc-macros/math-macros.h +++ b/libc/include/llvm-libc-macros/math-macros.h @@ -9,6 +9,11 @@ #ifndef LLVM_LIBC_MACROS_MATH_MACROS_H #define LLVM_LIBC_MACROS_MATH_MACROS_H +// TODO: Remove this. This is a temporary fix for a downstream problem. +// This cannot be left permanently since it would require downstream users to +// define this macro. +#ifdef LIBC_FULL_BUILD + #include "limits-macros.h" #define FP_NAN 0 @@ -79,4 +84,10 @@ template inline constexpr bool isnan(T x) { #endif +#else // LIBC_FULL_BUILD + +#include + +#endif // LIBC_FULL_BUILD + #endif // LLVM_LIBC_MACROS_MATH_MACROS_H diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index d8375de..b8cd290 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -68,7 +68,6 @@ libc_support_library( name = "llvm_libc_macros_math_macros", hdrs = ["include/llvm-libc-macros/math-macros.h"], deps = [":llvm_libc_macros_limits_macros"], - defines = ["__FP_LOGBNAN_MIN"], ) libc_support_library( @@ -1000,8 +999,8 @@ libc_support_library( libc_support_library( name = "__support_osutil_quick_exit", - hdrs = ["src/__support/OSUtil/quick_exit.h"], srcs = ["src/__support/OSUtil/linux/quick_exit.cpp"], + hdrs = ["src/__support/OSUtil/quick_exit.h"], deps = [ ":__support_osutil_syscall", ], -- cgit v1.1 From 2cbbbf71a4fbff9f7015d83a599428ea9ec9dab7 Mon Sep 17 00:00:00 2001 From: Chenguang Wang Date: Thu, 4 Apr 2024 14:29:29 -0700 Subject: [bazel] Add missing dependency for mlir:SCFUtils (#87711) https://github.com/llvm/llvm-project/commit/5aeb604c7ce417eea110f9803a6c5cb1cdbc5372 https://buildkite.com/llvm-project/upstream-bazel/builds/93859 --- utils/bazel/llvm-project-overlay/mlir/BUILD.bazel | 1 + 1 file changed, 1 insertion(+) diff --git a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel index ddd3e69..497edcf 100644 --- a/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/mlir/BUILD.bazel @@ -4275,6 +4275,7 @@ cc_library( ":AffineDialect", ":Analysis", ":ArithDialect", + ":ArithUtils", ":DialectUtils", ":FuncDialect", ":IR", -- cgit v1.1 From e628581aaab18cbd9bd33a4a42c57da0e018d32f Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 4 Apr 2024 14:30:04 -0700 Subject: [NFC][HWASAN][UBSAN] Remove cl:init from few opts (#87692) They are supposed to be used with `getNumOccurrences`. 
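The distinction matters because `getNumOccurrences()` only counts explicit command-line uses, so a value set via `cl::init(...)` is never observed by code that gates on it. A small illustrative sketch (hypothetical option name, not the pass code):

```cpp
#include "llvm/Support/CommandLine.h"

static llvm::cl::opt<int>
    ExampleCutoff("example-percentile-cutoff-hot",
                  llvm::cl::desc("Hot percentile cutoff."));

// True only when the flag was passed on the command line; a cl::init value
// would not be seen here, which is why dropping cl::init is a no-functional-
// change cleanup for options that are only consulted this way.
static bool cutoffRequested() { return ExampleCutoff.getNumOccurrences() > 0; }

int main(int argc, char **argv) {
  llvm::cl::ParseCommandLineOptions(argc, argv);
  return cutoffRequested() ? ExampleCutoff.getValue() : -1;
}
```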
--- llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 2 +- llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index 8562e2e..ee7301f 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -191,7 +191,7 @@ static cl::opt ClHotPercentileCutoff("hwasan-percentile-cutoff-hot", cl::desc("Hot percentile cuttoff.")); static cl::opt - ClRandomSkipRate("hwasan-random-skip-rate", cl::init(0), + ClRandomSkipRate("hwasan-random-skip-rate", cl::desc("Probability value in the range [0.0, 1.0] " "to skip instrumentation of a function.")); diff --git a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp index 6bcbccd..694dd3c 100644 --- a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp +++ b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp @@ -26,7 +26,7 @@ static cl::opt HotPercentileCutoff("remove-traps-percentile-cutoff-hot", cl::desc("Hot percentile cuttoff.")); static cl::opt - RandomRate("remove-traps-random-rate", cl::init(0.0), + RandomRate("remove-traps-random-rate", cl::desc("Probability value in the range [0.0, 1.0] of " "unconditional pseudo-random checks removal.")); -- cgit v1.1 From 18380c522a90dd849caca3da28cd26c4c4c53eaf Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 4 Apr 2024 14:32:30 -0700 Subject: [UBSAN][HWASAN] Remove redundant flags (#87709) Presense of `cutoff-hot` or `random-skip-rate` should be enough to trigger optimization. --- clang/lib/CodeGen/BackendUtil.cpp | 5 +---- clang/test/CodeGen/remote-traps.c | 2 +- .../llvm/Transforms/Instrumentation/RemoveTrapsPass.h | 2 ++ llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp | 9 +++------ llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp | 6 ++++++ .../Instrumentation/HWAddressSanitizer/pgo-opt-out-no-ps.ll | 6 ++---- llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll | 12 ++++-------- 7 files changed, 19 insertions(+), 23 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index c8b2a93..e25a176 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -100,9 +100,6 @@ using namespace llvm; namespace llvm { extern cl::opt PrintPipelinePasses; -static cl::opt ClRemoveTraps("clang-remove-traps", cl::Optional, - cl::desc("Insert remove-traps pass.")); - // Experiment to move sanitizers earlier. static cl::opt ClSanitizeOnOptimizerEarlyEP( "sanitizer-early-opt-ep", cl::Optional, @@ -750,7 +747,7 @@ static void addSanitizers(const Triple &TargetTriple, PB.registerOptimizerLastEPCallback(SanitizersCallback); } - if (ClRemoveTraps) { + if (RemoveTrapsPass::IsRequested()) { // We can optimize after inliner, and PGO profile matching. The hook below // is called at the end `buildFunctionSimplificationPipeline`, which called // from `buildInlinerPipeline`, which called after profile matching. 
diff --git a/clang/test/CodeGen/remote-traps.c b/clang/test/CodeGen/remote-traps.c index 6751afb..6983ddb 100644 --- a/clang/test/CodeGen/remote-traps.c +++ b/clang/test/CodeGen/remote-traps.c @@ -1,5 +1,5 @@ // RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow %s -o - | FileCheck %s -// RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow -mllvm -clang-remove-traps -mllvm -remove-traps-random-rate=1 %s -o - | FileCheck %s --implicit-check-not="call void @llvm.ubsantrap" --check-prefixes=REMOVE +// RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow -mllvm -remove-traps-random-rate=1 %s -o - | FileCheck %s --implicit-check-not="call void @llvm.ubsantrap" --check-prefixes=REMOVE int test(int x) { return x + 123; diff --git a/llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h b/llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h index 58f6bbc..bae1584 100644 --- a/llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h +++ b/llvm/include/llvm/Transforms/Instrumentation/RemoveTrapsPass.h @@ -25,6 +25,8 @@ namespace llvm { class RemoveTrapsPass : public PassInfoMixin { public: PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM); + + static bool IsRequested(); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp index ee7301f..ad1cd9c 100644 --- a/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/HWAddressSanitizer.cpp @@ -182,11 +182,6 @@ static cl::opt ClWithTls( "platforms that support this"), cl::Hidden, cl::init(true)); -static cl::opt - CSelectiveInstrumentation("hwasan-selective-instrumentation", - cl::desc("Use selective instrumentation"), - cl::Hidden, cl::init(false)); - static cl::opt ClHotPercentileCutoff("hwasan-percentile-cutoff-hot", cl::desc("Hot percentile cuttoff.")); @@ -1503,6 +1498,8 @@ bool HWAddressSanitizer::selectiveInstrumentationShouldSkip( std::bernoulli_distribution D(ClRandomSkipRate); return (D(*Rng)); } + if (!ClHotPercentileCutoff.getNumOccurrences()) + return false; auto &MAMProxy = FAM.getResult(F); ProfileSummaryInfo *PSI = MAMProxy.getCachedResult(*F.getParent()); @@ -1527,7 +1524,7 @@ void HWAddressSanitizer::sanitizeFunction(Function &F, NumTotalFuncs++; - if (CSelectiveInstrumentation && selectiveInstrumentationShouldSkip(F, FAM)) + if (selectiveInstrumentationShouldSkip(F, FAM)) return; NumInstrumentedFuncs++; diff --git a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp index 694dd3c..436ccdc 100644 --- a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp +++ b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp @@ -41,6 +41,7 @@ static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, auto ShouldRemove = [&](bool IsHot) { if (!RandomRate.getNumOccurrences()) return IsHot; + assert(HotPercentileCutoff.getNumOccurrences()); if (!Rng) Rng = F.getParent()->createRNG(F.getName()); std::bernoulli_distribution D(RandomRate); @@ -95,3 +96,8 @@ PreservedAnalyses RemoveTrapsPass::run(Function &F, return removeUbsanTraps(F, BFI, PSI) ? 
PreservedAnalyses::none() : PreservedAnalyses::all(); } + +bool RemoveTrapsPass::IsRequested() { + return RandomRate.getNumOccurrences() || + HotPercentileCutoff.getNumOccurrences(); +} diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out-no-ps.ll b/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out-no-ps.ll index 8d96ab0..f75042b 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out-no-ps.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out-no-ps.ll @@ -1,7 +1,5 @@ -; RUN: opt < %s -passes='require,hwasan' -S \ -; RUN: -hwasan-selective-instrumentation=0 | FileCheck %s --check-prefix=FULL -; RUN: opt < %s -passes='require,hwasan' -S \ -; RUN: -hwasan-selective-instrumentation=1 | FileCheck %s --check-prefix=SELSAN +; RUN: opt < %s -passes='require,hwasan' -S | FileCheck %s --check-prefix=FULL +; RUN: opt < %s -passes='require,hwasan' -S -hwasan-percentile-cutoff-hot=990000 | FileCheck %s --check-prefix=SELSAN ; FULL: @not_sanitized ; FULL-NEXT: %x = alloca i8, i64 4 diff --git a/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll b/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll index da9bff8..ab3f56d 100644 --- a/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll +++ b/llvm/test/Instrumentation/HWAddressSanitizer/pgo-opt-out.ll @@ -1,11 +1,7 @@ -; RUN: opt < %s -passes='require,hwasan' -S -hwasan-selective-instrumentation=1 \ -; RUN: -hwasan-percentile-cutoff-hot=700000 | FileCheck %s --check-prefix=HOT70 -; RUN: opt < %s -passes='require,hwasan' -S -hwasan-selective-instrumentation=1 \ -; RUN: -hwasan-percentile-cutoff-hot=990000 | FileCheck %s --check-prefix=HOT99 -; RUN: opt < %s -passes='require,hwasan' -S -hwasan-selective-instrumentation=1 \ -; RUN: -hwasan-random-skip-rate=0.0 | FileCheck %s --check-prefix=RANDOM0 -; RUN: opt < %s -passes='require,hwasan' -S -hwasan-selective-instrumentation=1 \ -; RUN: -hwasan-random-skip-rate=1.0 | FileCheck %s --check-prefix=RANDOM1 +; RUN: opt < %s -passes='require,hwasan' -S -hwasan-percentile-cutoff-hot=700000 | FileCheck %s --check-prefix=HOT70 +; RUN: opt < %s -passes='require,hwasan' -S -hwasan-percentile-cutoff-hot=990000 | FileCheck %s --check-prefix=HOT99 +; RUN: opt < %s -passes='require,hwasan' -S -hwasan-random-skip-rate=0.0 | FileCheck %s --check-prefix=RANDOM0 +; RUN: opt < %s -passes='require,hwasan' -S -hwasan-random-skip-rate=1.0 | FileCheck %s --check-prefix=RANDOM1 ; HOT70: @sanitized ; HOT70-NEXT: @__hwasan_tls -- cgit v1.1 From 697dd93ae30f489e5bcdac74c2ef2d876e3ca064 Mon Sep 17 00:00:00 2001 From: Koakuma Date: Fri, 5 Apr 2024 04:34:07 +0700 Subject: [SPARC] Implement L and H inline asm argument modifiers (#87259) This adds support for using the L and H argument modifiers for twinword operands in inline asm code, such as in: ``` %1 = tail call i64 asm sideeffect "rd %pc, ${0:L} ; srlx ${0:L}, 32, ${0:H}", "={o4}"() ``` This is needed by the Linux kernel. --- llvm/docs/LangRef.rst | 2 ++ llvm/lib/Target/Sparc/SparcAsmPrinter.cpp | 44 +++++++++++++++++++++++++++++++ llvm/test/CodeGen/SPARC/inlineasm-bad.ll | 9 +++++++ llvm/test/CodeGen/SPARC/inlineasm.ll | 9 +++++++ 4 files changed, 64 insertions(+) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 1d4ff52..774729c 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -5557,6 +5557,8 @@ RISC-V: Sparc: +- ``L``: Print the low-order register of a two-register operand. +- ``H``: Print the high-order register of a two-register operand. - ``r``: No effect. 
SystemZ: diff --git a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp index 215a8ea..6855471 100644 --- a/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp +++ b/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp @@ -434,6 +434,50 @@ bool SparcAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, default: // See if this is a generic print operand return AsmPrinter::PrintAsmOperand(MI, OpNo, ExtraCode, O); + case 'L': // Low order register of a twin word register operand + case 'H': // High order register of a twin word register operand + { + const SparcSubtarget &Subtarget = MF->getSubtarget(); + const MachineOperand &MO = MI->getOperand(OpNo); + const SparcRegisterInfo *RegisterInfo = Subtarget.getRegisterInfo(); + Register MOReg = MO.getReg(); + + Register HiReg, LoReg; + if (!SP::IntPairRegClass.contains(MOReg)) { + // If we aren't given a register pair already, find out which pair it + // belongs to. Note that here, the specified register operand, which + // refers to the high part of the twinword, needs to be an even-numbered + // register. + MOReg = RegisterInfo->getMatchingSuperReg(MOReg, SP::sub_even, + &SP::IntPairRegClass); + if (!MOReg) { + SMLoc Loc; + OutContext.reportError( + Loc, "Hi part of pair should point to an even-numbered register"); + OutContext.reportError( + Loc, "(note that in some cases it might be necessary to manually " + "bind the input/output registers instead of relying on " + "automatic allocation)"); + return true; + } + } + + HiReg = RegisterInfo->getSubReg(MOReg, SP::sub_even); + LoReg = RegisterInfo->getSubReg(MOReg, SP::sub_odd); + + Register Reg; + switch (ExtraCode[0]) { + case 'L': + Reg = LoReg; + break; + case 'H': + Reg = HiReg; + break; + } + + O << '%' << SparcInstPrinter::getRegisterName(Reg); + return false; + } case 'f': case 'r': break; diff --git a/llvm/test/CodeGen/SPARC/inlineasm-bad.ll b/llvm/test/CodeGen/SPARC/inlineasm-bad.ll index 5bf2adb..07eb67d 100644 --- a/llvm/test/CodeGen/SPARC/inlineasm-bad.ll +++ b/llvm/test/CodeGen/SPARC/inlineasm-bad.ll @@ -11,3 +11,12 @@ entry: tail call void asm sideeffect "faddq $0,$1,$2", "{f38},{f0},{f0}"(fp128 0xL0, fp128 0xL0, fp128 0xL0) ret void } + +; CHECK-label:test_twinword_error +; CHECK: error: Hi part of pair should point to an even-numbered register +; CHECK: error: (note that in some cases it might be necessary to manually bind the input/output registers instead of relying on automatic allocation) + +define i64 @test_twinword_error(){ + %1 = tail call i64 asm sideeffect "rd %asr5, ${0:L} \0A\09 srlx ${0:L}, 32, ${0:H}", "={i1}"() + ret i64 %1 +} diff --git a/llvm/test/CodeGen/SPARC/inlineasm.ll b/llvm/test/CodeGen/SPARC/inlineasm.ll index ec27598..9817d7c 100644 --- a/llvm/test/CodeGen/SPARC/inlineasm.ll +++ b/llvm/test/CodeGen/SPARC/inlineasm.ll @@ -143,3 +143,12 @@ entry: %1 = call double asm sideeffect "faddd $1, $2, $0", "=f,f,e"(i64 0, i64 0) ret void } + +; CHECK-label:test_twinword +; CHECK: rd %asr5, %i1 +; CHECK: srlx %i1, 32, %i0 + +define i64 @test_twinword(){ + %1 = tail call i64 asm sideeffect "rd %asr5, ${0:L} \0A\09 srlx ${0:L}, 32, ${0:H}", "={i0}"() + ret i64 %1 +} -- cgit v1.1 From aa6ba23235b4418d528fe8df4d1cd6d2ad50ceb2 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 4 Apr 2024 14:51:48 -0700 Subject: [UBSAN] Remove invalid assert added with #87709 --- llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp 
b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp index 436ccdc..b281468 100644 --- a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp +++ b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp @@ -41,7 +41,6 @@ static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, auto ShouldRemove = [&](bool IsHot) { if (!RandomRate.getNumOccurrences()) return IsHot; - assert(HotPercentileCutoff.getNumOccurrences()); if (!Rng) Rng = F.getParent()->createRNG(F.getName()); std::bernoulli_distribution D(RandomRate); -- cgit v1.1 From 75e7e7d327e0458b151fbe1e6b3a9ac0a5081f76 Mon Sep 17 00:00:00 2001 From: Michael Kruse Date: Fri, 5 Apr 2024 00:02:06 +0200 Subject: [flang] Add --gcc-toolchain and --gcc-install-dir options to flang. (#87360) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The `--gcc-toolchain` and `--gcc-install-dir` option were previously only visible to the Clang driver, but not Flang. These determine which assembler, linker, and libraries to use, e.g. for cross-compiling, and therefore are relevant for Flang as well. Tests are implemented using a mock GCC installation in `basic_cross_linux_tree` copied over from Clang's tests. The Clang driver already contains tests with `--driver-mode=flang` but `flang-new` is an entirely different executable (containing the `-fc1` stage) that should be tested as well. While not all files in `basic_cross_linux_tree` are strictly needed for testing those two driver flags, they will be necessarily needed for future added flags such as `--rtlib`.   Also remove the entry `*.o` in flang's `.gitignore` since `crt*.o` files are needed in the GCC mock installation. Fixes #86729 --- clang/include/clang/Driver/Options.td | 2 ++ flang/.gitignore | 1 - .../usr/bin/i386-unknown-linux-gnu-as | 1 + .../usr/bin/i386-unknown-linux-gnu-ld | 1 + .../usr/bin/i386-unknown-linux-gnu-ld.bfd | 1 + .../usr/bin/i386-unknown-linux-gnu-ld.gold | 1 + .../usr/bin/x86_64-unknown-linux-gnu-as | 1 + .../usr/bin/x86_64-unknown-linux-gnu-ld | 1 + .../usr/bin/x86_64-unknown-linux-gnu-ld.bfd | 1 + .../usr/bin/x86_64-unknown-linux-gnu-ld.gold | 1 + .../usr/i386-unknown-linux-gnu/bin/as | 1 + .../usr/i386-unknown-linux-gnu/bin/ld | 1 + .../usr/i386-unknown-linux-gnu/bin/ld.bfd | 1 + .../usr/i386-unknown-linux-gnu/bin/ld.gold | 1 + .../usr/i386-unknown-linux-gnu/lib/.keep | 0 .../gcc/i386-unknown-linux-gnu/10.2.0/crtbegin.o | 0 .../gcc/x86_64-unknown-linux-gnu/10.2.0/crtbegin.o | 0 .../gcc/x86_64-unknown-linux-gnu/10.2.0/crtbeginT.o | 0 .../x86_64-unknown-linux-gnu/10.2.0/crtfastmath.o | 0 .../x86_64-unknown-linux-gnu/10.2.0/x32/crtbegin.o | 0 .../x86_64-unknown-linux-gnu/10.2.0/x32/crtbeginT.o | 0 .../10.2.0/x32/crtfastmath.o | 0 .../usr/x86_64-unknown-linux-gnu/bin/as | 1 + .../usr/x86_64-unknown-linux-gnu/bin/ld | 1 + .../usr/x86_64-unknown-linux-gnu/bin/ld.bfd | 1 + .../usr/x86_64-unknown-linux-gnu/bin/ld.gold | 1 + .../usr/x86_64-unknown-linux-gnu/bin/ld.lld | 0 .../usr/x86_64-unknown-linux-gnu/lib/.keep | 0 flang/test/Driver/driver-help-hidden.f90 | 3 +++ flang/test/Driver/driver-help.f90 | 3 +++ flang/test/Driver/gcc-toolchain-install-dir.f90 | 21 +++++++++++++++++++++ 31 files changed, 45 insertions(+), 1 deletion(-) create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-as create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld create mode 100755 
flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.bfd create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.gold create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-as create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.bfd create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.gold create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/as create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.bfd create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.gold create mode 100644 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/lib/.keep create mode 100644 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/crtbegin.o create mode 100644 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtbegin.o create mode 100644 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtbeginT.o create mode 100644 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtfastmath.o create mode 100644 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtbegin.o create mode 100644 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtbeginT.o create mode 100644 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtfastmath.o create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/as create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.bfd create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.gold create mode 100755 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.lld create mode 100644 flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/lib/.keep create mode 100644 flang/test/Driver/gcc-toolchain-install-dir.f90 diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index c3e90a7..12e8dc7 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -802,9 +802,11 @@ def B : JoinedOrSeparate<["-"], "B">, MetaVarName<"">, HelpText<"Search $prefix$file for executables, libraries, and data files. " "If $prefix is a directory, search $prefix/$file">; def gcc_install_dir_EQ : Joined<["--"], "gcc-install-dir=">, + Visibility<[ClangOption, FlangOption]>, HelpText<"Use GCC installation in the specified directory. The directory ends with path components like 'lib{,32,64}/gcc{,-cross}/$triple/$version'. " "Note: executables (e.g. 
ld) used by the compiler are not overridden by the selected GCC installation">; def gcc_toolchain : Joined<["--"], "gcc-toolchain=">, Flags<[NoXarchOption]>, + Visibility<[ClangOption, FlangOption]>, HelpText<"Specify a directory where Clang can find 'include' and 'lib{,32,64}/gcc{,-cross}/$triple/$version'. " "Clang will use the GCC installation with the largest version">; def gcc_triple_EQ : Joined<["--"], "gcc-triple=">, diff --git a/flang/.gitignore b/flang/.gitignore index 4da4ee1..508e70c 100644 --- a/flang/.gitignore +++ b/flang/.gitignore @@ -5,7 +5,6 @@ build root tags TAGS -*.o .nfs* *.sw? *~ diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-as b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-as new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-as @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.bfd b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.bfd new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.bfd @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.gold b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.gold new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/i386-unknown-linux-gnu-ld.gold @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-as b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-as new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-as @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.bfd b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.bfd new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.bfd @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.gold b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.gold new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/bin/x86_64-unknown-linux-gnu-ld.gold @@ -0,0 +1 @@ +#!/bin/true diff --git 
a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/as b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/as new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/as @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.bfd b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.bfd new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.bfd @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.gold b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.gold new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/bin/ld.gold @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/lib/.keep b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/i386-unknown-linux-gnu/lib/.keep new file mode 100644 index 0000000..e69de29 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/crtbegin.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/crtbegin.o new file mode 100644 index 0000000..e69de29 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtbegin.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtbegin.o new file mode 100644 index 0000000..e69de29 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtbeginT.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtbeginT.o new file mode 100644 index 0000000..e69de29 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtfastmath.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/crtfastmath.o new file mode 100644 index 0000000..e69de29 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtbegin.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtbegin.o new file mode 100644 index 0000000..e69de29 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtbeginT.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtbeginT.o new file mode 100644 index 0000000..e69de29 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtfastmath.o b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/x32/crtfastmath.o new file mode 100644 index 0000000..e69de29 diff --git 
a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/as b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/as new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/as @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.bfd b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.bfd new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.bfd @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.gold b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.gold new file mode 100755 index 0000000..b23e556 --- /dev/null +++ b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.gold @@ -0,0 +1 @@ +#!/bin/true diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.lld b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/bin/ld.lld new file mode 100755 index 0000000..e69de29 diff --git a/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/lib/.keep b/flang/test/Driver/Inputs/basic_cross_linux_tree/usr/x86_64-unknown-linux-gnu/lib/.keep new file mode 100644 index 0000000..e69de29 diff --git a/flang/test/Driver/driver-help-hidden.f90 b/flang/test/Driver/driver-help-hidden.f90 index bf3660d..4405b64 100644 --- a/flang/test/Driver/driver-help-hidden.f90 +++ b/flang/test/Driver/driver-help-hidden.f90 @@ -104,6 +104,9 @@ ! CHECK-NEXT: -fversion-loops-for-stride ! CHECK-NEXT: Create unit-strided versions of loops ! CHECK-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. +! CHECK-NEXT: --gcc-install-dir= +! CHECK-NEXT: Use GCC installation in the specified directory. The directory ends with path components like 'lib{,32,64}/gcc{,-cross}/$triple/$version'. Note: executables (e.g. ld) used by the compiler are not overridden by the selected GCC installation +! CHECK-NEXT: --gcc-toolchain= Specify a directory where Clang can find 'include' and 'lib{,32,64}/gcc{,-cross}/$triple/$version'. Clang will use the GCC installation with the largest version ! CHECK-NEXT: -gline-directives-only Emit debug line info directives only ! CHECK-NEXT: -gline-tables-only Emit debug line number tables only ! CHECK-NEXT: -gpulibc Link the LLVM C Library for GPUs diff --git a/flang/test/Driver/driver-help.f90 b/flang/test/Driver/driver-help.f90 index b4280a4..c80453f 100644 --- a/flang/test/Driver/driver-help.f90 +++ b/flang/test/Driver/driver-help.f90 @@ -92,6 +92,9 @@ ! HELP-NEXT: -fversion-loops-for-stride ! HELP-NEXT: Create unit-strided versions of loops ! HELP-NEXT: -fxor-operator Enable .XOR. as a synonym of .NEQV. +! HELP-NEXT: --gcc-install-dir= +! HELP-NEXT: Use GCC installation in the specified directory. The directory ends with path components like 'lib{,32,64}/gcc{,-cross}/$triple/$version'. 
Note: executables (e.g. ld) used by the compiler are not overridden by the selected GCC installation +! HELP-NEXT: --gcc-toolchain= Specify a directory where Clang can find 'include' and 'lib{,32,64}/gcc{,-cross}/$triple/$version'. Clang will use the GCC installation with the largest version ! HELP-NEXT: -gline-directives-only Emit debug line info directives only ! HELP-NEXT: -gline-tables-only Emit debug line number tables only ! HELP-NEXT: -gpulibc Link the LLVM C Library for GPUs diff --git a/flang/test/Driver/gcc-toolchain-install-dir.f90 b/flang/test/Driver/gcc-toolchain-install-dir.f90 new file mode 100644 index 0000000..5a073b0 --- /dev/null +++ b/flang/test/Driver/gcc-toolchain-install-dir.f90 @@ -0,0 +1,21 @@ +!! Test that --gcc-toolchain and --gcc-install-dir options are working as expected. +!! It does not test cross-compiling (--sysroot), so crtbegin.o, libgcc/compiler-rt, libc, libFortranRuntime, etc. are not supposed to be affected. +!! PREFIX is captured twice because the driver escapes backslashes (occurring in Windows paths) in the -### output, but not on the "Selected GCC installation:" line. + +! RUN: %flang 2>&1 -### -v -o %t %s -no-integrated-as -fuse-ld=ld --target=i386-unknown-linux-gnu --gcc-install-dir=%S/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0 | FileCheck %s --check-prefix=CHECK-I386 +! RUN: %flang 2>&1 -### -v -o %t %s -no-integrated-as -fuse-ld=ld --target=i386-unknown-linux-gnu --gcc-toolchain=%S/Inputs/basic_cross_linux_tree/usr | FileCheck %s --check-prefix=CHECK-I386 +! CHECK-I386: Selected GCC installation: [[PREFIX:[^"]+]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0 +! CHECK-I386: "-fc1" "-triple" "i386-unknown-linux-gnu" +! CHECK-I386: "[[PREFIX:[^"]+]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/../../../../i386-unknown-linux-gnu/bin{{/|\\\\}}as" +! CHECK-I386: "[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/../../../../i386-unknown-linux-gnu/bin{{/|\\\\}}ld" {{.*}} "-m" "elf_i386" +! CHECK-I386-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0" +! CHECK-I386-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/i386-unknown-linux-gnu/10.2.0/../../../../i386-unknown-linux-gnu/lib" + +! RUN: %flang 2>&1 -### -v -o %t %s -no-integrated-as -fuse-ld=ld --target=x86_64-unknown-linux-gnu --gcc-install-dir=%S/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0 | FileCheck %s --check-prefix=CHECK-X86-64 +! RUN: %flang 2>&1 -### -v -o %t %s -no-integrated-as -fuse-ld=ld --target=x86_64-unknown-linux-gnu --gcc-toolchain=%S/Inputs/basic_cross_linux_tree/usr | FileCheck %s --check-prefix=CHECK-X86-64 +! CHECK-X86-64: Selected GCC installation: [[PREFIX:[^"]+]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0 +! CHECK-X86-64: "-fc1" "-triple" "x86_64-unknown-linux-gnu" +! CHECK-X86-64: "[[PREFIX:[^"]+]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/../../../../x86_64-unknown-linux-gnu/bin{{/|\\\\}}as" "--64" +! CHECK-X86-64: "[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/../../../../x86_64-unknown-linux-gnu/bin{{/|\\\\}}ld" {{.*}} "-m" "elf_x86_64" +! CHECK-X86-64-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0" +! 
CHECK-X86-64-SAME: "-L[[PREFIX]]/Inputs/basic_cross_linux_tree/usr/lib/gcc/x86_64-unknown-linux-gnu/10.2.0/../../../../x86_64-unknown-linux-gnu/lib" -- cgit v1.1 From 4b077ed58e3b61e29ae2dbc157fc8122fda1f36c Mon Sep 17 00:00:00 2001 From: Keith Smiley Date: Thu, 4 Apr 2024 15:10:52 -0700 Subject: [bazel] Add support for building lldb (#87589) This adds build configuration for building LLDB on macOS and Linux. It uses a default subset of features that should work out of the box on macOS and Ubuntu. Python support is notably missing right now: some of the scaffolding is there, but linking a Python dylib is complex, especially if you plan to distribute the resulting liblldb.so. Most of this build file is pretty simple. One unfortunate pattern I had to use was splitting the header and source cc_library targets to break circular dependencies. --- utils/bazel/.bazelrc | 11 +- utils/bazel/configure.bzl | 10 +- utils/bazel/llvm-project-overlay/lldb/BUILD.bazel | 848 +++++++ .../lldb/source/Plugins/BUILD.bazel | 2319 ++++++++++++++++++++ .../lldb/source/Plugins/plugin_config.bzl | 104 + 5 files changed, 3289 insertions(+), 3 deletions(-) create mode 100644 utils/bazel/llvm-project-overlay/lldb/BUILD.bazel create mode 100644 utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel create mode 100644 utils/bazel/llvm-project-overlay/lldb/source/Plugins/plugin_config.bzl diff --git a/utils/bazel/.bazelrc b/utils/bazel/.bazelrc index 1d7cf4a..5a6d188 100644 --- a/utils/bazel/.bazelrc +++ b/utils/bazel/.bazelrc @@ -9,10 +9,16 @@ # Prevent invalid caching if input files are modified during a build. build --experimental_guard_against_concurrent_changes +# Automatically enable --config=(linux|macos|windows) based on the host +build --enable_platform_specific_config + # In opt mode, bazel by default builds both PIC and non-PIC object files for # tests vs binaries. We don't need this feature and it slows down opt builds # considerably. -build --force_pic +# TODO: Remove platform specifics once we're on bazel 7.x https://github.com/bazelbuild/bazel/issues/12439 +# Apple platforms always enable pic so this flag is unnecessary anyways +build:linux --force_pic +build:windows --force_pic # Shared objects take up more space. With fast linkers and binaries that aren't # super large, the benefits of shared objects are minimal. @@ -34,6 +40,9 @@ build --incompatible_no_implicit_file_export # eventually become the default common --incompatible_disallow_empty_glob +# TODO: Remove once we move to bazel 7.x +build --experimental_cc_shared_library + ############################################################################### # Options to select different strategies for linking potential dependent # libraries. The default leaves it disabled. diff --git a/utils/bazel/configure.bzl b/utils/bazel/configure.bzl index d6cd6aa..717b86d 100644 --- a/utils/bazel/configure.bzl +++ b/utils/bazel/configure.bzl @@ -4,8 +4,6 @@ """Helper macros to configure the LLVM overlay project.""" -load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") - # Directory of overlay files relative to WORKSPACE DEFAULT_OVERLAY_PATH = "llvm-project-overlay" @@ -77,6 +75,7 @@ def _extract_cmake_settings(repository_ctx, llvm_cmake): "LLVM_VERSION_MAJOR": None, "LLVM_VERSION_MINOR": None, "LLVM_VERSION_PATCH": None, + "LLVM_VERSION_SUFFIX": None, } # It would be easier to use external commands like sed(1) and python.
@@ -126,6 +125,13 @@ def _extract_cmake_settings(repository_ctx, llvm_cmake): c["LLVM_VERSION_PATCH"], ) + c["PACKAGE_VERSION"] = "{}.{}.{}{}".format( + c["LLVM_VERSION_MAJOR"], + c["LLVM_VERSION_MINOR"], + c["LLVM_VERSION_PATCH"], + c["LLVM_VERSION_SUFFIX"], + ) + return c def _write_dict_to_file(repository_ctx, filepath, header, vars): diff --git a/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel new file mode 100644 index 0000000..300f279 --- /dev/null +++ b/utils/bazel/llvm-project-overlay/lldb/BUILD.bazel @@ -0,0 +1,848 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +load("@bazel_skylib//lib:selects.bzl", "selects") +load("@bazel_skylib//rules:common_settings.bzl", "bool_flag") +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") +load("//:vars.bzl", "LLVM_VERSION_MAJOR", "LLVM_VERSION_MINOR", "LLVM_VERSION_PATCH", "LLVM_VERSION_SUFFIX", "PACKAGE_VERSION") +load("//lldb/source/Plugins:plugin_config.bzl", "DEFAULT_PLUGINS", "DEFAULT_SCRIPT_PLUGINS", "OBJCPP_COPTS") +load("//mlir:tblgen.bzl", "gentbl_cc_library", "td_library") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], +) + +licenses(["notice"]) + +exports_files(["LICENSE.TXT"]) + +bool_flag( + name = "enable_curses", + build_setting_default = False, +) + +config_setting( + name = "curses_enabled_setting", + flag_values = {":enable_curses": "true"}, +) + +selects.config_setting_group( + name = "curses_enabled", + match_any = [ + ":curses_enabled_setting", + "@platforms//os:macos", + ], +) + +bool_flag( + name = "enable_libedit", + build_setting_default = False, +) + +config_setting( + name = "libedit_enabled_setting", + flag_values = {":enable_libedit": "true"}, +) + +selects.config_setting_group( + name = "libedit_enabled", + match_any = [ + ":libedit_enabled_setting", + "@platforms//os:macos", + ], +) + +_VERSION_SUBSTITUTIONS = { + "@LLDB_VERSION@": PACKAGE_VERSION, + "@LLDB_VERSION_MAJOR@": LLVM_VERSION_MAJOR, + "@LLDB_VERSION_MINOR@": LLVM_VERSION_MINOR, + "@LLDB_VERSION_PATCH@": LLVM_VERSION_PATCH, + "@LLDB_VERSION_SUFFIX@": LLVM_VERSION_SUFFIX, + '#cmakedefine LLDB_FULL_VERSION_STRING "@LLDB_FULL_VERSION_STRING@"': "/* #undef LLDB_FULL_VERSION_STRING */", +} + +genrule( + name = "vcs_version_gen", + outs = ["VCSVersion.inc"], + cmd = "echo '#undef LLDB_REVISION' >> $@\n" + + "echo '#undef LLDB_REPOSITORY' >> $@\n", +) + +expand_template( + name = "version_inc_gen", + out = "Version/Version.inc", + substitutions = _VERSION_SUBSTITUTIONS, + template = "include/lldb/Version/Version.inc.in", +) + +cc_library( + name = "Version", + srcs = [ + "source/Version/Version.cpp", + ":vcs_version_gen", + ":version_inc_gen", + ], + hdrs = ["include/lldb/Version/Version.h"], + features = ["-layering_check"], # Version.inc breaks this unintentionally + strip_include_prefix = "include", + deps = ["//clang:basic"], +) + +expand_template( + name = "ConfigHeader", + out = "include/lldb/Host/Config.h", + substitutions = { + "#cmakedefine01 HAVE_PTSNAME_R": "#define HAVE_PTSNAME_R 1", + "#cmakedefine01 LLDB_ENABLE_TERMIOS": "#define LLDB_ENABLE_TERMIOS 1", + + # TODO: Add LZMA support by including the library in bazel + "#cmakedefine01 LLDB_ENABLE_LZMA": "#define LLDB_ENABLE_LZMA 0", + + # TODO: lua support + "#cmakedefine01 LLDB_ENABLE_LUA": "#define LLDB_ENABLE_LUA 0", + + # TODO: 
python support + "#cmakedefine01 LLDB_ENABLE_PYTHON": "#define LLDB_ENABLE_PYTHON 0", + # Only enabled by default on Windows + "#cmakedefine01 LLDB_EMBED_PYTHON_HOME": "#define LLDB_EMBED_PYTHON_HOME 0", + # Only used if LLDB_EMBED_PYTHON_HOME is true + "#cmakedefine LLDB_PYTHON_HOME R\"(${LLDB_PYTHON_HOME})\"": "#define LLDB_PYTHON_HOME \"\"", + + # Unsupported + "#cmakedefine01 CURSES_HAVE_NCURSES_CURSES_H": "#define CURSES_HAVE_NCURSES_CURSES_H 0", + "#cmakedefine01 LLDB_ENABLE_FBSDVMCORE": "#define LLDB_ENABLE_FBSDVMCORE 0", + + # Defaults that could be configurable if needed + "#cmakedefine01 LLDB_ENABLE_POSIX": "#define LLDB_ENABLE_POSIX 1", + "#cmakedefine LLDB_GLOBAL_INIT_DIRECTORY R\"(${LLDB_GLOBAL_INIT_DIRECTORY})\"": "#define LLDB_GLOBAL_INIT_DIRECTORY \"\"", + "${LLDB_INSTALL_LIBDIR_BASENAME}": "lib", + "${LLDB_BUG_REPORT_URL}": "", + } | select({ + "@platforms//os:macos": { + "#cmakedefine HAVE_LIBCOMPRESSION": "#define HAVE_LIBCOMPRESSION", + "#cmakedefine01 HAVE_NR_PROCESS_VM_READV": "#define HAVE_NR_PROCESS_VM_READV 0", + "#cmakedefine01 HAVE_PPOLL": "#define HAVE_PPOLL 0", + "#cmakedefine01 HAVE_PROCESS_VM_READV": "#define HAVE_PROCESS_VM_READV 0", + "#cmakedefine01 HAVE_SYS_EVENT_H": "#define HAVE_SYS_EVENT_H 1", + "#cmakedefine01 LLDB_ENABLE_LIBXML2": "#define LLDB_ENABLE_LIBXML2 1", + "#cmakedefine01 LLDB_HAVE_EL_RFUNC_T": "#define LLDB_HAVE_EL_RFUNC_T 0", + }, + "@platforms//os:linux": { + "#cmakedefine HAVE_LIBCOMPRESSION": "/* #undef HAVE_LIBCOMPRESSION */", + "#cmakedefine01 HAVE_NR_PROCESS_VM_READV": "#define HAVE_NR_PROCESS_VM_READV 1", + "#cmakedefine01 HAVE_PPOLL": "#define HAVE_PPOLL 1", + "#cmakedefine01 HAVE_PROCESS_VM_READV": "#define HAVE_PROCESS_VM_READV 1", + "#cmakedefine01 HAVE_SYS_EVENT_H": "#define HAVE_SYS_EVENT_H 0", + "#cmakedefine01 LLDB_ENABLE_LIBXML2": "#define LLDB_ENABLE_LIBXML2 0", + "#cmakedefine01 LLDB_HAVE_EL_RFUNC_T": "#define LLDB_HAVE_EL_RFUNC_T 1", + }, + }) | select({ + ":curses_enabled": { + "#cmakedefine01 LLDB_ENABLE_CURSES": "#define LLDB_ENABLE_CURSES 1", + }, + "//conditions:default": { + "#cmakedefine01 LLDB_ENABLE_CURSES": "#define LLDB_ENABLE_CURSES 0", + }, + }) | select({ + ":libedit_enabled": { + "#cmakedefine01 LLDB_EDITLINE_USE_WCHAR": "#define LLDB_EDITLINE_USE_WCHAR 1", + "#cmakedefine01 LLDB_ENABLE_LIBEDIT": "#define LLDB_ENABLE_LIBEDIT 1", + }, + "//conditions:default": { + "#cmakedefine01 LLDB_EDITLINE_USE_WCHAR": "#define LLDB_EDITLINE_USE_WCHAR 0", + "#cmakedefine01 LLDB_ENABLE_LIBEDIT": "#define LLDB_ENABLE_LIBEDIT 0", + }, + }), + template = "include/lldb/Host/Config.h.cmake", +) + +cc_library( + name = "Config", + hdrs = [":ConfigHeader"], + include_prefix = "lldb/Host", +) + +cc_binary( + name = "lldb-tblgen", + srcs = glob([ + "utils/TableGen/*.cpp", + "utils/TableGen/*.h", + ]), + deps = [ + "//llvm:CodeGenTypes", + "//llvm:Support", + "//llvm:TableGen", + "//llvm:TargetParser", + "//llvm:config", + ], +) + +cc_library( + name = "API", + srcs = glob([ + "source/API/**/*.cpp", + "source/API/**/*.h", + ]), + hdrs = glob(["include/lldb/API/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":Breakpoint", + ":Commands", + ":Core", + ":DataFormatters", + ":Expression", + ":Headers", + ":Host", + ":Initialization", + ":InterpreterHeaders", + ":Symbol", + ":SymbolHeaders", + ":Target", + ":TargetHeaders", + ":Utility", + ":Version", + "//lldb/source/Plugins:PluginExpressionParserClangHeaders", + "//lldb/source/Plugins:PluginsConfig", + "//llvm:ExecutionEngine", + "//llvm:MCJIT", + "//llvm:Support", + 
"//llvm:config", + ], +) + +cc_library( + name = "Breakpoint", + srcs = glob(["source/Breakpoint/**/*.cpp"]), + hdrs = glob(["include/lldb/Breakpoint/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":Core", + ":DataFormattersHeaders", + ":Expression", + ":Headers", + ":InterpreterHeaders", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "DataFormatters", + srcs = glob(["source/DataFormatters/**/*.cpp"]), + hdrs = glob(["include/lldb/DataFormatters/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":CoreHeaders", + ":Headers", + ":InterpreterHeaders", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "Expression", + srcs = glob(["source/Expression/**/*.cpp"]), + hdrs = glob(["include/lldb/Expression/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":Core", + ":Headers", + ":Host", + ":InterpreterHeaders", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + "//lldb/source/Plugins:PluginSymbolFileDWARFHeaders", + "//llvm:Core", + "//llvm:DebugInfoDWARF", + "//llvm:ExecutionEngine", + "//llvm:Support", + ], +) + +cc_library( + name = "Initialization", + srcs = glob(["source/Initialization/**/*.cpp"]), + hdrs = glob(["include/lldb/Initialization/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":Core", + ":Headers", + ":Host", + ":TargetHeaders", + ":Utility", + ":Version", + "//lldb/source/Plugins:PluginProcessGDBRemote", + "//lldb/source/Plugins:PluginProcessPOSIX", + "//llvm:Support", + ], +) + +gentbl_cc_library( + name = "InterpreterProperties", + strip_include_prefix = "source/Interpreter", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "source/Interpreter/InterpreterProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "source/Interpreter/InterpreterPropertiesEnum.inc", + ), + ], + tblgen = ":lldb-tblgen", + td_file = "source/Interpreter/InterpreterProperties.td", + deps = [":CoreTdFiles"], +) + +cc_library( + name = "APIHeaders", + hdrs = glob(["include/lldb/API/**/*.h"]), + strip_include_prefix = "include", +) + +cc_library( + name = "InterpreterHeaders", + hdrs = glob(["include/lldb/Interpreter/**/*.h"]), + strip_include_prefix = "include", + deps = [":APIHeaders"], +) + +cc_library( + name = "BreakpointHeaders", + hdrs = glob(["include/lldb/Breakpoint/**/*.h"]), + strip_include_prefix = "include", +) + +cc_library( + name = "ExpressionHeaders", + hdrs = glob(["include/lldb/Expression/**/*.h"]), + strip_include_prefix = "include", + deps = ["//llvm:ExecutionEngine"], +) + +cc_library( + name = "DataFormattersHeaders", + hdrs = glob(["include/lldb/DataFormatters/**/*.h"]), + strip_include_prefix = "include", +) + +cc_library( + name = "Interpreter", + srcs = glob(["source/Interpreter/**/*.cpp"]), + deps = [ + ":API", + ":Commands", + ":Core", + ":DataFormatters", + ":Headers", + ":Host", + ":InterpreterHeaders", + ":InterpreterProperties", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + "//llvm:Support", + ], +) + +td_library( + name = "CommandsTdFiles", + srcs = glob(["source/Commands/**/*.td"]), +) + +gentbl_cc_library( + name = "CommandOptions", + strip_include_prefix = "source/Commands", + tbl_outs = [ + ( + ["-gen-lldb-option-defs"], + "source/Commands/CommandOptions.inc", + ), + ], + tblgen = ":lldb-tblgen", + td_file = "source/Commands/Options.td", + deps = [":CommandsTdFiles"], +) + +cc_library( + name = "Commands", + srcs = glob(["source/Commands/**/*.cpp"]), + hdrs = 
glob(["source/Commands/**/*.h"]), + strip_include_prefix = "source", + deps = [ + ":Breakpoint", + ":CommandOptions", + ":Core", + ":DataFormatters", + ":Expression", + ":Headers", + ":Host", + ":InterpreterHeaders", + ":SymbolHeaders", + ":Target", + ":TargetHeaders", + ":Utility", + ":Version", + "//clang:codegen", + "//clang:frontend", + "//llvm:Support", + ], +) + +cc_library( + name = "SymbolHeaders", + hdrs = glob(["include/lldb/Symbol/**/*.h"]), + strip_include_prefix = "include", +) + +cc_library( + name = "Symbol", + srcs = glob(["source/Symbol/**/*.cpp"]), + deps = [ + ":Core", + ":Expression", + ":Headers", + ":Host", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + ":UtilityPrivateHeaders", + "//llvm:DebugInfo", + "//llvm:DebugInfoDWARF", + "//llvm:Support", + ], +) + +cc_library( + name = "HostMacOSXHeaders", + hdrs = glob([ + "include/lldb/Host/*.h", + "include/lldb/Host/macosx/*.h", + "include/lldb/Host/posix/*.h", + ]), + strip_include_prefix = "include", + deps = [":Utility"], +) + +cc_library( + name = "HostMacOSXPrivateHeaders", + hdrs = glob([ + "source/Host/macosx/cfcpp/*.h", + "source/Host/macosx/objcxx/*.h", + ]), + strip_include_prefix = "source", + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [":Utility"], +) + +objc_library( + name = "HostMacOSXObjCXX", + srcs = glob([ + "source/Host/macosx/objcxx/*.mm", + ]), + copts = OBJCPP_COPTS, + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + ":HostMacOSXHeaders", + ":HostMacOSXPrivateHeaders", + ], +) + +cc_library( + name = "Host", + srcs = glob([ + "source/Host/common/**/*.cpp", + ]) + select({ + "@platforms//os:linux": glob( + [ + "source/Host/posix/**/*.cpp", + "source/Host/linux/**/*.cpp", + ], + exclude = ["source/Host/linux/android/**/*.cpp"], + ), + "@platforms//os:macos": glob( + [ + "source/Host/macosx/cfcpp/*.cpp", + "source/Host/posix/**/*.cpp", + ], + ), + }), + hdrs = [":ConfigHeader"] + glob([ + "include/lldb/Host/*.h", + "include/lldb/Host/common/*.h", + ]) + select({ + "@platforms//os:macos": glob([ + "include/lldb/Host/macosx/*.h", + "include/lldb/Host/posix/*.h", + ]), + "@platforms//os:linux": glob([ + "include/lldb/Host/linux/*.h", + "include/lldb/Host/posix/*.h", + ]), + }), + # TODO: Move this to Config library when https://github.com/bazelbuild/bazel/issues/21884 is fixed + linkopts = select({ + "@platforms//os:macos": [ + "-lcompression", + "-lxml2", + "-Wl,-framework,CoreServices", + "-Wl,-framework,Security", + ], + "//conditions:default": [], + }) + select({ + ":curses_enabled": [ + "-lcurses", + "-lpanel", + ], + "//conditions:default": [], + }) + select({ + ":libedit_enabled": [ + "-ledit", + ], + "//conditions:default": [], + }), + strip_include_prefix = "include", + deps = [ + ":Headers", + ":Utility", + "//llvm:Object", + "//llvm:Support", + "//llvm:TargetParser", + "//llvm:config", + ] + select({ + "@platforms//os:macos": [":HostMacOSXObjCXX"], + "//conditions:default": [], + }), +) + +td_library( + name = "CoreTdFiles", + srcs = glob([ + "source/Core/**/*.td", + "include/lldb/Core/*.td", + ]), +) + +gentbl_cc_library( + name = "CoreProperties", + strip_include_prefix = "source/Core", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "source/Core/CoreProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "source/Core/CorePropertiesEnum.inc", + ), + ], + tblgen = ":lldb-tblgen", + td_file = 
"source/Core/CoreProperties.td", + deps = [":CoreTdFiles"], +) + +cc_library( + name = "CoreHeaders", + hdrs = glob(["include/lldb/Core/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":BreakpointHeaders", + ":CoreProperties", + ":DataFormattersHeaders", + ":ExpressionHeaders", + ":Host", + ":InterpreterHeaders", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + "//clang:driver", + "//llvm:Demangle", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "Core", + srcs = glob(["source/Core/**/*.cpp"]), + hdrs = glob(["include/lldb/Core/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":BreakpointHeaders", + ":CoreHeaders", + ":CoreProperties", + ":DataFormattersHeaders", + ":ExpressionHeaders", + ":Headers", + ":Host", + ":InterpreterHeaders", + ":SymbolHeaders", + ":TargetHeaders", + ":Utility", + "//clang:driver", + "//lldb/source/Plugins:PluginCPlusPlusLanguageHeaders", + "//lldb/source/Plugins:PluginObjCLanguageHeaders", + "//llvm:Demangle", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +gentbl_cc_library( + name = "TargetProperties", + strip_include_prefix = "source/Target", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "source/Target/TargetProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "source/Target/TargetPropertiesEnum.inc", + ), + ], + tblgen = ":lldb-tblgen", + td_file = "source/Target/TargetProperties.td", + deps = [":CoreTdFiles"], +) + +cc_library( + name = "AppleArm64ExceptionClass", + hdrs = ["include/lldb/Target/AppleArm64ExceptionClass.def"], + strip_include_prefix = "include/lldb/Target", +) + +cc_library( + name = "TargetHeaders", + hdrs = glob(["include/lldb/Target/**/*.h"]), + strip_include_prefix = "include", + deps = [":AppleArm64ExceptionClass"], +) + +cc_library( + name = "Target", + srcs = glob(["source/Target/**/*.cpp"]), + deps = [ + ":BreakpointHeaders", + ":Core", + ":DataFormattersHeaders", + ":ExpressionHeaders", + ":Headers", + ":Host", + ":InterpreterHeaders", + ":Symbol", + ":SymbolHeaders", + ":TargetHeaders", + ":TargetProperties", + ":Utility", + "//lldb/source/Plugins:PluginProcessUtility", + "//llvm:MC", + "//llvm:Support", + ], +) + +cc_library( + name = "Headers", + hdrs = glob(["include/lldb/lldb-*.h"]), + strip_include_prefix = "include", +) + +cc_library( + name = "UtilityPrivateHeaders", + hdrs = glob(["source/Utility/**/*.h"]), + strip_include_prefix = "source", + deps = [":Headers"], +) + +cc_library( + name = "Utility", + srcs = glob(["source/Utility/**/*.cpp"]), + hdrs = glob(["include/lldb/Utility/**/*.h"]), + strip_include_prefix = "include", + deps = [ + ":Headers", + ":UtilityPrivateHeaders", + "//llvm:BinaryFormat", + "//llvm:Support", + "//llvm:TargetParser", + "//llvm:config", + ], +) + +cc_library( + name = "liblldb.static", + deps = [ + ":API", + ":Host", + ":Interpreter", + "//llvm:AllTargetsDisassemblers", + ] + [ + "//lldb/source/Plugins:Plugin{}".format(x) + for x in DEFAULT_PLUGINS + DEFAULT_SCRIPT_PLUGINS + ] + select({ + "@platforms//os:macos": [ + "//lldb/source/Plugins:PluginProcessMacOSXKernel", + "//lldb/source/Plugins:PluginSymbolLocatorDebugSymbols", + "//lldb/source/Plugins:PluginSymbolVendorMacOSX", + ], + "//conditions:default": [], + }), +) + +cc_shared_library( + name = "liblldb", + # TODO: Remove once fixed https://github.com/bazelbuild/bazel/issues/21893 + additional_linker_inputs = select({ + "@platforms//os:macos": [ + ":HostMacOSXObjCXX", + "//lldb/source/Plugins:PluginPlatformMacOSXObjCXX", + ], + "//conditions:default": [], + 
}), + shared_lib_name = select({ + "@platforms//os:macos": "liblldb{}.dylib".format(PACKAGE_VERSION), + "@platforms//os:linux": "liblldb{}.so".format(PACKAGE_VERSION), + }), + # TODO: Remove once fixed https://github.com/bazelbuild/bazel/issues/21893 + user_link_flags = select({ + "@platforms//os:macos": [ + "$(location :HostMacOSXObjCXX)", + "$(location //lldb/source/Plugins:PluginPlatformMacOSXObjCXX)", + ], + "//conditions:default": [], + }), + deps = [":liblldb.static"], +) + +gentbl_cc_library( + name = "lldb_options_inc_gen", + strip_include_prefix = ".", + tbl_outs = [( + ["-gen-opt-parser-defs"], + "Options.inc", + )], + tblgen = "//llvm:llvm-tblgen", + td_file = "tools/driver/Options.td", + deps = ["//llvm:OptParserTdFiles"], +) + +cc_binary( + name = "lldb", + srcs = glob([ + "tools/driver/*.cpp", + "tools/driver/*.h", + ]), + data = [ + ":lldb-argdumper", + ] + select({ + "@platforms//os:macos": [":debugserver"], + "//conditions:default": [], + }), + deps = [ + ":APIHeaders", + ":Host", + ":liblldb.static", + ":lldb_options_inc_gen", + "//llvm:Option", + "//llvm:Support", + ], +) + +cc_library( + name = "DebugServerCommonMacOSXHeaders", + hdrs = glob(["tools/debugserver/source/MacOSX/**/*.h"]), + strip_include_prefix = "tools/debugserver/source/MacOSX", +) + +cc_library( + name = "DebugServerCommonHeaders", + hdrs = glob(["tools/debugserver/source/**/*.h"]), + strip_include_prefix = "tools/debugserver/source", + deps = [":DebugServerCommonMacOSXHeaders"], +) + +objc_library( + name = "DebugServerMacOSX", + srcs = glob(["tools/debugserver/source/MacOSX/*.mm"]), + copts = OBJCPP_COPTS, + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + ":DebugServerCommonHeaders", + ":DebugServerCommonMacOSXHeaders", + ], +) + +cc_library( + name = "DebugServerCommon", + srcs = glob( + ["tools/debugserver/source/**/*.cpp"], + exclude = ["tools/debugserver/source/debugserver.cpp"], + ), + local_defines = ["LLDB_USE_OS_LOG"], + deps = [ + ":DebugServerCommonHeaders", + ":DebugServerCommonMacOSXHeaders", + ":DebugServerMacOSX", + ":Host", + ], +) + +genrule( + name = "mach_gen", + srcs = ["tools/debugserver/source/MacOSX/dbgnub-mig.defs"], + outs = [ + "mach_exc.h", + "mach_excServer.c", + "mach_excUser.c", + ], + cmd = "mig -header $(location :mach_exc.h) -server $(location :mach_excServer.c) -user $(location :mach_excUser.c) $(SRCS)", + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), +) + +expand_template( + name = "debugserver_version_gen", + out = "debugserver_vers.c", + substitutions = _VERSION_SUBSTITUTIONS, + template = "tools/debugserver/source/debugserver_vers.c.in", +) + +cc_binary( + name = "debugserver", + srcs = [ + "tools/debugserver/source/debugserver.cpp", + ":debugserver_version_gen", + ":mach_gen", + ], + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [":DebugServerCommon"], +) + +cc_binary( + name = "lldb-argdumper", + srcs = glob(["tools/argdumper/*.cpp"]), + deps = ["//llvm:Support"], +) diff --git a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel new file mode 100644 index 0000000..95773ed --- /dev/null +++ b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/BUILD.bazel @@ -0,0 +1,2319 @@ +# This file is licensed under the Apache License 
v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +load("@bazel_skylib//rules:expand_template.bzl", "expand_template") +load("//mlir:tblgen.bzl", "gentbl_cc_library") +load(":plugin_config.bzl", "DEFAULT_PLUGINS", "DEFAULT_SCRIPT_PLUGINS", "OBJCPP_COPTS") + +package( + default_visibility = ["//visibility:public"], + features = ["layering_check"], +) + +licenses(["notice"]) + +cc_library( + name = "PluginClangCommon", + srcs = glob(["Language/ClangCommon/*.cpp"]), + hdrs = glob(["Language/ClangCommon/*.h"]), + include_prefix = "Plugins", + deps = [ + "//clang:basic", + "//clang:lex", + "//lldb:CoreHeaders", + "//lldb:Host", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjCLanguageHeaders", + hdrs = glob(["Language/ObjC/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginClangCommon", + ":PluginExpressionParserClangHeaders", + "//lldb:CoreHeaders", + ], +) + +cc_library( + name = "PluginObjCLanguage", + srcs = glob(["Language/ObjC/*.cpp"]), + include_prefix = "Plugins", + deps = [ + ":PluginAppleObjCRuntime", + ":PluginExpressionParserClangHeaders", + ":PluginObjCLanguageHeaders", + ":PluginObjCRuntime", + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//clang:basic", + "//lldb:CoreHeaders", + "//lldb:DataFormattersHeaders", + "//lldb:ExpressionHeaders", + "//lldb:Host", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginTypeSystemClangHeaders", + hdrs = glob(["TypeSystem/Clang/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginExpressionParserClangHeaders", + "//clang:frontend", + "//lldb:CoreHeaders", + ], +) + +cc_library( + name = "PluginCPPRuntime", + srcs = glob(["LanguageRuntime/CPlusPlus/*.cpp"]), + hdrs = glob(["LanguageRuntime/CPlusPlus/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:CoreHeaders", + "//lldb:Headers", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjCRuntime", + srcs = glob(["LanguageRuntime/ObjC/*.cpp"]), + hdrs = glob(["LanguageRuntime/ObjC/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//lldb:BreakpointHeaders", + "//lldb:CoreHeaders", + "//lldb:Headers", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginAppleObjCRuntime", + srcs = glob(["LanguageRuntime/ObjC/AppleObjCRuntime/*.cpp"]), + hdrs = glob(["LanguageRuntime/ObjC/AppleObjCRuntime/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginCPPRuntime", + ":PluginExpressionParserClangHeaders", + ":PluginObjCLanguageHeaders", + ":PluginObjCRuntime", + ":PluginProcessUtility", + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//clang:basic", + "//lldb:BreakpointHeaders", + "//lldb:CoreHeaders", + "//lldb:DataFormattersHeaders", + "//lldb:ExpressionHeaders", + "//lldb:Headers", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginTypeSystemClang", + srcs = glob(["TypeSystem/Clang/*.cpp"]), + deps = [ + ":PluginExpressionParserClangHeaders", + ":PluginObjCRuntime", + ":PluginSymbolFileDWARF", + ":PluginSymbolFileDWARFHeaders", + ":PluginSymbolFileNativePDBHeaders", + ":PluginSymbolFilePDBHeaders", + 
":PluginTypeSystemClangHeaders", + "//clang:ast", + "//clang:basic", + "//clang:frontend", + "//clang:lex", + "//clang:sema", + "//lldb:CoreHeaders", + "//lldb:Host", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginExpressionParserClangHeaders", + hdrs = glob(["ExpressionParser/Clang/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:CoreHeaders", + "//lldb:DataFormattersHeaders", + ], +) + +cc_library( + name = "PluginExpressionParserClang", + srcs = glob(["ExpressionParser/Clang/*.cpp"]), + include_prefix = "Plugins", + deps = [ + ":PluginCPPRuntime", + ":PluginCPlusPlusLanguageHeaders", + ":PluginExpressionParserClangHeaders", + ":PluginObjCRuntime", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//clang:basic", + "//clang:codegen", + "//clang:config", + "//clang:driver", + "//clang:edit", + "//clang:frontend", + "//clang:frontend_rewrite", + "//clang:lex", + "//clang:parse", + "//clang:rewrite", + "//clang:sema", + "//clang:serialization", + "//lldb:Core", + "//lldb:DataFormatters", + "//lldb:ExpressionHeaders", + "//lldb:Headers", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Core", + "//llvm:ExecutionEngine", + "//llvm:IPO", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +gentbl_cc_library( + name = "PlatformMacOSXProperties", + strip_include_prefix = "Platform/MacOSX", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "Platform/MacOSX/PlatformMacOSXProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "Platform/MacOSX/PlatformMacOSXPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "Platform/MacOSX/PlatformMacOSXProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginPlatformMacOSXObjCXXHeaders", + hdrs = glob(["Platform/MacOSX/objcxx/*.h"]), + include_prefix = "Plugins", + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = ["//lldb:Host"], +) + +objc_library( + name = "PluginPlatformMacOSXObjCXX", + srcs = glob(["Platform/MacOSX/objcxx/*.mm"]), + copts = OBJCPP_COPTS, + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [":PluginPlatformMacOSXObjCXXHeaders"], +) + +cc_library( + name = "PluginPlatformMacOSX", + srcs = glob( + ["Platform/MacOSX/*.cpp"], + exclude = ["Platform/MacOSX/PlatformAppleSimulator.cpp"], + ) + + select({ + "@platforms//os:macos": ["Platform/MacOSX/PlatformAppleSimulator.cpp"], + "//conditions:default": [], + }), + hdrs = glob(["Platform/MacOSX/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PlatformMacOSXProperties", + ":PluginDynamicLoaderDarwinKernelHeaders", + ":PluginObjectContainerMachOFileset", + ":PluginPlatformPOSIX", + "//clang:driver_options_inc_gen", + "//lldb:BreakpointHeaders", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + "//llvm:TargetParser", + ] + select({ + "@platforms//os:macos": [":PluginPlatformMacOSXObjCXX"], + "//conditions:default": [], + }), +) + +gentbl_cc_library( + name = "SymbolFileDWARFProperties", + strip_include_prefix = "SymbolFile/DWARF", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "SymbolFile/DWARF/SymbolFileDWARFProperties.inc", + ), + 
( + ["-gen-lldb-property-enum-defs"], + "SymbolFile/DWARF/SymbolFileDWARFPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "SymbolFile/DWARF/SymbolFileDWARFProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginSymbolFileDWARFHeaders", + hdrs = glob(["SymbolFile/DWARF/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginTypeSystemClangHeaders", + "//lldb:Core", + ], +) + +cc_library( + name = "PluginSymbolFileDWARF", + srcs = glob(["SymbolFile/DWARF/*.cpp"]), + deps = [ + ":PluginCPlusPlusLanguageHeaders", + ":PluginExpressionParserClangHeaders", + ":PluginObjCLanguageHeaders", + ":PluginSymbolFileDWARFHeaders", + ":PluginTypeSystemClangHeaders", + ":SymbolFileDWARFProperties", + "//clang:ast", + "//lldb:Core", + "//lldb:ExpressionHeaders", + "//lldb:Headers", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:DebugInfoDWARF", + "//llvm:Demangle", + "//llvm:Object", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginProcessUtility", + srcs = glob(["Process/Utility/*.cpp"]), + hdrs = glob(["Process/Utility/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:BreakpointHeaders", + "//lldb:Core", + "//lldb:ExpressionHeaders", + "//lldb:Headers", + "//lldb:Host", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginObjectFilePDB", + srcs = glob(["ObjectFile/PDB/*.cpp"]), + hdrs = glob(["ObjectFile/PDB/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:DebugInfoPDB", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginSymbolFileNativePDBHeaders", + hdrs = glob(["SymbolFile/NativePDB/*.h"]), + include_prefix = "Plugins", + deps = ["//lldb:Core"], +) + +cc_library( + name = "PluginSymbolFileNativePDB", + srcs = glob(["SymbolFile/NativePDB/*.cpp"]), + deps = [ + ":PluginCPlusPlusLanguageHeaders", + ":PluginExpressionParserClangHeaders", + ":PluginObjectFilePDB", + ":PluginProcessUtility", + ":PluginSymbolFileNativePDBHeaders", + ":PluginSymbolFilePDBHeaders", + ":PluginTypeSystemClangHeaders", + "//lldb:Core", + "//lldb:ExpressionHeaders", + "//lldb:Headers", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:DebugInfoCodeView", + "//llvm:DebugInfoMSF", + "//llvm:DebugInfoPDB", + "//llvm:Demangle", + "//llvm:Object", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginSymbolFilePDBHeaders", + hdrs = glob(["SymbolFile/PDB/*.h"]), + include_prefix = "Plugins", + deps = ["//lldb:Core"], +) + +cc_library( + name = "PluginSymbolFilePDB", + srcs = glob(["SymbolFile/PDB/*.cpp"]), + deps = [ + ":PluginCPlusPlusLanguageHeaders", + ":PluginExpressionParserClangHeaders", + ":PluginSymbolFileNativePDB", + ":PluginSymbolFileNativePDBHeaders", + ":PluginSymbolFilePDBHeaders", + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//clang:lex", + "//lldb:Core", + "//lldb:ExpressionHeaders", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:DebugInfoCodeView", + "//llvm:DebugInfoPDB", + ], +) + +gentbl_cc_library( + name = "ProcessGDBRemoteProperties", + strip_include_prefix = "Process/gdb-remote", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "Process/gdb-remote/ProcessGDBRemoteProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + 
"Process/gdb-remote/ProcessGDBRemotePropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "Process/gdb-remote/ProcessGDBRemoteProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginProcessGDBRemote", + srcs = glob(["Process/gdb-remote/*.cpp"]), + hdrs = glob(["Process/gdb-remote/*.h"]) + [ + "Process/gdb-remote/GDBRemoteErrno.def", + ], + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + ":ProcessGDBRemoteProperties", + "//lldb:BreakpointHeaders", + "//lldb:CoreHeaders", + "//lldb:DataFormattersHeaders", + "//lldb:Headers", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:Support", + "//llvm:TargetParser", + "@llvm_zlib//:zlib", + ], +) + +cc_library( + name = "PluginObjectContainerMachOArchive", + srcs = glob(["ObjectContainer/Universal-Mach-O/*.cpp"]), + hdrs = glob(["ObjectContainer/Universal-Mach-O/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginObjectContainerBSDArchive", + srcs = glob(["ObjectContainer/BSD-Archive/*.cpp"]), + hdrs = glob(["ObjectContainer/BSD-Archive/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Object", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjectContainerMachOFileset", + srcs = glob(["ObjectContainer/Mach-O-Fileset/*.cpp"]), + hdrs = glob(["ObjectContainer/Mach-O-Fileset/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +gentbl_cc_library( + name = "StructuredDataDarwinLogProperties", + strip_include_prefix = "StructuredData/DarwinLog", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "StructuredData/DarwinLog/StructuredDataDarwinLogProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "StructuredData/DarwinLog/StructuredDataDarwinLogPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "StructuredData/DarwinLog/StructuredDataDarwinLogProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginStructuredDataDarwinLog", + srcs = glob(["StructuredData/DarwinLog/*.cpp"]), + hdrs = glob(["StructuredData/DarwinLog/*.h"]), + include_prefix = "Plugins", + deps = [ + ":StructuredDataDarwinLogProperties", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginTraceCommon", + srcs = glob(["Trace/common/*.cpp"]), + hdrs = glob(["Trace/common/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Target", + "//lldb:TargetHeaders", + ], +) + +cc_library( + name = "PluginPlatformPOSIX", + srcs = glob(["Platform/POSIX/*.cpp"]), + hdrs = glob(["Platform/POSIX/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginPlatformGDB", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Target", + 
"//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +gentbl_cc_library( + name = "PlatformQemuUserProperties", + strip_include_prefix = "Platform/QemuUser", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "Platform/QemuUser/PlatformQemuUserProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "Platform/QemuUser/PlatformQemuUserPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "Platform/QemuUser/PlatformQemuUserProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginPlatformQemuUser", + srcs = glob(["Platform/QemuUser/*.cpp"]), + hdrs = glob(["Platform/QemuUser/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PlatformQemuUserProperties", + ":PluginProcessGDBRemote", + "//lldb:CoreHeaders", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginPlatformGDB", + srcs = glob(["Platform/gdb-server/*.cpp"]), + hdrs = glob(["Platform/gdb-server/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessGDBRemote", + ":PluginProcessUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Host", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginPlatformLinux", + srcs = glob(["Platform/Linux/*.cpp"]), + hdrs = glob(["Platform/Linux/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginPlatformPOSIX", + ":PluginTypeSystemClangHeaders", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + ], +) + +gentbl_cc_library( + name = "PlatformAndroidProperties", + strip_include_prefix = "Platform/Android", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "Platform/Android/PlatformAndroidProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "Platform/Android/PlatformAndroidPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "Platform/Android/PlatformAndroidProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginPlatformAndroid", + srcs = glob(["Platform/Android/*.cpp"]), + hdrs = glob(["Platform/Android/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PlatformAndroidProperties", + ":PluginPlatformGDB", + ":PluginPlatformLinux", + ":PluginPlatformPOSIX", + "//lldb:Core", + "//lldb:Host", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginPlatformWindows", + srcs = glob(["Platform/Windows/*.cpp"]), + hdrs = glob(["Platform/Windows/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginPlatformGDB", + ":PluginTypeSystemClangHeaders", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:ExpressionHeaders", + "//lldb:Host", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginMemoryHistoryASan", + srcs = glob(["MemoryHistory/asan/*.cpp"]), + hdrs = glob(["MemoryHistory/asan/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Headers", + "//lldb:Target", + "//lldb:TargetHeaders", + ], +) + +cc_library( + name = "PluginClangREPL", + srcs = glob(["REPL/Clang/*.cpp"]), + hdrs = glob(["REPL/Clang/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginCPPRuntime", + ":PluginClangCommon", + ":PluginTypeSystemClang", + 
"//lldb:Core", + "//lldb:DataFormatters", + "//lldb:ExpressionHeaders", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:Target", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginSymbolVendorWasm", + srcs = glob(["SymbolVendor/wasm/*.cpp"]), + hdrs = glob(["SymbolVendor/wasm/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileWasm", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginSymbolVendorMacOSX", + srcs = glob(["SymbolVendor/MacOSX/*.cpp"]), + hdrs = glob(["SymbolVendor/MacOSX/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileMachO", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginSymbolVendorPECOFF", + srcs = glob(["SymbolVendor/PECOFF/*.cpp"]), + hdrs = glob(["SymbolVendor/PECOFF/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFilePECOFF", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginSymbolVendorELF", + srcs = glob(["SymbolVendor/ELF/*.cpp"]), + hdrs = glob(["SymbolVendor/ELF/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileELF", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginObjCPlusPlusLanguage", + srcs = glob(["Language/ObjCPlusPlus/*.cpp"]), + hdrs = glob(["Language/ObjCPlusPlus/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginClangCommon", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginCPlusPlusLanguageHeaders", + hdrs = glob(["Language/CPlusPlus/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginClangCommon", + ":PluginExpressionParserClangHeaders", + "//lldb:CoreHeaders", + ], +) + +cc_library( + name = "PluginCPlusPlusLanguage", + srcs = glob(["Language/CPlusPlus/*.cpp"]), + include_prefix = "Plugins", + deps = [ + ":PluginCPPRuntime", + ":PluginCPlusPlusLanguageHeaders", + ":PluginClangCommon", + ":PluginExpressionParserClangHeaders", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//clang:basic", + "//lldb:Core", + "//lldb:DataFormatters", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Demangle", + "//llvm:Support", + ], +) + +gentbl_cc_library( + name = "TraceExporterCTFOptions", + strip_include_prefix = "TraceExporter/ctf", + tbl_outs = [( + ["-gen-lldb-option-defs"], + "TraceExporter/ctf/TraceExporterCTFCommandOptions.inc", + )], + tblgen = "//lldb:lldb-tblgen", + td_file = "TraceExporter/ctf/TraceExporterCTFOptions.td", + deps = [ + "//lldb:CommandsTdFiles", + "//lldb:CoreTdFiles", + ], +) + +cc_library( + name = "PluginTraceExporterCTF", + srcs = glob(["TraceExporter/ctf/*.cpp"]), + hdrs = glob(["TraceExporter/ctf/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginTraceExporterCommon", + ":TraceExporterCTFOptions", + "//lldb:Core", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:Target", + "//lldb:TargetHeaders", + ], +) + +cc_library( + name = "PluginTraceExporterCommon", + srcs = 
glob(["TraceExporter/common/*.cpp"]), + hdrs = glob(["TraceExporter/common/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginABIPowerPC", + srcs = glob(["ABI/PowerPC/*.cpp"]), + hdrs = glob(["ABI/PowerPC/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIHexagon", + srcs = glob(["ABI/Hexagon/*.cpp"]), + hdrs = glob(["ABI/Hexagon/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Core", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIMips", + srcs = glob(["ABI/Mips/*.cpp"]), + hdrs = glob(["ABI/Mips/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIMSP430", + srcs = glob(["ABI/MSP430/*.cpp"]), + hdrs = glob(["ABI/MSP430/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Core", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIX86", + srcs = glob(["ABI/X86/*.cpp"]), + hdrs = glob(["ABI/X86/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIARM", + srcs = glob(["ABI/ARM/*.cpp"]), + hdrs = glob(["ABI/ARM/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIARC", + srcs = glob(["ABI/ARC/*.cpp"]), + hdrs = glob(["ABI/ARC/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Core", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIRISCV", + srcs = glob(["ABI/RISCV/*.cpp"]), + hdrs = glob(["ABI/RISCV/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Core", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABISystemZ", + srcs = glob(["ABI/SystemZ/*.cpp"]), + hdrs = glob(["ABI/SystemZ/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + 
"//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginABIAArch64", + srcs = glob(["ABI/AArch64/*.cpp"]), + hdrs = glob(["ABI/AArch64/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginDynamicLoaderPosixDYLDHeaders", + hdrs = glob(["DynamicLoader/POSIX-DYLD/*.h"]), + include_prefix = "Plugins", +) + +cc_library( + name = "PluginDynamicLoaderPosixDYLD", + srcs = glob(["DynamicLoader/POSIX-DYLD/*.cpp"]), + include_prefix = "Plugins", + deps = [ + ":PluginDynamicLoaderPosixDYLDHeaders", + ":PluginProcessElfCore", + ":PluginProcessUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginDynamicLoaderWindowsDYLD", + srcs = glob(["DynamicLoader/Windows-DYLD/*.cpp"]), + hdrs = glob(["DynamicLoader/Windows-DYLD/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginDynamicLoaderHexagonDYLD", + srcs = glob(["DynamicLoader/Hexagon-DYLD/*.cpp"]), + hdrs = glob(["DynamicLoader/Hexagon-DYLD/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginDynamicLoaderWasmDYLD", + srcs = glob(["DynamicLoader/wasm-DYLD/*.cpp"]), + hdrs = glob(["DynamicLoader/wasm-DYLD/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileWasm", + "//lldb:Core", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginDynamicLoaderStatic", + srcs = glob(["DynamicLoader/Static/*.cpp"]), + hdrs = glob(["DynamicLoader/Static/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginDynamicLoaderMacOSXDYLD", + srcs = glob(["DynamicLoader/MacOSX-DYLD/*.cpp"]), + hdrs = glob(["DynamicLoader/MacOSX-DYLD/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjCRuntime", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:TargetParser", + ], +) + +gentbl_cc_library( + name = "DynamicLoaderDarwinKernelProperties", + strip_include_prefix = "DynamicLoader/Darwin-Kernel", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernelProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernelPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "DynamicLoader/Darwin-Kernel/DynamicLoaderDarwinKernelProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = 
"PluginDynamicLoaderDarwinKernelHeaders", + hdrs = glob(["DynamicLoader/Darwin-Kernel/*.h"]), + include_prefix = "Plugins", +) + +cc_library( + name = "PluginDynamicLoaderDarwinKernel", + srcs = glob(["DynamicLoader/Darwin-Kernel/*.cpp"]), + include_prefix = "Plugins", + deps = [ + ":DynamicLoaderDarwinKernelProperties", + ":PluginDynamicLoaderDarwinKernelHeaders", + ":PluginObjectFileMachO", + ":PluginPlatformMacOSX", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginArchitecturePPC64", + srcs = glob(["Architecture/PPC64/*.cpp"]), + hdrs = glob(["Architecture/PPC64/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + ], +) + +cc_library( + name = "PluginArchitectureMips", + srcs = glob(["Architecture/Mips/*.cpp"]), + hdrs = glob(["Architecture/Mips/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginArchitectureArm", + srcs = glob(["Architecture/Arm/*.cpp"]), + hdrs = glob(["Architecture/Arm/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginArchitectureAArch64", + srcs = glob(["Architecture/AArch64/*.cpp"]), + hdrs = glob(["Architecture/AArch64/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginDisassemblerLLVMC", + srcs = glob(["Disassembler/LLVMC/*.cpp"]), + hdrs = glob(["Disassembler/LLVMC/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:MC", + "//llvm:MCDisassembler", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginSymbolFileSymtab", + srcs = glob(["SymbolFile/Symtab/*.cpp"]), + hdrs = glob(["SymbolFile/Symtab/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginSymbolFileCTF", + srcs = glob(["SymbolFile/CTF/*.cpp"]), + hdrs = glob(["SymbolFile/CTF/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginExpressionParserClangHeaders", + ":PluginTypeSystemClangHeaders", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + "@llvm_zlib//:zlib", + ], +) + +cc_library( + name = "PluginSymbolFileJSON", + srcs = glob(["SymbolFile/JSON/*.cpp"]), + hdrs = glob(["SymbolFile/JSON/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileJSON", + "//lldb:Core", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginSymbolFileBreakpad", + srcs = glob(["SymbolFile/Breakpad/*.cpp"]), + hdrs = glob(["SymbolFile/Breakpad/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileBreakpad", + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + 
"//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstructionPPC64", + srcs = glob(["Instruction/PPC64/*.cpp"]), + hdrs = glob(["Instruction/PPC64/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginInstructionLoongArch", + srcs = glob(["Instruction/LoongArch/*.cpp"]), + hdrs = glob(["Instruction/LoongArch/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstructionMIPS", + srcs = glob(["Instruction/MIPS/*.cpp"]), + hdrs = glob(["Instruction/MIPS/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:MC", + "//llvm:MCDisassembler", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstructionMIPS64", + srcs = glob(["Instruction/MIPS64/*.cpp"]), + hdrs = glob(["Instruction/MIPS64/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:MC", + "//llvm:MCDisassembler", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstructionARM", + srcs = glob(["Instruction/ARM/*.cpp"]), + hdrs = glob(["Instruction/ARM/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstructionRISCV", + srcs = glob(["Instruction/RISCV/*.cpp"]), + hdrs = glob(["Instruction/RISCV/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstructionARM64", + srcs = glob(["Instruction/ARM64/*.cpp"]), + hdrs = glob(["Instruction/ARM64/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginInstrumentationRuntimeASanLibsanitizers", + srcs = glob(["InstrumentationRuntime/ASanLibsanitizers/*.cpp"]), + hdrs = glob(["InstrumentationRuntime/ASanLibsanitizers/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginInstrumentationRuntimeUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginInstrumentationRuntimeTSan", + srcs = glob(["InstrumentationRuntime/TSan/*.cpp"]), + hdrs = glob(["InstrumentationRuntime/TSan/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Breakpoint", + "//lldb:Core", + 
"//lldb:Expression", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginInstrumentationRuntimeASan", + srcs = glob(["InstrumentationRuntime/ASan/*.cpp"]), + hdrs = glob(["InstrumentationRuntime/ASan/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginInstrumentationRuntimeUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginInstrumentationRuntimeMainThreadChecker", + srcs = glob(["InstrumentationRuntime/MainThreadChecker/*.cpp"]), + hdrs = glob(["InstrumentationRuntime/MainThreadChecker/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Headers", + "//lldb:Interpreter", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginInstrumentationRuntimeUBSan", + srcs = glob(["InstrumentationRuntime/UBSan/*.cpp"]), + hdrs = glob(["InstrumentationRuntime/UBSan/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginInstrumentationRuntimeUtility", + srcs = glob(["InstrumentationRuntime/Utility/*.cpp"]), + hdrs = glob(["InstrumentationRuntime/Utility/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Symbol", + "//lldb:Target", + "//lldb:TargetHeaders", + ], +) + +gentbl_cc_library( + name = "JITLoaderGDBProperties", + strip_include_prefix = "JITLoader/GDB", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "JITLoader/GDB/JITLoaderGDBProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "JITLoader/GDB/JITLoaderGDBPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "JITLoader/GDB/JITLoaderGDBProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginJITLoaderGDB", + srcs = glob(["JITLoader/GDB/*.cpp"]), + hdrs = glob(["JITLoader/GDB/*.h"]), + include_prefix = "Plugins", + deps = [ + ":JITLoaderGDBProperties", + ":PluginObjectFileMachO", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginSymbolLocatorDefault", + srcs = glob(["SymbolLocator/Default/*.cpp"]), + hdrs = glob(["SymbolLocator/Default/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileWasm", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +gentbl_cc_library( + name = "SymbolLocatorDebuginfodProperties", + strip_include_prefix = "SymbolLocator/Debuginfod", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "SymbolLocator/Debuginfod/SymbolLocatorDebuginfodProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + 
"SymbolLocator/Debuginfod/SymbolLocatorDebuginfodPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "SymbolLocator/Debuginfod/SymbolLocatorDebuginfodProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginSymbolLocatorDebuginfod", + srcs = glob(["SymbolLocator/Debuginfod/*.cpp"]), + hdrs = glob(["SymbolLocator/Debuginfod/*.h"]), + include_prefix = "Plugins", + deps = [ + ":SymbolLocatorDebuginfodProperties", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Debuginfod", + ], +) + +cc_library( + name = "PluginSymbolLocatorDebugSymbols", + srcs = glob(["SymbolLocator/DebugSymbols/*.cpp"]), + hdrs = glob(["SymbolLocator/DebugSymbols/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFileWasm", + "//lldb:Core", + "//lldb:Host", + "//lldb:HostMacOSXPrivateHeaders", + "//lldb:Symbol", + ], +) + +# TODO: python support +# cc_library( +# name = "PluginOperatingSystemPython", +# srcs = glob(["OperatingSystem/Python/*.cpp"]), +# hdrs = glob(["OperatingSystem/Python/*.h"]), +# include_prefix = "Plugins", +# deps = [ +# "//lldb:Core", +# "//lldb:Interpreter", +# ":PluginProcessUtility", +# "//lldb:Symbol", +# "//lldb:Target", +# ], +# ) +# cc_library( +# name = "PluginScriptInterpreterPythonInterfaces", +# srcs = glob(["ScriptInterpreter/Python/Interfaces/*.cpp"]), +# hdrs = glob(["ScriptInterpreter/Python/Interfaces/*.h"]), +# include_prefix = "Plugins", +# deps = [ +# "//lldb:Core", +# "//lldb:Host", +# "//lldb:Interpreter", +# "//lldb:Target", +# "@rules_python//python/cc:current_py_cc_headers", +# "@rules_python//python/cc:current_py_cc_libs", +# ], +# ) +# cc_library( +# name = "PluginScriptInterpreterPythonHeaders", +# hdrs = glob(["ScriptInterpreter/Python/*.h"]), +# include_prefix = "Plugins", +# deps = [ +# "//lldb:Host", +# ], +# ) +# cc_library( +# name = "PluginScriptInterpreterPython", +# srcs = glob(["ScriptInterpreter/Python/*.cpp"]), +# local_defines = [ +# 'LLDB_PYTHON_EXE_RELATIVE_PATH=\\"bin/python3\\"', +# # Must be kept in sync with WORKSPACE python version +# 'LLDB_PYTHON_RELATIVE_LIBDIR=\\"lib/python3.11/site-packages\\"', +# ], +# include_prefix = "Plugins", +# deps = [ +# "//lldb:Breakpoint", +# "//lldb:Core", +# "//lldb:DataFormatters", +# "//lldb:Host", +# "//lldb:Interpreter", +# ":PluginScriptInterpreterPythonHeaders", +# ":PluginScriptInterpreterPythonInterfaces", +# "//lldb:Target", +# ], +# ) + +# TODO: lua support +# cc_library( +# name = "PluginScriptInterpreterLua", +# srcs = glob(["ScriptInterpreter/Lua/*.cpp"]), +# hdrs = glob(["ScriptInterpreter/Lua/*.h"]), +# include_prefix = "Plugins", +# deps = [ +# "//lldb:Core", +# "//lldb:Interpreter", +# ], +# ) + +cc_library( + name = "PluginScriptInterpreterNone", + srcs = glob(["ScriptInterpreter/None/*.cpp"]), + hdrs = glob(["ScriptInterpreter/None/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginSystemRuntimeMacOSX", + srcs = glob(["SystemRuntime/MacOSX/*.cpp"]), + hdrs = glob(["SystemRuntime/MacOSX/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + 
"//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginObjectFileCOFF", + srcs = glob(["ObjectFile/COFF/*.cpp"]), + hdrs = glob(["ObjectFile/COFF/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:Utility", + "//llvm:Object", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjectFileWasm", + srcs = glob(["ObjectFile/wasm/*.cpp"]), + hdrs = glob(["ObjectFile/wasm/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjectFileJSON", + srcs = glob(["ObjectFile/JSON/*.cpp"]), + hdrs = glob(["ObjectFile/JSON/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjectFilePlaceholder", + srcs = glob(["ObjectFile/Placeholder/*.cpp"]), + hdrs = glob(["ObjectFile/Placeholder/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginObjectFileMachO", + srcs = glob(["ObjectFile/Mach-O/*.cpp"]), + hdrs = glob(["ObjectFile/Mach-O/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginObjectFileMinidump", + srcs = glob(["ObjectFile/Minidump/*.cpp"]), + hdrs = glob(["ObjectFile/Minidump/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessMinidump", + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:Object", + "//llvm:Support", + ], +) + +gentbl_cc_library( + name = "ObjectFilePECOFFProperties", + strip_include_prefix = "ObjectFile/PECOFF", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "ObjectFile/PECOFF/ObjectFilePECOFFProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "ObjectFile/PECOFF/ObjectFilePECOFFPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "ObjectFile/PECOFF/ObjectFilePECOFFProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginObjectFilePECOFF", + srcs = glob(["ObjectFile/PECOFF/*.cpp"]), + hdrs = glob(["ObjectFile/PECOFF/*.h"]), + include_prefix = "Plugins", + deps = [ + ":ObjectFilePECOFFProperties", + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:Object", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + +cc_library( + name = "PluginObjectFileBreakpad", + srcs = glob(["ObjectFile/Breakpad/*.cpp"]), + hdrs = glob(["ObjectFile/Breakpad/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Utility", + "//llvm:Support", + "//llvm:TargetParser", + ], +) + 
+cc_library( + name = "PluginObjectFileELF", + srcs = glob(["ObjectFile/ELF/*.cpp"]), + hdrs = glob(["ObjectFile/ELF/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:Object", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginUnwindAssemblyX86", + srcs = glob(["UnwindAssembly/x86/*.cpp"]), + hdrs = glob(["UnwindAssembly/x86/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:MCDisassembler", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginUnwindAssemblyInstEmulation", + srcs = glob(["UnwindAssembly/InstEmulation/*.cpp"]), + hdrs = glob(["UnwindAssembly/InstEmulation/*.h"]), + include_prefix = "Plugins", + deps = [ + "//lldb:Core", + "//lldb:Headers", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginProcessPOSIX", + srcs = glob(["Process/POSIX/*.cpp"]), + hdrs = glob(["Process/POSIX/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Headers", + "//lldb:Host", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginScriptedProcess", + srcs = glob(["Process/scripted/*.cpp"]), + hdrs = glob(["Process/scripted/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Host", + "//lldb:InterpreterHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginProcessMachCore", + srcs = glob(["Process/mach-core/*.cpp"]), + hdrs = glob(["Process/mach-core/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginDynamicLoaderDarwinKernelHeaders", + ":PluginDynamicLoaderMacOSXDYLD", + ":PluginDynamicLoaderStatic", + ":PluginObjectFileMachO", + ":PluginPlatformMacOSX", + ":PluginProcessUtility", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Host", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginProcessElfCore", + srcs = glob(["Process/elf-core/*.cpp"]), + hdrs = glob(["Process/elf-core/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginDynamicLoaderPosixDYLDHeaders", + ":PluginObjectFileELF", + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:BinaryFormat", + "//llvm:Support", + ], +) + +gentbl_cc_library( + name = "ProcessKDPProperties", + strip_include_prefix = "Process/MacOSX-Kernel", + tbl_outs = [ + ( + ["-gen-lldb-property-defs"], + "Process/MacOSX-Kernel/ProcessKDPProperties.inc", + ), + ( + ["-gen-lldb-property-enum-defs"], + "Process/MacOSX-Kernel/ProcessKDPPropertiesEnum.inc", + ), + ], + tblgen = "//lldb:lldb-tblgen", + td_file = "Process/MacOSX-Kernel/ProcessKDPProperties.td", + deps = ["//lldb:CoreTdFiles"], +) + +cc_library( + name = "PluginProcessMacOSXKernel", + srcs = glob(["Process/MacOSX-Kernel/*.cpp"]), + hdrs = glob(["Process/MacOSX-Kernel/*.h"]), + include_prefix = "Plugins", + target_compatible_with = select({ + "@platforms//os:macos": [], + "//conditions:default": ["@platforms//:incompatible"], + }), + deps = [ + 
":PluginDynamicLoaderDarwinKernel", + ":PluginDynamicLoaderDarwinKernelHeaders", + ":PluginDynamicLoaderStatic", + ":PluginProcessUtility", + ":ProcessKDPProperties", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginProcessMinidump", + srcs = glob(["Process/minidump/*.cpp"]), + hdrs = glob(["Process/minidump/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginObjectFilePlaceholder", + ":PluginProcessElfCore", + ":PluginProcessUtility", + "//lldb:Core", + "//lldb:Headers", + "//lldb:InterpreterHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//lldb:UtilityPrivateHeaders", + "//llvm:BinaryFormat", + "//llvm:Object", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginCXXItaniumABI", + srcs = glob(["LanguageRuntime/CPlusPlus/ItaniumABI/*.cpp"]), + hdrs = glob(["LanguageRuntime/CPlusPlus/ItaniumABI/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginCPPRuntime", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:DataFormattersHeaders", + "//lldb:ExpressionHeaders", + "//lldb:Headers", + "//lldb:Interpreter", + "//lldb:InterpreterHeaders", + "//lldb:Symbol", + "//lldb:SymbolHeaders", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + ], +) + +cc_library( + name = "PluginGNUstepObjCRuntime", + srcs = glob(["LanguageRuntime/ObjC/GNUstepObjCRuntime/*.cpp"]), + hdrs = glob(["LanguageRuntime/ObjC/GNUstepObjCRuntime/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginExpressionParserClang", + ":PluginObjCRuntime", + ":PluginTypeSystemClang", + ":PluginTypeSystemClangHeaders", + "//lldb:Breakpoint", + "//lldb:Core", + "//lldb:Expression", + "//lldb:Headers", + "//lldb:Host", + "//lldb:Interpreter", + "//lldb:Symbol", + "//lldb:Target", + "//lldb:TargetHeaders", + "//lldb:Utility", + "//llvm:Support", + ], +) + +cc_library( + name = "PluginRegisterTypeBuilderClang", + srcs = glob(["RegisterTypeBuilder/*.cpp"]), + hdrs = glob(["RegisterTypeBuilder/*.h"]), + include_prefix = "Plugins", + deps = [ + ":PluginTypeSystemClangHeaders", + "//clang:ast", + "//lldb:Core", + "//lldb:Headers", + "//lldb:Target", + "//lldb:TargetHeaders", + ], +) + +_DEFAULT_LOAD_PLUGINS = "\n".join(["LLDB_PLUGIN({})".format(x) for x in DEFAULT_PLUGINS]) + \ + "\n" + "\n".join(["LLDB_SCRIPT_PLUGIN({})".format(x) for x in DEFAULT_SCRIPT_PLUGINS]) + +expand_template( + name = "plugins_config_gen", + out = "Plugins.def", + substitutions = { + "@LLDB_PROCESS_WINDOWS_PLUGIN@": "", + "@LLDB_PROCESS_GDB_PLUGIN@": "LLDB_PLUGIN(ProcessGDBRemote)", + } | select({ + "@platforms//os:macos": { + "@LLDB_ENUM_PLUGINS@": _DEFAULT_LOAD_PLUGINS + """ +LLDB_PLUGIN(ProcessMacOSXKernel) +LLDB_PLUGIN(SymbolLocatorDebugSymbols) +LLDB_PLUGIN(SymbolVendorMacOSX) +""", + }, + "//conditions:default": { + "@LLDB_ENUM_PLUGINS@": _DEFAULT_LOAD_PLUGINS, + }, + }), + template = "Plugins.def.in", +) + +cc_library( + name = "PluginsConfig", + hdrs = [":plugins_config_gen"], + include_prefix = "Plugins", +) diff --git a/utils/bazel/llvm-project-overlay/lldb/source/Plugins/plugin_config.bzl b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/plugin_config.bzl new file mode 100644 index 0000000..5949d2d --- /dev/null +++ 
b/utils/bazel/llvm-project-overlay/lldb/source/Plugins/plugin_config.bzl @@ -0,0 +1,104 @@ +# This file is licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +"""Common configuration for LLDB plugins.""" + +load("//:vars.bzl", "CMAKE_CXX_STANDARD") + +DEFAULT_PLUGINS = [ + "ABIAArch64", + "ABIARM", + "ABIHexagon", + "ABIMips", + "ABIMSP430", + "ABIPowerPC", + "ABIRISCV", + "ABISystemZ", + "ABIX86", + "AppleObjCRuntime", + "ArchitectureAArch64", + "ArchitectureArm", + "ArchitectureMips", + "ArchitecturePPC64", + "ClangREPL", + "CPlusPlusLanguage", + "CXXItaniumABI", + "DisassemblerLLVMC", + "DynamicLoaderDarwinKernel", + "DynamicLoaderHexagonDYLD", + "DynamicLoaderMacOSXDYLD", + "DynamicLoaderPosixDYLD", + "DynamicLoaderStatic", + "DynamicLoaderWasmDYLD", + "DynamicLoaderWindowsDYLD", + "GNUstepObjCRuntime", + "InstructionARM", + "InstructionARM64", + "InstructionLoongArch", + "InstructionMIPS", + "InstructionMIPS64", + "InstructionPPC64", + "InstructionRISCV", + "InstrumentationRuntimeASan", + "InstrumentationRuntimeASanLibsanitizers", + "InstrumentationRuntimeMainThreadChecker", + "InstrumentationRuntimeTSan", + "InstrumentationRuntimeUBSan", + "JITLoaderGDB", + "MemoryHistoryASan", + "ObjCLanguage", + "ObjCPlusPlusLanguage", + "ObjectContainerBSDArchive", + "ObjectContainerMachOArchive", + "ObjectContainerMachOFileset", + "ObjectFileBreakpad", + "ObjectFileCOFF", + "ObjectFileELF", + "ObjectFileJSON", + "ObjectFileMachO", + "ObjectFileMinidump", + "ObjectFilePDB", + "ObjectFilePECOFF", + "ObjectFilePlaceholder", + "ObjectFileWasm", + "PlatformAndroid", + "PlatformGDB", + "PlatformLinux", + "PlatformMacOSX", + "PlatformQemuUser", + "PlatformWindows", + "ProcessElfCore", + "ProcessMachCore", + "ProcessMinidump", + "RegisterTypeBuilderClang", + "ScriptedProcess", + "StructuredDataDarwinLog", + "SymbolFileBreakpad", + "SymbolFileCTF", + "SymbolFileDWARF", + "SymbolFileJSON", + "SymbolFilePDB", + "SymbolFileSymtab", + "SymbolLocatorDebuginfod", + "SymbolLocatorDefault", + "SymbolVendorELF", + "SymbolVendorPECOFF", + "SymbolVendorWasm", + "SystemRuntimeMacOSX", + "TraceExporterCTF", + "TypeSystemClang", + "UnwindAssemblyInstEmulation", + "UnwindAssemblyX86", +] + +DEFAULT_SCRIPT_PLUGINS = [ + "ScriptInterpreterNone", +] + +OBJCPP_COPTS = [ + "-std=c++{}".format(CMAKE_CXX_STANDARD), + "-fno-objc-exceptions", + "-fno-objc-arc", + "-Wno-shorten-64-to-32", +] -- cgit v1.1 From bffc0b65692596137cbdcdd3b48e2f31320dacbd Mon Sep 17 00:00:00 2001 From: Paul Kirth Date: Thu, 4 Apr 2024 15:21:59 -0700 Subject: [RISCV][NFC] Add isTargetAndroid API in RISCVSubtarget (#87671) This is required to set target specific code generation options for Android, like using the TLS slot for the stack protector. 
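For readers unfamiliar with why such a predicate is useful, the sketch below is a minimal, self-contained C++ illustration (not LLVM code) of the kind of decision it enables: when the target is Android, the stack-protector guard can be read from a thread-local-storage slot instead of the usual __stack_chk_guard global. The StackGuardLocation type, the selectStackGuardLocation() helper, and the 0x18 offset are invented for illustration only.

#include <cstdint>
#include <optional>

// Hypothetical stand-in for a subtarget query such as
// RISCVSubtarget::isTargetAndroid(); a real backend would consult the triple.
static bool isTargetAndroid() { return true; }

// Invented descriptor for where codegen should load the stack guard from.
struct StackGuardLocation {
  bool InThreadLocalStorage;        // true: load the guard from a TLS slot
  std::optional<int64_t> TlsOffset; // slot offset from the thread pointer
};

static StackGuardLocation selectStackGuardLocation() {
  constexpr int64_t kTlsGuardOffset = 0x18; // illustrative value, not a real ABI constant
  if (isTargetAndroid())
    return {/*InThreadLocalStorage=*/true, kTlsGuardOffset};
  // Default: read the __stack_chk_guard global rather than a TLS slot.
  return {/*InThreadLocalStorage=*/false, std::nullopt};
}

Keeping the choice behind a single subtarget predicate like this keeps the Android-specific behavior out of generic code paths, which is the point of the API added below.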
--- llvm/lib/Target/RISCV/RISCVSubtarget.h | 1 + 1 file changed, 1 insertion(+) diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h index ba108912..85f8f5f 100644 --- a/llvm/lib/Target/RISCV/RISCVSubtarget.h +++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h @@ -254,6 +254,7 @@ public: const LegalizerInfo *getLegalizerInfo() const override; const RegisterBankInfo *getRegBankInfo() const override; + bool isTargetAndroid() const { return getTargetTriple().isAndroid(); } bool isTargetFuchsia() const { return getTargetTriple().isOSFuchsia(); } bool useConstantPoolForLargeInts() const; -- cgit v1.1 From 413a66f33984a4d484ac1ff0ba7c0ac39ffa3095 Mon Sep 17 00:00:00 2001 From: Alexey Bataev Date: Thu, 4 Apr 2024 18:30:17 -0400 Subject: [LV, VP]VP intrinsics support for the Loop Vectorizer + adding new tail-folding mode using EVL. (#76172) This patch introduces the generation of VP intrinsics in the Loop Vectorizer. Currently the Loop Vectorizer supports vector predication in a very limited capacity via tail-folding and masked load/store/gather/scatter intrinsics. However, this does not let architectures with active vector length predication support take advantage of their capabilities. Architectures with general masked predication support also can only take advantage of predication on memory operations. By giving the Loop Vectorizer a way to generate Vector Predication intrinsics, which (will) provide a target-independent way to model predicated vector instructions, these architectures can make better use of their predication capabilities. Our first approach (implemented in this patch) builds on top of the existing tail-folding mechanism in the LV (it just adds a new tail-folding mode using EVL), but instead of generating masked intrinsics for memory operations it generates VP intrinsics for load/store instructions. The patch adds a new VPlanTransforms transform to replace the wide header predicate compare with EVL and updates codegen for loads/stores to use VP load/store with EVL. Another important part of this approach is how the Explicit Vector Length is computed. (VP intrinsics define this vector length parameter as Explicit Vector Length (EVL)). We use the experimental intrinsic `get_vector_length`, which can be lowered to architecture-specific instruction(s) to compute EVL. We also added a new recipe to emit the instructions that compute EVL. Using VPlan in this way will eventually help build and compare VPlans corresponding to different strategies and alternatives.
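As an editorial aid, the following self-contained C++ sketch models, in scalar form, the loop structure that EVL-based tail folding produces; it is a conceptual illustration, not the IR the vectorizer emits. get_vector_length_model() stands in for the llvm.experimental.get_vector_length intrinsic, and max_lanes stands in for the scalable vector factor (the interleave/unroll factor is forced to 1 in this mode).

#include <algorithm>
#include <cstddef>

// Models llvm.experimental.get_vector_length: the target may pick any value in
// [0, max_lanes]; min(remaining, max_lanes) models the common behaviour.
static std::size_t get_vector_length_model(std::size_t remaining,
                                           std::size_t max_lanes) {
  return std::min(remaining, max_lanes);
}

// Scalar model of an EVL tail-folded saxpy loop. Comments map each step to the
// recipes added by this patch.
void saxpy_evl_model(float a, const float *x, float *y, std::size_t n,
                     std::size_t max_lanes) {
  std::size_t evl_based_iv = 0;                  // VPEVLBasedIVPHIRecipe
  while (evl_based_iv < n) {
    std::size_t avl = n - evl_based_iv;          // AVL: trip count minus IV
    std::size_t evl = get_vector_length_model(avl, max_lanes); // EXPLICIT-VECTOR-LENGTH
    // In the real plan this body is a vp.load / multiply-add / vp.store group
    // predicated on the first `evl` lanes; the scalar loop stands in for it.
    for (std::size_t lane = 0; lane < evl; ++lane)
      y[evl_based_iv + lane] += a * x[evl_based_iv + lane];
    evl_based_iv += evl;                         // index.evl.next
  }
}

Because each iteration may retire fewer than max_lanes elements, no separate remainder handling is needed beyond the EVL computation itself, which is why the patch forces the unroll factor to 1 and, for now, restricts this mode to scalable vector factors.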
Differential Revision: https://reviews.llvm.org/D99750 --- llvm/include/llvm/Analysis/TargetTransformInfo.h | 5 +- llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp | 4 + llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h | 16 ++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 205 ++++++++++++++++++--- llvm/lib/Transforms/Vectorize/VPlan.cpp | 16 +- llvm/lib/Transforms/Vectorize/VPlan.h | 68 +++++-- llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp | 16 +- llvm/lib/Transforms/Vectorize/VPlanRecipes.cpp | 56 +++++- llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 124 +++++++++++-- llvm/lib/Transforms/Vectorize/VPlanTransforms.h | 7 + llvm/lib/Transforms/Vectorize/VPlanValue.h | 1 + llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp | 43 +++++ .../PowerPC/vectorize-force-tail-with-evl.ll | 51 +++++ .../PowerPC/vplan-force-tail-with-evl.ll | 117 ++++++++++++ .../LoopVectorize/RISCV/inloop-reduction.ll | 68 ++++++- ...vectorize-force-tail-with-evl-gather-scatter.ll | 116 ++++++++++++ .../vectorize-force-tail-with-evl-interleave.ll | 175 ++++++++++++++++++ .../RISCV/vectorize-force-tail-with-evl-iv32.ll | 124 +++++++++++++ ...ctorize-force-tail-with-evl-masked-loadstore.ll | 132 +++++++++++++ .../vectorize-force-tail-with-evl-no-masking.ll | 36 ++++ ...orize-force-tail-with-evl-reverse-load-store.ll | 119 ++++++++++++ .../LoopVectorize/RISCV/vectorize-vp-intrinsics.ll | 142 ++++++++++++++ .../LoopVectorize/RISCV/vplan-vp-intrinsics.ll | 134 ++++++++++++++ .../X86/vectorize-force-tail-with-evl.ll | 191 +++++++++++++++++++ .../LoopVectorize/X86/vplan-vp-intrinsics.ll | 89 +++++++++ .../LoopVectorize/vectorize-force-tail-with-evl.ll | 101 ++++++++++ .../LoopVectorize/vplan-force-tail-with-evl.ll | 37 ++++ 27 files changed, 2124 insertions(+), 69 deletions(-) create mode 100644 llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-force-tail-with-evl.ll create mode 100644 llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-gather-scatter.ll create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-no-masking.ll create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll create mode 100644 llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll create mode 100644 llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll create mode 100644 llvm/test/Transforms/LoopVectorize/vplan-force-tail-with-evl.ll diff --git a/llvm/include/llvm/Analysis/TargetTransformInfo.h b/llvm/include/llvm/Analysis/TargetTransformInfo.h index bad0a77..fa9392b 100644 --- a/llvm/include/llvm/Analysis/TargetTransformInfo.h +++ b/llvm/include/llvm/Analysis/TargetTransformInfo.h @@ -190,7 +190,10 @@ enum class TailFoldingStyle { /// Use predicate to control both data and control flow, but modify /// the trip count so that a runtime overflow check can be avoided 
/// and such that the scalar epilogue loop can always be removed. - DataAndControlFlowWithoutRuntimeCheck + DataAndControlFlowWithoutRuntimeCheck, + /// Use predicated EVL instructions for tail-folding. + /// Indicates that VP intrinsics should be used. + DataWithEVL, }; struct TailFoldingInfo { diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp index 27a4d78..aeec063 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp @@ -245,6 +245,10 @@ RISCVTTIImpl::getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, return TTI::TCC_Free; } +bool RISCVTTIImpl::hasActiveVectorLength(unsigned, Type *DataTy, Align) const { + return ST->hasVInstructions(); +} + TargetTransformInfo::PopcntSupportKind RISCVTTIImpl::getPopcntSupport(unsigned TyWidth) { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h index ac32aea..c0169ea 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h +++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.h @@ -78,6 +78,22 @@ public: const APInt &Imm, Type *Ty, TTI::TargetCostKind CostKind); + /// \name EVL Support for predicated vectorization. + /// Whether the target supports the %evl parameter of VP intrinsic efficiently + /// in hardware, for the given opcode and type/alignment. (see LLVM Language + /// Reference - "Vector Predication Intrinsics", + /// https://llvm.org/docs/LangRef.html#vector-predication-intrinsics and + /// "IR-level VP intrinsics", + /// https://llvm.org/docs/Proposals/VectorPredication.html#ir-level-vp-intrinsics). + /// \param Opcode the opcode of the instruction checked for predicated version + /// support. + /// \param DataType the type of the instruction with the \p Opcode checked for + /// prediction support. + /// \param Alignment the alignment for memory access operation checked for + /// predicated version support. + bool hasActiveVectorLength(unsigned Opcode, Type *DataType, + Align Alignment) const; + TargetTransformInfo::PopcntSupportKind getPopcntSupport(unsigned TyWidth); bool shouldExpandReduction(const IntrinsicInst *II) const; diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 0834865..cb0fd06 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -124,6 +124,7 @@ #include "llvm/IR/User.h" #include "llvm/IR/Value.h" #include "llvm/IR/ValueHandle.h" +#include "llvm/IR/VectorBuilder.h" #include "llvm/IR/Verifier.h" #include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" @@ -248,10 +249,12 @@ static cl::opt ForceTailFoldingStyle( clEnumValN(TailFoldingStyle::DataAndControlFlow, "data-and-control", "Create lane mask using active.lane.mask intrinsic, and use " "it for both data and control flow"), - clEnumValN( - TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, - "data-and-control-without-rt-check", - "Similar to data-and-control, but remove the runtime check"))); + clEnumValN(TailFoldingStyle::DataAndControlFlowWithoutRuntimeCheck, + "data-and-control-without-rt-check", + "Similar to data-and-control, but remove the runtime check"), + clEnumValN(TailFoldingStyle::DataWithEVL, "data-with-evl", + "Use predicated EVL instructions for tail folding. 
If EVL " + "is unsupported, fallback to data-without-lane-mask."))); static cl::opt MaximizeBandwidth( "vectorizer-maximize-bandwidth", cl::init(false), cl::Hidden, @@ -1505,29 +1508,62 @@ public: /// Returns the TailFoldingStyle that is best for the current loop. TailFoldingStyle getTailFoldingStyle(bool IVUpdateMayOverflow = true) const { - return IVUpdateMayOverflow ? ChosenTailFoldingStyle.first - : ChosenTailFoldingStyle.second; + if (!ChosenTailFoldingStyle) + return TailFoldingStyle::None; + return IVUpdateMayOverflow ? ChosenTailFoldingStyle->first + : ChosenTailFoldingStyle->second; } /// Selects and saves TailFoldingStyle for 2 options - if IV update may /// overflow or not. - void setTailFoldingStyles() { - assert(ChosenTailFoldingStyle.first == TailFoldingStyle::None && - ChosenTailFoldingStyle.second == TailFoldingStyle::None && - "Tail folding must not be selected yet."); - if (!Legal->prepareToFoldTailByMasking()) + /// \param IsScalableVF true if scalable vector factors enabled. + /// \param UserIC User specific interleave count. + void setTailFoldingStyles(bool IsScalableVF, unsigned UserIC) { + assert(!ChosenTailFoldingStyle && "Tail folding must not be selected yet."); + if (!Legal->prepareToFoldTailByMasking()) { + ChosenTailFoldingStyle = + std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); return; + } - if (ForceTailFoldingStyle.getNumOccurrences()) { - ChosenTailFoldingStyle.first = ChosenTailFoldingStyle.second = - ForceTailFoldingStyle; + if (!ForceTailFoldingStyle.getNumOccurrences()) { + ChosenTailFoldingStyle = std::make_pair( + TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true), + TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false)); return; } - ChosenTailFoldingStyle.first = - TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/true); - ChosenTailFoldingStyle.second = - TTI.getPreferredTailFoldingStyle(/*IVUpdateMayOverflow=*/false); + // Set styles when forced. + ChosenTailFoldingStyle = std::make_pair(ForceTailFoldingStyle.getValue(), + ForceTailFoldingStyle.getValue()); + if (ForceTailFoldingStyle != TailFoldingStyle::DataWithEVL) + return; + // Override forced styles if needed. + // FIXME: use actual opcode/data type for analysis here. + // FIXME: Investigate opportunity for fixed vector factor. + bool EVLIsLegal = + IsScalableVF && UserIC <= 1 && + TTI.hasActiveVectorLength(0, nullptr, Align()) && + !EnableVPlanNativePath && + // FIXME: implement support for max safe dependency distance. + Legal->isSafeForAnyVectorWidth() && + // FIXME: remove this once reductions are supported. + Legal->getReductionVars().empty(); + if (!EVLIsLegal) { + // If for some reason EVL mode is unsupported, fallback to + // DataWithoutLaneMask to try to vectorize the loop with folded tail + // in a generic way. + ChosenTailFoldingStyle = + std::make_pair(TailFoldingStyle::DataWithoutLaneMask, + TailFoldingStyle::DataWithoutLaneMask); + LLVM_DEBUG( + dbgs() + << "LV: Preference for VP intrinsics indicated. Will " + "not try to generate VP Intrinsics " + << (UserIC > 1 + ? "since interleave count specified is greater than 1.\n" + : "due to non-interleaving reasons.\n")); + } } /// Returns true if all loop blocks should be masked to fold tail loop. @@ -1544,6 +1580,18 @@ public: return foldTailByMasking() || Legal->blockNeedsPredication(BB); } + /// Returns true if VP intrinsics with explicit vector length support should + /// be generated in the tail folded loop. 
+ bool foldTailWithEVL() const { + return getTailFoldingStyle() == TailFoldingStyle::DataWithEVL && + // FIXME: remove this once vp_reverse is supported. + none_of( + WideningDecisions, + [](const std::pair, + std::pair> + &Data) { return Data.second.first == CM_Widen_Reverse; }); + } + /// Returns true if the Phi is part of an inloop reduction. bool isInLoopReduction(PHINode *Phi) const { return InLoopReductions.contains(Phi); @@ -1688,8 +1736,8 @@ private: /// Control finally chosen tail folding style. The first element is used if /// the IV update may overflow, the second element - if it does not. - std::pair ChosenTailFoldingStyle = - std::make_pair(TailFoldingStyle::None, TailFoldingStyle::None); + std::optional> + ChosenTailFoldingStyle; /// A map holding scalar costs for different vectorization factors. The /// presence of a cost for an instruction in the mapping indicates that the @@ -4647,9 +4695,24 @@ LoopVectorizationCostModel::computeMaxVF(ElementCount UserVF, unsigned UserIC) { // found modulo the vectorization factor is not zero, try to fold the tail // by masking. // FIXME: look for a smaller MaxVF that does divide TC rather than masking. - setTailFoldingStyles(); - if (foldTailByMasking()) + setTailFoldingStyles(MaxFactors.ScalableVF.isScalable(), UserIC); + if (foldTailByMasking()) { + if (getTailFoldingStyle() == TailFoldingStyle::DataWithEVL) { + LLVM_DEBUG( + dbgs() + << "LV: tail is folded with EVL, forcing unroll factor to be 1. Will " + "try to generate VP Intrinsics with scalable vector " + "factors only.\n"); + // Tail folded loop using VP intrinsics restricts the VF to be scalable + // for now. + // TODO: extend it for fixed vectors, if required. + assert(MaxFactors.ScalableVF.isScalable() && + "Expected scalable vector factor."); + + MaxFactors.FixedVF = ElementCount::getFixed(1); + } return MaxFactors; + } // If there was a tail-folding hint/switch, but we can't fold the tail by // masking, fallback to a vectorization with a scalar epilogue. @@ -5257,6 +5320,13 @@ LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, if (!isScalarEpilogueAllowed()) return 1; + // Do not interleave if EVL is preferred and no User IC is specified. + if (foldTailWithEVL()) { + LLVM_DEBUG(dbgs() << "LV: Preference for VP intrinsics indicated. " + "Unroll factor forced to be 1.\n"); + return 1; + } + // We used the distance for the interleave count. if (!Legal->isSafeForAnyVectorWidth()) return 1; @@ -8487,6 +8557,9 @@ void LoopVectorizationPlanner::buildVPlansWithVPRecipes(ElementCount MinVF, VPlanTransforms::truncateToMinimalBitwidths( *Plan, CM.getMinimalBitwidths(), PSE.getSE()->getContext()); VPlanTransforms::optimize(*Plan, *PSE.getSE()); + // TODO: try to put it close to addActiveLaneMask(). 
+ if (CM.foldTailWithEVL()) + VPlanTransforms::addExplicitVectorLength(*Plan); assert(verifyVPlanIsValid(*Plan) && "VPlan is invalid"); VPlans.push_back(std::move(Plan)); } @@ -9179,7 +9252,7 @@ void VPDerivedIVRecipe::execute(VPTransformState &State) { State.Builder.setFastMathFlags(FPBinOp->getFastMathFlags()); Value *Step = State.get(getStepValue(), VPIteration(0, 0)); - Value *CanonicalIV = State.get(getCanonicalIV(), VPIteration(0, 0)); + Value *CanonicalIV = State.get(getOperand(1), VPIteration(0, 0)); Value *DerivedIV = emitTransformedIndex( State.Builder, CanonicalIV, getStartValue()->getLiveInIRValue(), Step, Kind, cast_if_present(FPBinOp)); @@ -9307,6 +9380,52 @@ void VPReplicateRecipe::execute(VPTransformState &State) { State.ILV->scalarizeInstruction(UI, this, VPIteration(Part, Lane), State); } +/// Creates either vp_store or vp_scatter intrinsics calls to represent +/// predicated store/scatter. +static Instruction * +lowerStoreUsingVectorIntrinsics(IRBuilderBase &Builder, Value *Addr, + Value *StoredVal, bool IsScatter, Value *Mask, + Value *EVL, const Align &Alignment) { + CallInst *Call; + if (IsScatter) { + Call = Builder.CreateIntrinsic(Type::getVoidTy(EVL->getContext()), + Intrinsic::vp_scatter, + {StoredVal, Addr, Mask, EVL}); + } else { + VectorBuilder VBuilder(Builder); + VBuilder.setEVL(EVL).setMask(Mask); + Call = cast(VBuilder.createVectorInstruction( + Instruction::Store, Type::getVoidTy(EVL->getContext()), + {StoredVal, Addr})); + } + Call->addParamAttr( + 1, Attribute::getWithAlignment(Call->getContext(), Alignment)); + return Call; +} + +/// Creates either vp_load or vp_gather intrinsics calls to represent +/// predicated load/gather. +static Instruction *lowerLoadUsingVectorIntrinsics(IRBuilderBase &Builder, + VectorType *DataTy, + Value *Addr, bool IsGather, + Value *Mask, Value *EVL, + const Align &Alignment) { + CallInst *Call; + if (IsGather) { + Call = + Builder.CreateIntrinsic(DataTy, Intrinsic::vp_gather, {Addr, Mask, EVL}, + nullptr, "wide.masked.gather"); + } else { + VectorBuilder VBuilder(Builder); + VBuilder.setEVL(EVL).setMask(Mask); + Call = cast(VBuilder.createVectorInstruction( + Instruction::Load, DataTy, Addr, "vp.op.load")); + } + Call->addParamAttr( + 0, Attribute::getWithAlignment(Call->getContext(), Alignment)); + return Call; +} + void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { VPValue *StoredValue = isStore() ? getStoredValue() : nullptr; @@ -9345,7 +9464,25 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { for (unsigned Part = 0; Part < State.UF; ++Part) { Instruction *NewSI = nullptr; Value *StoredVal = State.get(StoredValue, Part); - if (CreateGatherScatter) { + // TODO: split this into several classes for better design. + if (State.EVL) { + assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with " + "explicit vector length."); + assert(cast(State.EVL)->getOpcode() == + VPInstruction::ExplicitVectorLength && + "EVL must be VPInstruction::ExplicitVectorLength."); + Value *EVL = State.get(State.EVL, VPIteration(0, 0)); + // If EVL is not nullptr, then EVL must be a valid value set during plan + // creation, possibly default value = whole vector register length. EVL + // is created only if TTI prefers predicated vectorization, thus if EVL + // is not nullptr it also implies preference for predicated + // vectorization. + // FIXME: Support reverse store after vp_reverse is added. + Value *MaskPart = isMaskRequired ? 
BlockInMaskParts[Part] : nullptr; + NewSI = lowerStoreUsingVectorIntrinsics( + Builder, State.get(getAddr(), Part, !CreateGatherScatter), + StoredVal, CreateGatherScatter, MaskPart, EVL, Alignment); + } else if (CreateGatherScatter) { Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; Value *VectorGep = State.get(getAddr(), Part); NewSI = Builder.CreateMaskedScatter(StoredVal, VectorGep, Alignment, @@ -9375,7 +9512,25 @@ void VPWidenMemoryInstructionRecipe::execute(VPTransformState &State) { State.setDebugLocFrom(getDebugLoc()); for (unsigned Part = 0; Part < State.UF; ++Part) { Value *NewLI; - if (CreateGatherScatter) { + // TODO: split this into several classes for better design. + if (State.EVL) { + assert(State.UF == 1 && "Expected only UF == 1 when vectorizing with " + "explicit vector length."); + assert(cast(State.EVL)->getOpcode() == + VPInstruction::ExplicitVectorLength && + "EVL must be VPInstruction::ExplicitVectorLength."); + Value *EVL = State.get(State.EVL, VPIteration(0, 0)); + // If EVL is not nullptr, then EVL must be a valid value set during plan + // creation, possibly default value = whole vector register length. EVL + // is created only if TTI prefers predicated vectorization, thus if EVL + // is not nullptr it also implies preference for predicated + // vectorization. + // FIXME: Support reverse loading after vp_reverse is added. + Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; + NewLI = lowerLoadUsingVectorIntrinsics( + Builder, DataTy, State.get(getAddr(), Part, !CreateGatherScatter), + CreateGatherScatter, MaskPart, EVL, Alignment); + } else if (CreateGatherScatter) { Value *MaskPart = isMaskRequired ? BlockInMaskParts[Part] : nullptr; Value *VectorGep = State.get(getAddr(), Part); NewLI = Builder.CreateMaskedGather(DataTy, VectorGep, Alignment, MaskPart, diff --git a/llvm/lib/Transforms/Vectorize/VPlan.cpp b/llvm/lib/Transforms/Vectorize/VPlan.cpp index f0b7008..8ebd75d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlan.cpp @@ -871,13 +871,15 @@ void VPlan::execute(VPTransformState *State) { // only a single part is generated, which provides the last part from the // previous iteration. For non-ordered reductions all UF parts are // generated. - bool SinglePartNeeded = isa(PhiR) || - isa(PhiR) || - (isa(PhiR) && - cast(PhiR)->isOrdered()); - bool NeedsScalar = isa(PhiR) || - (isa(PhiR) && - cast(PhiR)->isInLoop()); + bool SinglePartNeeded = + isa(PhiR) || + isa(PhiR) || + (isa(PhiR) && + cast(PhiR)->isOrdered()); + bool NeedsScalar = + isa(PhiR) || + (isa(PhiR) && + cast(PhiR)->isInLoop()); unsigned LastPartForNewPhi = SinglePartNeeded ? 1 : State->UF; for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) { diff --git a/llvm/lib/Transforms/Vectorize/VPlan.h b/llvm/lib/Transforms/Vectorize/VPlan.h index 813ebda..77577b5 100644 --- a/llvm/lib/Transforms/Vectorize/VPlan.h +++ b/llvm/lib/Transforms/Vectorize/VPlan.h @@ -242,6 +242,15 @@ struct VPTransformState { ElementCount VF; unsigned UF; + /// If EVL (Explicit Vector Length) is not nullptr, then EVL must be a valid + /// value set during plan transformation, possibly a default value = whole + /// vector register length. EVL is created only if TTI prefers predicated + /// vectorization, thus if EVL is not nullptr it also implies preference for + /// predicated vectorization. + /// TODO: this is a temporarily solution, the EVL must be explicitly used by + /// the recipes and must be removed here. 
+ VPValue *EVL = nullptr; + /// Hold the indices to generate specific scalar instructions. Null indicates /// that all instances are to be generated, using either scalar or vector /// instructions. @@ -1159,6 +1168,7 @@ public: SLPLoad, SLPStore, ActiveLaneMask, + ExplicitVectorLength, CalculateTripCountMinusVF, // Increment the canonical IV separately for each unrolled part. CanonicalIVIncrementForPart, @@ -2489,6 +2499,45 @@ public: #endif }; +/// A recipe for generating the phi node for the current index of elements, +/// adjusted in accordance with EVL value. It starts at the start value of the +/// canonical induction and gets incremented by EVL in each iteration of the +/// vector loop. +class VPEVLBasedIVPHIRecipe : public VPHeaderPHIRecipe { +public: + VPEVLBasedIVPHIRecipe(VPValue *StartIV, DebugLoc DL) + : VPHeaderPHIRecipe(VPDef::VPEVLBasedIVPHISC, nullptr, StartIV, DL) {} + + ~VPEVLBasedIVPHIRecipe() override = default; + + VPEVLBasedIVPHIRecipe *clone() override { + llvm_unreachable("cloning not implemented yet"); + } + + VP_CLASSOF_IMPL(VPDef::VPEVLBasedIVPHISC) + + static inline bool classof(const VPHeaderPHIRecipe *D) { + return D->getVPDefID() == VPDef::VPEVLBasedIVPHISC; + } + + /// Generate phi for handling IV based on EVL over iterations correctly. + /// TODO: investigate if it can share the code with VPCanonicalIVPHIRecipe. + void execute(VPTransformState &State) override; + + /// Returns true if the recipe only uses the first lane of operand \p Op. + bool onlyFirstLaneUsed(const VPValue *Op) const override { + assert(is_contained(operands(), Op) && + "Op must be an operand of the recipe"); + return true; + } + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + /// Print the recipe. + void print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const override; +#endif +}; + /// A Recipe for widening the canonical induction variable of the vector loop. class VPWidenCanonicalIVRecipe : public VPSingleDefRecipe { public: @@ -2522,8 +2571,8 @@ public: } }; -/// A recipe for converting the canonical IV value to the corresponding value of -/// an IV with different start and step values, using Start + CanonicalIV * +/// A recipe for converting the input value \p IV value to the corresponding +/// value of an IV with different start and step values, using Start + IV * /// Step. class VPDerivedIVRecipe : public VPSingleDefRecipe { /// Kind of the induction. @@ -2541,16 +2590,16 @@ public: Start, CanonicalIV, Step) {} VPDerivedIVRecipe(InductionDescriptor::InductionKind Kind, - const FPMathOperator *FPBinOp, VPValue *Start, - VPCanonicalIVPHIRecipe *CanonicalIV, VPValue *Step) - : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, CanonicalIV, Step}), - Kind(Kind), FPBinOp(FPBinOp) {} + const FPMathOperator *FPBinOp, VPValue *Start, VPValue *IV, + VPValue *Step) + : VPSingleDefRecipe(VPDef::VPDerivedIVSC, {Start, IV, Step}), Kind(Kind), + FPBinOp(FPBinOp) {} ~VPDerivedIVRecipe() override = default; VPRecipeBase *clone() override { - return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), - getCanonicalIV(), getStepValue()); + return new VPDerivedIVRecipe(Kind, FPBinOp, getStartValue(), getOperand(1), + getStepValue()); } VP_CLASSOF_IMPL(VPDef::VPDerivedIVSC) @@ -2570,9 +2619,6 @@ public: } VPValue *getStartValue() const { return getOperand(0); } - VPCanonicalIVPHIRecipe *getCanonicalIV() const { - return cast(getOperand(1)); - } VPValue *getStepValue() const { return getOperand(2); } /// Returns true if the recipe only uses the first lane of operand \p Op. 
diff --git a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp index 04e3031..c8ae2ee 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanAnalysis.cpp @@ -216,14 +216,14 @@ Type *VPTypeAnalysis::inferScalarType(const VPValue *V) { Type *ResultTy = TypeSwitch(V->getDefiningRecipe()) .Case( - [this](const auto *R) { - // Handle header phi recipes, except VPWienIntOrFpInduction - // which needs special handling due it being possibly truncated. - // TODO: consider inferring/caching type of siblings, e.g., - // backedge value, here and in cases below. - return inferScalarType(R->getStartValue()); - }) + VPReductionPHIRecipe, VPWidenPointerInductionRecipe, + VPEVLBasedIVPHIRecipe>([this](const auto *R) { + // Handle header phi recipes, except VPWidenIntOrFpInduction + // which needs special handling due it being possibly truncated. + // TODO: consider inferring/caching type of siblings, e.g., + // backedge value, here and in cases below. + return inferScalarType(R->getStartValue()); + }) .Case( [](const auto *R) { return R->getScalarType(); }) .CasegetType(), 0); return Builder.CreateSelect(Cmp, Sub, Zero); } + case VPInstruction::ExplicitVectorLength: { + // Compute EVL + auto GetEVL = [=](VPTransformState &State, Value *AVL) { + assert(AVL->getType()->isIntegerTy() && + "Requested vector length should be an integer."); + + // TODO: Add support for MaxSafeDist for correct loop emission. + assert(State.VF.isScalable() && "Expected scalable vector factor."); + Value *VFArg = State.Builder.getInt32(State.VF.getKnownMinValue()); + + Value *EVL = State.Builder.CreateIntrinsic( + State.Builder.getInt32Ty(), Intrinsic::experimental_get_vector_length, + {AVL, VFArg, State.Builder.getTrue()}); + return EVL; + }; + // TODO: Restructure this code with an explicit remainder loop, vsetvli can + // be outside of the main loop. + assert(Part == 0 && "No unrolling expected for predicated vectorization."); + // Compute VTC - IV as the AVL (requested vector length). + Value *Index = State.get(getOperand(0), VPIteration(0, 0)); + Value *TripCount = State.get(getOperand(1), VPIteration(0, 0)); + Value *AVL = State.Builder.CreateSub(TripCount, Index); + Value *EVL = GetEVL(State, AVL); + assert(!State.EVL && "multiple EVL recipes"); + State.EVL = this; + return EVL; + } case VPInstruction::CanonicalIVIncrementForPart: { auto *IV = State.get(getOperand(0), VPIteration(0, 0)); if (Part == 0) @@ -592,6 +620,7 @@ bool VPInstruction::onlyFirstLaneUsed(const VPValue *Op) const { // TODO: Cover additional opcodes. 
return vputils::onlyFirstLaneUsed(this); case VPInstruction::ActiveLaneMask: + case VPInstruction::ExplicitVectorLength: case VPInstruction::CalculateTripCountMinusVF: case VPInstruction::CanonicalIVIncrementForPart: case VPInstruction::BranchOnCount: @@ -628,6 +657,9 @@ void VPInstruction::print(raw_ostream &O, const Twine &Indent, case VPInstruction::ActiveLaneMask: O << "active lane mask"; break; + case VPInstruction::ExplicitVectorLength: + O << "EXPLICIT-VECTOR-LENGTH"; + break; case VPInstruction::FirstOrderRecurrenceSplice: O << "first-order splice"; break; @@ -1184,7 +1216,7 @@ void VPDerivedIVRecipe::print(raw_ostream &O, const Twine &Indent, O << Indent << "= DERIVED-IV "; getStartValue()->printAsOperand(O, SlotTracker); O << " + "; - getCanonicalIV()->printAsOperand(O, SlotTracker); + getOperand(1)->printAsOperand(O, SlotTracker); O << " * "; getStepValue()->printAsOperand(O, SlotTracker); } @@ -1974,3 +2006,25 @@ void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent, printOperands(O, SlotTracker); } #endif + +void VPEVLBasedIVPHIRecipe::execute(VPTransformState &State) { + BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); + assert(State.UF == 1 && "Expected unroll factor 1 for VP vectorization."); + Value *Start = State.get(getOperand(0), VPIteration(0, 0)); + PHINode *EntryPart = + State.Builder.CreatePHI(Start->getType(), 2, "evl.based.iv"); + EntryPart->addIncoming(Start, VectorPH); + EntryPart->setDebugLoc(getDebugLoc()); + State.set(this, EntryPart, 0, /*IsScalar=*/true); +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void VPEVLBasedIVPHIRecipe::print(raw_ostream &O, const Twine &Indent, + VPSlotTracker &SlotTracker) const { + O << Indent << "EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI "; + + printAsOperand(O, SlotTracker); + O << " = phi "; + printOperands(O, SlotTracker); +} +#endif diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 7d4e54d..1256e4d 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1209,6 +1209,45 @@ static VPActiveLaneMaskPHIRecipe *addVPLaneMaskPhiAndUpdateExitBranch( return LaneMaskPhi; } +/// Replaces (ICMP_ULE, WideCanonicalIV, backedge-taken-count) pattern using +/// the given \p Idiom. +static void +replaceHeaderPredicateWith(VPlan &Plan, VPValue &Idiom, + function_ref Cond = {}) { + auto *FoundWidenCanonicalIVUser = + find_if(Plan.getCanonicalIV()->users(), + [](VPUser *U) { return isa(U); }); + if (FoundWidenCanonicalIVUser == Plan.getCanonicalIV()->users().end()) + return; + auto *WideCanonicalIV = + cast(*FoundWidenCanonicalIVUser); + // Walk users of WideCanonicalIV and replace all compares of the form + // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with + // the given idiom VPValue. 
+ VPValue *BTC = Plan.getOrCreateBackedgeTakenCount(); + for (VPUser *U : SmallVector(WideCanonicalIV->users())) { + auto *CompareToReplace = dyn_cast(U); + if (!CompareToReplace || + CompareToReplace->getOpcode() != Instruction::ICmp || + CompareToReplace->getPredicate() != CmpInst::ICMP_ULE || + CompareToReplace->getOperand(1) != BTC) + continue; + + assert(CompareToReplace->getOperand(0) == WideCanonicalIV && + "WidenCanonicalIV must be the first operand of the compare"); + if (Cond) { + CompareToReplace->replaceUsesWithIf(&Idiom, Cond); + if (!CompareToReplace->getNumUsers()) + CompareToReplace->eraseFromParent(); + } else { + CompareToReplace->replaceAllUsesWith(&Idiom); + CompareToReplace->eraseFromParent(); + } + } + if (!WideCanonicalIV->getNumUsers()) + WideCanonicalIV->eraseFromParent(); +} + void VPlanTransforms::addActiveLaneMask( VPlan &Plan, bool UseActiveLaneMaskForControlFlow, bool DataAndControlFlowWithoutRuntimeCheck) { @@ -1238,20 +1277,77 @@ void VPlanTransforms::addActiveLaneMask( // Walk users of WideCanonicalIV and replace all compares of the form // (ICMP_ULE, WideCanonicalIV, backedge-taken-count) with an // active-lane-mask. - VPValue *BTC = Plan.getOrCreateBackedgeTakenCount(); - for (VPUser *U : SmallVector(WideCanonicalIV->users())) { - auto *CompareToReplace = dyn_cast(U); - if (!CompareToReplace || - CompareToReplace->getOpcode() != Instruction::ICmp || - CompareToReplace->getPredicate() != CmpInst::ICMP_ULE || - CompareToReplace->getOperand(1) != BTC) - continue; + replaceHeaderPredicateWith(Plan, *LaneMask); +} - assert(CompareToReplace->getOperand(0) == WideCanonicalIV && - "WidenCanonicalIV must be the first operand of the compare"); - CompareToReplace->replaceAllUsesWith(LaneMask); - CompareToReplace->eraseFromParent(); +/// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and +/// replaces all uses except the canonical IV increment of +/// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. VPCanonicalIVPHIRecipe +/// is used only for loop iterations counting after this transformation. +/// +/// The function uses the following definitions: +/// %StartV is the canonical induction start value. +/// +/// The function adds the following recipes: +/// +/// vector.ph: +/// ... +/// +/// vector.body: +/// ... +/// %EVLPhi = EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI [ %StartV, %vector.ph ], +/// [ %NextEVLIV, %vector.body ] +/// %VPEVL = EXPLICIT-VECTOR-LENGTH %EVLPhi, original TC +/// ... +/// %NextEVLIV = add IVSize (cast i32 %VPEVVL to IVSize), %EVLPhi +/// ... +/// +void VPlanTransforms::addExplicitVectorLength(VPlan &Plan) { + VPBasicBlock *Header = Plan.getVectorLoopRegion()->getEntryBasicBlock(); + auto *CanonicalIVPHI = Plan.getCanonicalIV(); + VPValue *StartV = CanonicalIVPHI->getStartValue(); + + // TODO: revisit this and try to remove the mask operand. + // Walk VPWidenMemoryInstructionRecipe users of WideCanonicalIV and replace + // all compares of the form (ICMP_ULE, WideCanonicalIV, backedge-taken-count), + // used as mask in VPWidenMemoryInstructionRecipe, with an all-true-mask. + Value *TrueMask = + ConstantInt::getTrue(CanonicalIVPHI->getScalarType()->getContext()); + VPValue *VPTrueMask = Plan.getOrAddLiveIn(TrueMask); + replaceHeaderPredicateWith(Plan, *VPTrueMask, [](VPUser &U, unsigned) { + return isa(U); + }); + // Now create the ExplicitVectorLengthPhi recipe in the main loop. 
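Before the recipe wiring itself, a short scalar model of the loop shape this transform is aiming for may help. It is only an illustration of EVL strip-mining under assumed helper names, not the actual VPlan codegen; getVectorLength merely approximates the llvm.experimental.get.vector.length semantics used by the ExplicitVectorLength VPInstruction, and scalable-vector (vscale) details are ignored.

  #include <algorithm>
  #include <cstddef>

  // Rough stand-in for llvm.experimental.get.vector.length: clamp the
  // remaining application vector length (AVL) to the vectorization factor.
  static std::size_t getVectorLength(std::size_t AVL, std::size_t VF) {
    return std::min(AVL, VF);
  }

  // Scalar model of the loop shape after addExplicitVectorLength: EVLPhi
  // plays the role of the EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI, EVL of the
  // EXPLICIT-VECTOR-LENGTH recipe, and the final increment of the new
  // "index.evl.next" add. Illustrative only; not the generated code.
  static void vaddModel(int *A, const int *B, const int *C, std::size_t N,
                        std::size_t VF) {
    for (std::size_t EVLPhi = 0; EVLPhi < N;) {
      std::size_t EVL = getVectorLength(N - EVLPhi, VF); // AVL = TC - EVLPhi
      for (std::size_t L = 0; L < EVL; ++L) // stands in for one vp.* vector op
        A[EVLPhi + L] = B[EVLPhi + L] + C[EVLPhi + L];
      EVLPhi += EVL;
    }
  }

In the generated IR the same roles are played by the @llvm.experimental.get.vector.length call and the index.evl.next increment that appear in the RISC-V tests further down in this patch.
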
+ auto *EVLPhi = new VPEVLBasedIVPHIRecipe(StartV, DebugLoc()); + EVLPhi->insertAfter(CanonicalIVPHI); + auto *VPEVL = new VPInstruction(VPInstruction::ExplicitVectorLength, + {EVLPhi, Plan.getTripCount()}); + VPEVL->insertBefore(*Header, Header->getFirstNonPhi()); + + auto *CanonicalIVIncrement = + cast(CanonicalIVPHI->getBackedgeValue()); + VPSingleDefRecipe *OpVPEVL = VPEVL; + if (unsigned IVSize = CanonicalIVPHI->getScalarType()->getScalarSizeInBits(); + IVSize != 32) { + OpVPEVL = new VPScalarCastRecipe(IVSize < 32 ? Instruction::Trunc + : Instruction::ZExt, + OpVPEVL, CanonicalIVPHI->getScalarType()); + OpVPEVL->insertBefore(CanonicalIVIncrement); } + auto *NextEVLIV = + new VPInstruction(Instruction::Add, {OpVPEVL, EVLPhi}, + {CanonicalIVIncrement->hasNoUnsignedWrap(), + CanonicalIVIncrement->hasNoSignedWrap()}, + CanonicalIVIncrement->getDebugLoc(), "index.evl.next"); + NextEVLIV->insertBefore(CanonicalIVIncrement); + EVLPhi->addOperand(NextEVLIV); + + // Replace all uses of VPCanonicalIVPHIRecipe by + // VPEVLBasedIVPHIRecipe except for the canonical IV increment. + CanonicalIVPHI->replaceAllUsesWith(EVLPhi); + CanonicalIVIncrement->setOperand(0, CanonicalIVPHI); + // TODO: support unroll factor > 1. + Plan.setUF(1); } void VPlanTransforms::dropPoisonGeneratingRecipes( @@ -1277,9 +1373,7 @@ void VPlanTransforms::dropPoisonGeneratingRecipes( // handled. if (isa(CurRec) || isa(CurRec) || - isa(CurRec) || - isa(CurRec) || - isa(CurRec)) + isa(CurRec) || isa(CurRec)) continue; // This recipe contributes to the address computation of a widen diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h index ff83c3f..0cbc707 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.h +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.h @@ -98,6 +98,13 @@ struct VPlanTransforms { /// VPlan directly. static void dropPoisonGeneratingRecipes( VPlan &Plan, function_ref BlockNeedsPredication); + + /// Add a VPEVLBasedIVPHIRecipe and related recipes to \p Plan and + /// replaces all uses except the canonical IV increment of + /// VPCanonicalIVPHIRecipe with a VPEVLBasedIVPHIRecipe. + /// VPCanonicalIVPHIRecipe is only used to control the loop after + /// this transformation. + static void addExplicitVectorLength(VPlan &Plan); }; } // namespace llvm diff --git a/llvm/lib/Transforms/Vectorize/VPlanValue.h b/llvm/lib/Transforms/Vectorize/VPlanValue.h index 1d2c17e..8b221d3 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanValue.h +++ b/llvm/lib/Transforms/Vectorize/VPlanValue.h @@ -368,6 +368,7 @@ public: // VPHeaderPHIRecipe need to be kept together. VPCanonicalIVPHISC, VPActiveLaneMaskPHISC, + VPEVLBasedIVPHISC, VPFirstOrderRecurrencePHISC, VPWidenIntOrFpInductionSC, VPWidenPointerInductionSC, diff --git a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp index 7ebdb91..12d37fa 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanVerifier.cpp @@ -92,7 +92,50 @@ static bool verifyVPBasicBlock(const VPBasicBlock *VPBB, for (const VPRecipeBase &R : *VPBB) RecipeNumbering[&R] = Cnt++; + // Set of recipe types along with VPInstruction Opcodes of all EVL-related + // recipes that must appear at most once in the header block. 
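A toy model of the invariant being enforced may make the bookkeeping below easier to follow; the kinds are illustrative strings rather than the real recipe classes, and the sketch omits the additional check that these recipes live in the vector-loop entry block.

  #include <string>
  #include <unordered_set>
  #include <vector>

  // Toy model of the new header-block checks: each EVL-related kind may
  // appear at most once, and the EVL definition must come before any
  // widen-memory recipe that would use it. Illustrative sketch only.
  static bool verifyHeaderModel(const std::vector<std::string> &Recipes) {
    std::unordered_set<std::string> EVLFound;
    bool SawWidenMemory = false;
    for (const std::string &R : Recipes) {
      if (R == "evl-phi" || R == "evl") {
        if (!EVLFound.insert(R).second)
          return false; // inserted more than once
        if (R == "evl" && SawWidenMemory)
          return false; // memory recipe precedes the EVL it should use
      } else if (R == "widen-memory") {
        SawWidenMemory = true;
      }
    }
    return true;
  }
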
+ DenseSet EVLFound; + const VPRecipeBase *VPWidenMemRecipe = nullptr; + const VPlan *Plan = VPBB->getPlan(); + bool IsHeader = Plan->getEntry()->getNumSuccessors() == 1 && + Plan->getVectorLoopRegion()->getEntry() == VPBB; + auto CheckEVLRecipiesInsts = [&](const VPRecipeBase *R) { + if (isa(R)) { + if (!IsHeader) { + errs() << "EVL PHI recipe not in entry block!\n"; + return false; + } + if (!EVLFound.insert(VPDef::VPEVLBasedIVPHISC).second) { + errs() << "EVL PHI recipe inserted more than once!\n"; + return false; + } + return true; + } + if (const auto *RInst = dyn_cast(R); + RInst && RInst->getOpcode() == VPInstruction::ExplicitVectorLength) { + if (!IsHeader) { + errs() << "EVL instruction not in the header block!\n"; + return false; + } + if (!EVLFound.insert(RInst->getOpcode() + VPDef::VPLastPHISC).second) { + errs() << "EVL instruction inserted more than once!\n"; + return false; + } + if (VPWidenMemRecipe) { + errs() << "Use of EVL instruction by widen memory recipe before " + "definition!\n"; + return false; + } + return true; + } + if (isa(R)) + VPWidenMemRecipe = R; + return true; + }; + for (const VPRecipeBase &R : *VPBB) { + if (!CheckEVLRecipiesInsts(&R)) + return false; for (const VPValue *V : R.definedValues()) { for (const VPUser *U : V->users()) { auto *UI = dyn_cast(U); diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-force-tail-with-evl.ll new file mode 100644 index 0000000..2ce2a45 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-force-tail-with-evl.ll @@ -0,0 +1,51 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -S < %s | FileCheck %s + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -S < %s | FileCheck %s + +define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; CHECK-LABEL: @foo( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_BODY:%.*]] +; CHECK: for.body: +; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[IV]] +; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[IV]] +; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] +; CHECK-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] +; CHECK-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]] +; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %arrayidx2, 
align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll new file mode 100644 index 0000000..5d1a471 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/PowerPC/vplan-force-tail-with-evl.ll @@ -0,0 +1,117 @@ +; REQUIRES: asserts + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=powerpc64le-unknown-linux-gnu \ +; RUN: -mcpu=pwr10 -disable-output < %s 2>&1 | FileCheck %s + +define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; CHECK-LABEL: VPlan 'Initial VPlan for VF={2,4},UF>=1' { +; CHECK-NEXT: Live-in vp<%0> = VF * UF +; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in vp<%2> = backedge-taken count +; CHECK-NEXT: Live-in ir<%N> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<%3> = CANONICAL-INDUCTION ir<0>, vp<%16> +; CHECK-NEXT: WIDEN-INDUCTION %iv = phi 0, %iv.next, ir<1> +; CHECK-NEXT: EMIT vp<%5> = icmp ule ir<%iv>, vp<%2> +; CHECK-NEXT: Successor(s): pred.store +; CHECK-EMPTY: +; CHECK-NEXT: pred.store: { +; CHECK-NEXT: pred.store.entry: +; CHECK-NEXT: BRANCH-ON-MASK vp<%5> +; CHECK-NEXT: Successor(s): pred.store.if, pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.if: +; CHECK-NEXT: vp<%6> = SCALAR-STEPS vp<%3>, ir<1> +; CHECK-NEXT: REPLICATE ir<%arrayidx> = getelementptr inbounds ir<%b>, vp<%6> +; CHECK-NEXT: REPLICATE ir<%0> = load ir<%arrayidx> +; CHECK-NEXT: REPLICATE ir<%arrayidx2> = getelementptr inbounds ir<%c>, vp<%6> +; CHECK-NEXT: REPLICATE ir<%1> = load ir<%arrayidx2> +; CHECK-NEXT: REPLICATE ir<%arrayidx4> = getelementptr inbounds ir<%a>, vp<%6> +; CHECK-NEXT: REPLICATE ir<%add> = add nsw ir<%1>, ir<%0> +; CHECK-NEXT: REPLICATE store ir<%add>, ir<%arrayidx4> +; CHECK-NEXT: Successor(s): pred.store.continue +; CHECK-EMPTY: +; CHECK-NEXT: pred.store.continue: +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%14> = ir<%0> +; CHECK-NEXT: PHI-PREDICATED-INSTRUCTION vp<%15> = ir<%1> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; CHECK-NEXT: Successor(s): for.body.2 +; CHECK-EMPTY: +; CHECK-NEXT: for.body.2: +; CHECK-NEXT: EMIT vp<%16> = add vp<%3>, vp<%0> +; CHECK-NEXT: EMIT branch-on-count vp<%16>, vp<%1> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +define void @safe_dep(ptr %p) { +; CHECK-LABEL: VPlan 'Initial 
VPlan for VF={2},UF>=1' { +; CHECK-NEXT: Live-in vp<%0> = VF * UF +; CHECK-NEXT: Live-in vp<%1> = vector-trip-count +; CHECK-NEXT: Live-in ir<512> = original trip-count +; CHECK-EMPTY: +; CHECK-NEXT: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<%2> = CANONICAL-INDUCTION ir<0>, vp<%10> +; CHECK-NEXT: vp<%3> = SCALAR-STEPS vp<%2>, ir<1> +; CHECK-NEXT: CLONE ir<%a1> = getelementptr ir<%p>, vp<%3> +; CHECK-NEXT: vp<%5> = vector-pointer ir<%a1> +; CHECK-NEXT: WIDEN ir<%v> = load vp<%5> +; CHECK-NEXT: CLONE ir<%offset> = add vp<%3>, ir<100> +; CHECK-NEXT: CLONE ir<%a2> = getelementptr ir<%p>, ir<%offset> +; CHECK-NEXT: vp<%9> = vector-pointer ir<%a2> +; CHECK-NEXT: WIDEN store vp<%9>, ir<%v> +; CHECK-NEXT: EMIT vp<%10> = add nuw vp<%2>, vp<%0> +; CHECK-NEXT: EMIT branch-on-count vp<%10>, vp<%1> +; CHECK-NEXT: No successors +; CHECK-NEXT: } +; +entry: + br label %loop + +loop: + %iv = phi i64 [0, %entry], [%iv.next, %loop] + %a1 = getelementptr i64, ptr %p, i64 %iv + %v = load i64, ptr %a1, align 32 + %offset = add i64 %iv, 100 + %a2 = getelementptr i64, ptr %p, i64 %offset + store i64 %v, ptr %a2, align 32 + %iv.next = add i64 %iv, 1 + %cmp = icmp ne i64 %iv, 511 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll index 57e1dc9..b876e9d 100644 --- a/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll +++ b/llvm/test/Transforms/LoopVectorize/RISCV/inloop-reduction.ll @@ -1,11 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py ; RUN: opt -mtriple riscv64-linux-gnu -mattr=+v,+d -passes=loop-vectorize < %s -S -o - | FileCheck %s -check-prefix=OUTLOOP ; RUN: opt -mtriple riscv64-linux-gnu -mattr=+v,+d -passes=loop-vectorize -prefer-inloop-reductions < %s -S -o - | FileCheck %s -check-prefix=INLOOP - +; RUN: opt -passes=loop-vectorize -force-tail-folding-style=data-with-evl -prefer-predicate-over-epilogue=predicate-dont-vectorize -mtriple=riscv64 -mattr=+v -S < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128" target triple = "riscv64" +; FIXME: inloop reductions are not supported yet with predicated vectorization. 
+ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; OUTLOOP-LABEL: @add_i16_i32( ; OUTLOOP-NEXT: entry: @@ -115,6 +117,70 @@ define i32 @add_i16_i32(ptr nocapture readonly %x, i32 %n) { ; INLOOP-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] ; INLOOP-NEXT: ret i32 [[R_0_LCSSA]] ; +; IF-EVL-LABEL: @add_i16_i32( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[N:%.*]], 0 +; IF-EVL-NEXT: br i1 [[CMP6]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]] +; IF-EVL: for.body.preheader: +; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP1:%.*]] = mul i32 [[TMP0]], 4 +; IF-EVL-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP3:%.*]] = mul i32 [[TMP2]], 4 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i32 [[TMP3]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP4]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP1]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[N]], 1 +; IF-EVL-NEXT: [[TMP5:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP6:%.*]] = mul i32 [[TMP5]], 4 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[VEC_PHI:%.*]] = phi [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[TMP7:%.*]] = add i32 [[INDEX]], 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i32 [[INDEX]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: [[TMP8:%.*]] = call @llvm.experimental.stepvector.nxv4i32() +; IF-EVL-NEXT: [[TMP9:%.*]] = add zeroinitializer, [[TMP8]] +; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP9]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule [[VEC_IV]], [[BROADCAST_SPLAT2]] +; IF-EVL-NEXT: [[TMP11:%.*]] = getelementptr inbounds i16, ptr [[X:%.*]], i32 [[TMP7]] +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i16, ptr [[TMP11]], i32 0 +; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i16.p0(ptr [[TMP12]], i32 2, [[TMP10]], poison) +; IF-EVL-NEXT: [[TMP13:%.*]] = sext [[WIDE_MASKED_LOAD]] to +; IF-EVL-NEXT: [[TMP14]] = add [[VEC_PHI]], [[TMP13]] +; IF-EVL-NEXT: [[TMP15:%.*]] = select [[TMP10]], [[TMP14]], [[VEC_PHI]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], [[TMP6]] +; IF-EVL-NEXT: [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.nxv4i32( [[TMP15]]) +; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] +; IF-EVL-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[FOR_BODY_PREHEADER]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] 
+; IF-EVL: for.body: +; IF-EVL-NEXT: [[I_08:%.*]] = phi i32 [ [[INC:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; IF-EVL-NEXT: [[R_07:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[X]], i32 [[I_08]] +; IF-EVL-NEXT: [[TMP18:%.*]] = load i16, ptr [[ARRAYIDX]], align 2 +; IF-EVL-NEXT: [[CONV:%.*]] = sext i16 [[TMP18]] to i32 +; IF-EVL-NEXT: [[ADD]] = add nsw i32 [[R_07]], [[CONV]] +; IF-EVL-NEXT: [[INC]] = add nuw nsw i32 [[I_08]], 1 +; IF-EVL-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: for.cond.cleanup.loopexit: +; IF-EVL-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ] +; IF-EVL-NEXT: br label [[FOR_COND_CLEANUP]] +; IF-EVL: for.cond.cleanup: +; IF-EVL-NEXT: [[R_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ] +; IF-EVL-NEXT: ret i32 [[R_0_LCSSA]] +; entry: %cmp6 = icmp sgt i32 %n, 0 br i1 %cmp6, label %for.body, label %for.cond.cleanup diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-gather-scatter.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-gather-scatter.ll new file mode 100644 index 0000000..835ff37 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-gather-scatter.ll @@ -0,0 +1,116 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP + +define void @gather_scatter(ptr noalias %in, ptr noalias %out, ptr noalias %index, i64 %n) { +; IF-EVL-LABEL: @gather_scatter( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] +; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 2 +; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] +; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2 +; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 2 +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 2 +; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.experimental.stepvector.nxv2i64() +; IF-EVL-NEXT: [[TMP12:%.*]] = add [[TMP11]], zeroinitializer +; IF-EVL-NEXT: [[TMP13:%.*]] = mul [[TMP12]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] 
+; IF-EVL-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 2 +; IF-EVL-NEXT: [[TMP16:%.*]] = mul i64 1, [[TMP15]] +; IF-EVL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP16]], i64 0 +; IF-EVL-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[TMP17:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP18:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP17]], i32 2, i1 true) +; IF-EVL-NEXT: [[TMP19:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] +; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[INDEX:%.*]], [[VEC_IND]] +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.vp.gather.nxv2i64.nxv2p0( align 8 [[TMP20]], [[TMP19]], i32 [[TMP18]]) +; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr inbounds float, ptr [[IN:%.*]], [[WIDE_MASKED_GATHER]] +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.vp.gather.nxv2f32.nxv2p0( align 4 [[TMP21]], [[TMP19]], i32 [[TMP18]]) +; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], [[WIDE_MASKED_GATHER]] +; IF-EVL-NEXT: call void @llvm.vp.scatter.nxv2f32.nxv2p0( [[WIDE_MASKED_GATHER2]], align 4 [[TMP22]], [[TMP19]], i32 [[TMP18]]) +; IF-EVL-NEXT: [[TMP23:%.*]] = zext i32 [[TMP18]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP23]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX1]], [[TMP10]] +; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[VEC_IND]], [[DOTSPLAT]] +; IF-EVL-NEXT: [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[INDEX]], i64 [[INDVARS_IV]] +; IF-EVL-NEXT: [[TMP25:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8 +; IF-EVL-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[IN]], i64 [[TMP25]] +; IF-EVL-NEXT: [[TMP26:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +; IF-EVL-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[OUT]], i64 [[TMP25]] +; IF-EVL-NEXT: store float [[TMP26]], ptr [[ARRAYIDX7]], align 4 +; IF-EVL-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: for.end: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: 
@gather_scatter( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[INDEX:%.*]], i64 [[INDVARS_IV]] +; NO-VP-NEXT: [[TMP0:%.*]] = load i64, ptr [[ARRAYIDX3]], align 8 +; NO-VP-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, ptr [[IN:%.*]], i64 [[TMP0]] +; NO-VP-NEXT: [[TMP1:%.*]] = load float, ptr [[ARRAYIDX5]], align 4 +; NO-VP-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, ptr [[OUT:%.*]], i64 [[TMP0]] +; NO-VP-NEXT: store float [[TMP1]], ptr [[ARRAYIDX7]], align 4 +; NO-VP-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1 +; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N:%.*]] +; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END:%.*]], label [[FOR_BODY]] +; NO-VP: for.end: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ] + %arrayidx3 = getelementptr inbounds i32, ptr %index, i64 %indvars.iv + %0 = load i64, ptr %arrayidx3, align 8 + %arrayidx5 = getelementptr inbounds float, ptr %in, i64 %0 + %1 = load float, ptr %arrayidx5, align 4 + %arrayidx7 = getelementptr inbounds float, ptr %out, i64 %0 + store float %1, ptr %arrayidx7, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %exitcond.not = icmp eq i64 %indvars.iv.next, %n + br i1 %exitcond.not, label %for.end, label %for.body + +for.end: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll new file mode 100644 index 0000000..0b495bc --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-interleave.ll @@ -0,0 +1,175 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck --check-prefix=IF-EVL %s + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck --check-prefix=NO-VP %s + +; FIXME: interleaved accesses are not supported yet with predicated vectorization. 
+define void @interleave(ptr noalias %a, ptr noalias %b, i64 %N) { +; IF-EVL-LABEL: @interleave( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: [[TMP17:%.*]] = sub i64 -1, [[N:%.*]] +; IF-EVL-NEXT: [[TMP31:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP31]], 8 +; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP17]], [[TMP2]] +; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 8 +; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 8 +; IF-EVL-NEXT: [[TMP32:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP32]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 8 +; IF-EVL-NEXT: [[TMP11:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; IF-EVL-NEXT: [[TMP12:%.*]] = add [[TMP11]], zeroinitializer +; IF-EVL-NEXT: [[TMP13:%.*]] = mul [[TMP12]], shufflevector ( insertelement ( poison, i64 1, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[INDUCTION:%.*]] = add zeroinitializer, [[TMP13]] +; IF-EVL-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4 +; IF-EVL-NEXT: [[TMP37:%.*]] = mul i64 1, [[TMP15]] +; IF-EVL-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement poison, i64 [[TMP37]], i64 0 +; IF-EVL-NEXT: [[DOTSPLAT:%.*]] = shufflevector [[DOTSPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[VEC_IND:%.*]] = phi [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[STEP_ADD:%.*]] = add [[VEC_IND]], [[DOTSPLAT]] +; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; IF-EVL-NEXT: [[TMP18:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP19:%.*]] = mul i64 [[TMP18]], 4 +; IF-EVL-NEXT: [[TMP38:%.*]] = add i64 [[TMP19]], 0 +; IF-EVL-NEXT: [[TMP39:%.*]] = mul i64 [[TMP38]], 1 +; IF-EVL-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], [[TMP39]] +; IF-EVL-NEXT: [[TMP23:%.*]] = icmp ule [[VEC_IND]], [[BROADCAST_SPLAT]] +; IF-EVL-NEXT: [[TMP24:%.*]] = icmp ule [[STEP_ADD]], [[BROADCAST_SPLAT]] +; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i32], ptr [[B:%.*]], [[VEC_IND]], i32 0 +; IF-EVL-NEXT: [[TMP26:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], [[STEP_ADD]], i32 0 +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP25]], i32 4, [[TMP23]], poison) +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER2:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP26]], i32 4, [[TMP24]], poison) +; IF-EVL-NEXT: [[TMP27:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], [[VEC_IND]], i32 1 +; IF-EVL-NEXT: [[TMP28:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], [[STEP_ADD]], i32 1 +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER3:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP27]], i32 4, 
[[TMP23]], poison) +; IF-EVL-NEXT: [[WIDE_MASKED_GATHER4:%.*]] = call @llvm.masked.gather.nxv4i32.nxv4p0( [[TMP28]], i32 4, [[TMP24]], poison) +; IF-EVL-NEXT: [[TMP29:%.*]] = add nsw [[WIDE_MASKED_GATHER3]], [[WIDE_MASKED_GATHER]] +; IF-EVL-NEXT: [[TMP30:%.*]] = add nsw [[WIDE_MASKED_GATHER4]], [[WIDE_MASKED_GATHER2]] +; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] +; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] +; IF-EVL-NEXT: [[TMP33:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i32 0 +; IF-EVL-NEXT: [[TMP34:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP35:%.*]] = mul i64 [[TMP34]], 4 +; IF-EVL-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 [[TMP35]] +; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP29]], ptr [[TMP33]], i32 4, [[TMP23]]) +; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[TMP30]], ptr [[TMP36]], i32 4, [[TMP24]]) +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; IF-EVL-NEXT: [[VEC_IND_NEXT]] = add [[STEP_ADD]], [[DOTSPLAT]] +; IF-EVL-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[IV]], i32 0 +; IF-EVL-NEXT: [[TMP21:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[IV]], i32 1 +; IF-EVL-NEXT: [[TMP22:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP21]] +; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: for.cond.cleanup: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @interleave( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 16 +; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; NO-VP: vector.ph: +; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 16 +; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] +; NO-VP: vector.body: +; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[TMP10:%.*]] = add i64 [[INDEX]], 0 +; NO-VP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 8 +; NO-VP-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B:%.*]], i64 [[TMP10]], i32 0 +; NO-VP-NEXT: [[TMP3:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[TMP1]], i32 0 +; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0 +; NO-VP-NEXT: [[WIDE_VEC:%.*]] = load <16 x i32>, ptr 
[[TMP4]], align 4 +; NO-VP-NEXT: [[WIDE_VEC1:%.*]] = load <16 x i32>, ptr [[TMP5]], align 4 +; NO-VP-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> +; NO-VP-NEXT: [[STRIDED_VEC2:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> +; NO-VP-NEXT: [[STRIDED_VEC3:%.*]] = shufflevector <16 x i32> [[WIDE_VEC]], <16 x i32> poison, <8 x i32> +; NO-VP-NEXT: [[STRIDED_VEC4:%.*]] = shufflevector <16 x i32> [[WIDE_VEC1]], <16 x i32> poison, <8 x i32> +; NO-VP-NEXT: [[TMP6:%.*]] = add nsw <8 x i32> [[STRIDED_VEC3]], [[STRIDED_VEC]] +; NO-VP-NEXT: [[TMP7:%.*]] = add nsw <8 x i32> [[STRIDED_VEC4]], [[STRIDED_VEC2]] +; NO-VP-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP10]] +; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] +; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 0 +; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 8 +; NO-VP-NEXT: store <8 x i32> [[TMP6]], ptr [[TMP12]], align 4 +; NO-VP-NEXT: store <8 x i32> [[TMP7]], ptr [[TMP11]], align 4 +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16 +; NO-VP-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-VP: middle.block: +; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; NO-VP: scalar.ph: +; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[IV]], i32 0 +; NO-VP-NEXT: [[TMP29:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x i32], ptr [[B]], i64 [[IV]], i32 1 +; NO-VP-NEXT: [[TMP30:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; NO-VP-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP30]], [[TMP29]] +; NO-VP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; NO-VP-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; NO-VP: for.cond.cleanup: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds [2 x i32], ptr %b, i64 %iv, i32 0 + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds [2 x i32], ptr %b, i64 %iv, i32 1 + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body, !llvm.loop !0 + +for.cond.cleanup: + ret void +} + +!0 = distinct !{!0, !1, !2} +!1 = !{!"llvm.loop.interleave.count", i32 2} +!2 = !{!"llvm.loop.vectorize.enable", i1 true} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll 
b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll new file mode 100644 index 0000000..d5ad99f --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-iv32.ll @@ -0,0 +1,124 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck --check-prefix=IF-EVL %s + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck --check-prefix=NO-VP %s + +define void @iv32(ptr noalias %a, ptr noalias %b, i32 %N) { +; IF-EVL-LABEL: @iv32( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: [[TMP19:%.*]] = sub i32 -1, [[N:%.*]] +; IF-EVL-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], 4 +; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i32 [[TMP19]], [[TMP2]] +; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[ENTRY:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP4:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP5:%.*]] = mul i32 [[TMP4]], 4 +; IF-EVL-NEXT: [[TMP6:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP7:%.*]] = mul i32 [[TMP6]], 4 +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i32 [[TMP7]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i32 [[N]], [[TMP8]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], [[TMP5]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TMP9:%.*]] = call i32 @llvm.vscale.i32() +; IF-EVL-NEXT: [[TMP10:%.*]] = mul i32 [[TMP9]], 4 +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INDEX_EVL_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[TMP11:%.*]] = sub i32 [[N]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i32(i32 [[TMP11]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP13:%.*]] = add i32 [[EVL_BASED_IV]], 0 +; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[TMP13]] +; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP13]] +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[VP_OP_LOAD]], ptr align 4 [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i32 [[TMP12]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[IV_NEXT]] = add i32 [[IV]], [[TMP10]] +; IF-EVL-NEXT: [[TMP18:%.*]] = icmp eq i32 [[IV_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY1:%.*]] ] +; 
IF-EVL-NEXT: br label [[FOR_BODY1:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[IV1:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT1:%.*]], [[FOR_BODY1]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV1]] +; IF-EVL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV1]] +; IF-EVL-NEXT: store i32 [[TMP0]], ptr [[ARRAYIDX4]], align 4 +; IF-EVL-NEXT: [[IV_NEXT1]] = add nuw nsw i32 [[IV1]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT1]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY1]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: for.cond.cleanup: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @iv32( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: [[TMP0:%.*]] = call i32 @llvm.vscale.i32() +; NO-VP-NEXT: [[TMP10:%.*]] = mul i32 [[TMP0]], 4 +; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[N:%.*]], [[TMP10]] +; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; NO-VP: vector.ph: +; NO-VP-NEXT: [[TMP1:%.*]] = call i32 @llvm.vscale.i32() +; NO-VP-NEXT: [[TMP11:%.*]] = mul i32 [[TMP1]], 4 +; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i32 [[N]], [[TMP11]] +; NO-VP-NEXT: [[N_VEC:%.*]] = sub i32 [[N]], [[N_MOD_VF]] +; NO-VP-NEXT: [[TMP2:%.*]] = call i32 @llvm.vscale.i32() +; NO-VP-NEXT: [[TMP12:%.*]] = mul i32 [[TMP2]], 4 +; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] +; NO-VP: vector.body: +; NO-VP-NEXT: [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[TMP3:%.*]] = add i32 [[INDEX]], 0 +; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i32 [[TMP3]] +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i32 [[TMP3]] +; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 +; NO-VP-NEXT: store [[WIDE_LOAD]], ptr [[TMP7]], align 4 +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP12]] +; NO-VP-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-VP: middle.block: +; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i32 [[N]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; NO-VP: scalar.ph: +; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i32 [[IV]] +; NO-VP-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i32 [[IV]] +; NO-VP-NEXT: store i32 [[TMP9]], ptr [[ARRAYIDX4]], align 4 +; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i32 [[IV]], 1 +; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[IV_NEXT]], [[N]] +; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; NO-VP: for.cond.cleanup: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr 
inbounds i32, ptr %b, i32 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i32 %iv + store i32 %0, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i32 %iv, 1 + %exitcond.not = icmp eq i32 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll new file mode 100644 index 0000000..203d0c9 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-masked-loadstore.ll @@ -0,0 +1,132 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP + +define void @masked_loadstore(ptr noalias %a, ptr noalias %b, i64 %n) { +; IF-EVL-LABEL: @masked_loadstore( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] +; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 +; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] +; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector [[BROADCAST_SPLATINSERT1]], poison, zeroinitializer +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[EVL_BASED_IV]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: [[TMP14:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; IF-EVL-NEXT: [[TMP15:%.*]] = add zeroinitializer, [[TMP14]] +; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP15]] +; IF-EVL-NEXT: [[TMP16:%.*]] = icmp ule [[VEC_IV]], 
[[BROADCAST_SPLAT2]] +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP13]] +; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP18]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP19:%.*]] = icmp ne [[VP_OP_LOAD]], zeroinitializer +; IF-EVL-NEXT: [[TMP20:%.*]] = select [[TMP16]], [[TMP19]], zeroinitializer +; IF-EVL-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[A:%.*]], i64 [[TMP13]] +; IF-EVL-NEXT: [[TMP22:%.*]] = getelementptr i32, ptr [[TMP21]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD3:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP22]], [[TMP20]], i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP23:%.*]] = add [[VP_OP_LOAD]], [[VP_OP_LOAD3]] +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP23]], ptr align 4 [[TMP22]], [[TMP20]], i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP24:%.*]] = zext i32 [[TMP12]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP24]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; IF-EVL-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[I_011]] +; IF-EVL-NEXT: [[TMP26:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP26]], 0 +; IF-EVL-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; IF-EVL: if.then: +; IF-EVL-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[I_011]] +; IF-EVL-NEXT: [[TMP27:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 +; IF-EVL-NEXT: [[ADD:%.*]] = add i32 [[TMP26]], [[TMP27]] +; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4 +; IF-EVL-NEXT: br label [[FOR_INC]] +; IF-EVL: for.inc: +; IF-EVL-NEXT: [[INC]] = add nuw nsw i64 [[I_011]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: exit: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @masked_loadstore( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[I_011:%.*]] = phi i64 [ [[INC:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ] +; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[I_011]] +; NO-VP-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-NEXT: [[CMP1:%.*]] = icmp ne i32 [[TMP0]], 0 +; NO-VP-NEXT: br i1 [[CMP1]], label [[IF_THEN:%.*]], label [[FOR_INC]] +; NO-VP: if.then: +; NO-VP-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[I_011]] +; NO-VP-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX3]], align 4 +; NO-VP-NEXT: [[ADD:%.*]] = add i32 [[TMP0]], [[TMP1]] +; NO-VP-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX3]], align 4 +; NO-VP-NEXT: br label [[FOR_INC]] +; NO-VP: for.inc: +; NO-VP-NEXT: [[INC]] = add nuw nsw i64 [[I_011]], 
1 +; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INC]], [[N:%.*]] +; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[FOR_BODY]] +; NO-VP: exit: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %i.011 = phi i64 [ %inc, %for.inc ], [ 0, %entry ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %i.011 + %0 = load i32, ptr %arrayidx, align 4 + %cmp1 = icmp ne i32 %0, 0 + br i1 %cmp1, label %if.then, label %for.inc + +if.then: + %arrayidx3 = getelementptr inbounds i32, ptr %a, i64 %i.011 + %1 = load i32, ptr %arrayidx3, align 4 + %add = add i32 %0, %1 + store i32 %add, ptr %arrayidx3, align 4 + br label %for.inc + +for.inc: + %inc = add nuw nsw i64 %i.011, 1 + %exitcond.not = icmp eq i64 %inc, %n + br i1 %exitcond.not, label %exit, label %for.body + +exit: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-no-masking.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-no-masking.ll new file mode 100644 index 0000000..1c49fba --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-no-masking.ll @@ -0,0 +1,36 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s + +; No need to emit predicated vector code if the vector instructions with masking are not required. +define i32 @no_masking() { +; CHECK-LABEL: @no_masking( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[BODY:%.*]] +; CHECK: body: +; CHECK-NEXT: [[P:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[BODY]] ] +; CHECK-NEXT: [[INC]] = add i32 [[P]], 1 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[INC]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[END:%.*]], label [[BODY]] +; CHECK: end: +; CHECK-NEXT: ret i32 0 +; +entry: + br label %body + +body: + %p = phi i32 [ 1, %entry ], [ %inc, %body ] + %inc = add i32 %p, 1 + %cmp = icmp eq i32 %inc, 0 + br i1 %cmp, label %end, label %body + +end: + ret i32 0 +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll new file mode 100644 index 0000000..f2222e0 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-force-tail-with-evl-reverse-load-store.ll @@ -0,0 +1,119 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=IF-EVL + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck %s --check-prefix=NO-VP + +; FIXME: reversed loads/stores are not supported yet with predicated vectorization. 
+define void @reverse_load_store(i64 %startval, ptr noalias %ptr, ptr noalias %ptr2) { +; IF-EVL-LABEL: @reverse_load_store( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; IF-EVL-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; IF-EVL-NEXT: [[TMP4:%.*]] = sub i64 [[TMP3]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 1024, [[TMP4]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP1]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[IND_END:%.*]] = sub i64 [[STARTVAL:%.*]], [[N_VEC]] +; IF-EVL-NEXT: [[IND_END1:%.*]] = trunc i64 [[N_VEC]] to i32 +; IF-EVL-NEXT: [[TMP5:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP6:%.*]] = mul i64 [[TMP5]], 4 +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[OFFSET_IDX:%.*]] = sub i64 [[STARTVAL]], [[INDEX]] +; IF-EVL-NEXT: [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, i64 [[INDEX]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer +; IF-EVL-NEXT: [[TMP8:%.*]] = call @llvm.experimental.stepvector.nxv4i64() +; IF-EVL-NEXT: [[TMP9:%.*]] = add zeroinitializer, [[TMP8]] +; IF-EVL-NEXT: [[VEC_IV:%.*]] = add [[BROADCAST_SPLAT]], [[TMP9]] +; IF-EVL-NEXT: [[TMP10:%.*]] = icmp ule [[VEC_IV]], shufflevector ( insertelement ( poison, i64 1023, i64 0), poison, zeroinitializer) +; IF-EVL-NEXT: [[TMP11:%.*]] = add i64 [[TMP7]], -1 +; IF-EVL-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP14:%.*]] = mul i64 [[TMP13]], 4 +; IF-EVL-NEXT: [[TMP15:%.*]] = mul i64 0, [[TMP14]] +; IF-EVL-NEXT: [[TMP16:%.*]] = sub i64 1, [[TMP14]] +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i64 [[TMP15]] +; IF-EVL-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP17]], i64 [[TMP16]] +; IF-EVL-NEXT: [[REVERSE:%.*]] = call @llvm.experimental.vector.reverse.nxv4i1( [[TMP10]]) +; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call @llvm.masked.load.nxv4i32.p0(ptr [[TMP18]], i32 4, [[REVERSE]], poison) +; IF-EVL-NEXT: [[REVERSE3:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[WIDE_MASKED_LOAD]]) +; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[TMP11]] +; IF-EVL-NEXT: [[TMP20:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP21:%.*]] = mul i64 [[TMP20]], 4 +; IF-EVL-NEXT: [[TMP22:%.*]] = mul i64 0, [[TMP21]] +; IF-EVL-NEXT: [[TMP23:%.*]] = sub i64 1, [[TMP21]] +; IF-EVL-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i64 [[TMP22]] +; IF-EVL-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i64 [[TMP23]] +; IF-EVL-NEXT: [[REVERSE4:%.*]] = call @llvm.experimental.vector.reverse.nxv4i1( [[TMP10]]) +; IF-EVL-NEXT: [[REVERSE5:%.*]] = call @llvm.experimental.vector.reverse.nxv4i32( [[REVERSE3]]) +; IF-EVL-NEXT: call void @llvm.masked.store.nxv4i32.p0( [[REVERSE5]], ptr [[TMP25]], i32 4, [[REVERSE4]]) +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP6]] +; IF-EVL-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 
[[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[LOOPEND:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[STARTVAL]], [[ENTRY:%.*]] ] +; IF-EVL-NEXT: [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END1]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[ADD_PHI:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[ADD]] = add i64 [[ADD_PHI]], -1 +; IF-EVL-NEXT: [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR]], i64 [[ADD]] +; IF-EVL-NEXT: [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4 +; IF-EVL-NEXT: [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2]], i64 [[ADD]] +; IF-EVL-NEXT: store i32 [[TMP]], ptr [[GEPS]], align 4 +; IF-EVL-NEXT: [[INC]] = add i32 [[I]], 1 +; IF-EVL-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024 +; IF-EVL-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: loopend: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @reverse_load_store( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[ADD_PHI:%.*]] = phi i64 [ [[STARTVAL:%.*]], [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[I:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[INC:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[ADD]] = add i64 [[ADD_PHI]], -1 +; NO-VP-NEXT: [[GEPL:%.*]] = getelementptr inbounds i32, ptr [[PTR:%.*]], i64 [[ADD]] +; NO-VP-NEXT: [[TMP:%.*]] = load i32, ptr [[GEPL]], align 4 +; NO-VP-NEXT: [[GEPS:%.*]] = getelementptr inbounds i32, ptr [[PTR2:%.*]], i64 [[ADD]] +; NO-VP-NEXT: store i32 [[TMP]], ptr [[GEPS]], align 4 +; NO-VP-NEXT: [[INC]] = add i32 [[I]], 1 +; NO-VP-NEXT: [[EXITCOND:%.*]] = icmp ne i32 [[INC]], 1024 +; NO-VP-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY]], label [[LOOPEND:%.*]] +; NO-VP: loopend: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %add.phi = phi i64 [ %startval, %entry ], [ %add, %for.body ] + %i = phi i32 [ 0, %entry ], [ %inc, %for.body ] + %add = add i64 %add.phi, -1 + %gepl = getelementptr inbounds i32, ptr %ptr, i64 %add + %tmp = load i32, ptr %gepl, align 4 + %geps = getelementptr inbounds i32, ptr %ptr2, i64 %add + store i32 %tmp, ptr %geps, align 4 + %inc = add i32 %i, 1 + %exitcond = icmp ne i32 %inc, 1024 + br i1 %exitcond, label %for.body, label %loopend + +loopend: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll new file mode 100644 index 0000000..c69bb17 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vectorize-vp-intrinsics.ll @@ -0,0 +1,142 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck --check-prefix=IF-EVL %s + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: -mtriple=riscv64 -mattr=+v -S < %s | FileCheck 
--check-prefix=NO-VP %s + +define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; IF-EVL-LABEL: @foo( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: [[TMP0:%.*]] = sub i64 -1, [[N:%.*]] +; IF-EVL-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 4 +; IF-EVL-NEXT: [[TMP3:%.*]] = icmp ult i64 [[TMP0]], [[TMP2]] +; IF-EVL-NEXT: br i1 [[TMP3]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; IF-EVL-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP7:%.*]] = mul i64 [[TMP6]], 4 +; IF-EVL-NEXT: [[TMP8:%.*]] = sub i64 [[TMP7]], 1 +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N]], [[TMP8]] +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], [[TMP5]] +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; IF-EVL-NEXT: [[TMP10:%.*]] = mul i64 [[TMP9]], 4 +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[EVL_BASED_IV:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_EVL_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[TMP11:%.*]] = sub i64 [[N]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.get.vector.length.i64(i64 [[TMP11]], i32 4, i1 true) +; IF-EVL-NEXT: [[TMP13:%.*]] = add i64 [[EVL_BASED_IV]], 0 +; IF-EVL-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP13]] +; IF-EVL-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[TMP14]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP15]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP13]] +; IF-EVL-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP16]], i32 0 +; IF-EVL-NEXT: [[VP_OP_LOAD1:%.*]] = call @llvm.vp.load.nxv4i32.p0(ptr align 4 [[TMP17]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP18:%.*]] = add nsw [[VP_OP_LOAD1]], [[VP_OP_LOAD]] +; IF-EVL-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP13]] +; IF-EVL-NEXT: [[TMP20:%.*]] = getelementptr inbounds i32, ptr [[TMP19]], i32 0 +; IF-EVL-NEXT: call void @llvm.vp.store.nxv4i32.p0( [[TMP18]], ptr align 4 [[TMP20]], shufflevector ( insertelement ( poison, i1 true, i64 0), poison, zeroinitializer), i32 [[TMP12]]) +; IF-EVL-NEXT: [[TMP21:%.*]] = zext i32 [[TMP12]] to i64 +; IF-EVL-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP21]], [[EVL_BASED_IV]] +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP10]] +; IF-EVL-NEXT: [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
i32, ptr [[B]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP23:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP24:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP24]], [[TMP23]] +; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: for.cond.cleanup: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @foo( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4 +; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]] +; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; NO-VP: vector.ph: +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4 +; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]] +; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; NO-VP-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4 +; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] +; NO-VP: vector.body: +; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[TMP6:%.*]] = add i64 [[INDEX]], 0 +; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP6]] +; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP8]], align 4 +; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP6]] +; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 +; NO-VP-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP10]], align 4 +; NO-VP-NEXT: [[TMP11:%.*]] = add nsw [[WIDE_LOAD1]], [[WIDE_LOAD]] +; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP6]] +; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 +; NO-VP-NEXT: store [[TMP11]], ptr [[TMP13]], align 4 +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; NO-VP-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-VP: middle.block: +; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; NO-VP: scalar.ph: +; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; NO-VP-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; NO-VP-NEXT: [[TMP16:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; NO-VP-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP15]] +; NO-VP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 
[[IV]] +; NO-VP-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; NO-VP: for.cond.cleanup: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll new file mode 100644 index 0000000..72b881b --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/RISCV/vplan-vp-intrinsics.ll @@ -0,0 +1,134 @@ +; REQUIRES: asserts + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefixes=IF-EVL,CHECK %s + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: -mtriple=riscv64 -mattr=+v -riscv-v-vector-bits-max=128 -disable-output < %s 2>&1 | FileCheck --check-prefixes=NO-VP,CHECK %s + +define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; IF-EVL: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF={1}' { +; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF +; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count +; IF-EVL-NEXT: Live-in ir<%N> = original trip-count +; IF-EVL-EMPTY: +; IF-EVL: vector.ph: +; IF-EVL-NEXT: Successor(s): vector loop +; IF-EVL-EMPTY: +; IF-EVL-NEXT: vector loop: { +; IF-EVL-NEXT: vector.body: +; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; IF-EVL-NEXT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI vp<[[EVL_PHI:%[0-9]+]]> = phi ir<0>, vp<[[IV_NEXT:%[0-9]+]]> +; IF-EVL-NEXT: EMIT vp<[[EVL:%.+]]> = EXPLICIT-VECTOR-LENGTH vp<[[EVL_PHI]]>, ir<%N> +; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[EVL_PHI]]>, ir<1> +; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> +; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>, ir +; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> +; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = load vp<[[PTR2]]>, ir +; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]> +; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> +; IF-EVL-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]>, ir +; IF-EVL-NEXT: SCALAR-CAST vp<[[CAST:%[0-9]+]]> = zext vp<[[EVL]]> to i64 +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT]]> = 
add vp<[[CAST]]>, vp<[[EVL_PHI]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT_EXIT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]> +; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT_EXIT]]>, vp<[[VTC]]> +; IF-EVL-NEXT: No successors +; IF-EVL-NEXT: } + +; NO-VP: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2,vscale x 4},UF>=1' { +; NO-VP-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF +; NO-VP-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count +; NO-VP-NEXT: Live-in ir<%N> = original trip-count +; NO-VP-EMPTY: +; NO-VP: vector.ph: +; NO-VP-NEXT: Successor(s): vector loop +; NO-VP-EMPTY: +; NO-VP-NEXT: vector loop: { +; NO-VP-NEXT: vector.body: +; NO-VP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; NO-VP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> +; NO-VP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> +; NO-VP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> +; NO-VP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]> +; NO-VP-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> +; NO-VP-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> +; NO-VP-NEXT: WIDEN ir<[[LD2:%.+]]> = load vp<[[PTR2]]> +; NO-VP-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]> +; NO-VP-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> +; NO-VP-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> +; NO-VP-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]> +; NO-VP-NEXT: EMIT vp<[[IV_NEXT:%[0-9]+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> +; NO-VP-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> +; NO-VP-NEXT: No successors +; NO-VP-NEXT: } + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} + +define void @safe_dep(ptr %p) { +; CHECK: VPlan 'Initial VPlan for VF={vscale x 1,vscale x 2},UF>=1' { +; CHECK-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF +; CHECK-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count +; CHECK-NEXT: Live-in ir<512> = original trip-count +; CHECK-EMPTY: +; CHECK: vector.ph: +; CHECK-NEXT: Successor(s): vector loop +; CHECK-EMPTY: +; CHECK-NEXT: vector loop: { +; CHECK-NEXT: vector.body: +; CHECK-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; CHECK-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> +; CHECK-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr ir<%p>, vp<[[ST]]> +; CHECK-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> +; CHECK-NEXT: WIDEN ir<[[V:%.+]]> = load vp<[[PTR1]]> +; CHECK-NEXT: CLONE ir<[[OFFSET:.+]]> = add vp<[[ST]]>, ir<100> +; CHECK-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr ir<%p>, ir<[[OFFSET]]> +; CHECK-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> +; CHECK-NEXT: WIDEN store vp<[[PTR2]]>, ir<[[V]]> +; CHECK-NEXT: EMIT vp<[[IV_NEXT:%[0-9]+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> +; CHECK-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> +; CHECK-NEXT: No successors +; CHECK-NEXT: } + +entry: + br label %loop + +loop: + %iv = phi i64 [0, %entry], [%iv.next, %loop] + %a1 = getelementptr i64, 
ptr %p, i64 %iv + %v = load i64, ptr %a1, align 32 + %offset = add i64 %iv, 100 + %a2 = getelementptr i64, ptr %p, i64 %offset + store i64 %v, ptr %a2, align 32 + %iv.next = add i64 %iv, 1 + %cmp = icmp ne i64 %iv, 511 + br i1 %cmp, label %loop, label %exit + +exit: + ret void +} + diff --git a/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll new file mode 100644 index 0000000..1cf71360 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/vectorize-force-tail-with-evl.ll @@ -0,0 +1,191 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=x86_64 -mattr=+avx512f -S < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: -mtriple=x86_64 -mattr=+avx512f -S < %s 2>&1 | FileCheck --check-prefix=NO-VP %s + +define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; IF-EVL-LABEL: @foo( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; IF-EVL: vector.ph: +; IF-EVL-NEXT: [[N_RND_UP:%.*]] = add i64 [[N:%.*]], 15 +; IF-EVL-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N_RND_UP]], 16 +; IF-EVL-NEXT: [[N_VEC:%.*]] = sub i64 [[N_RND_UP]], [[N_MOD_VF]] +; IF-EVL-NEXT: [[TRIP_COUNT_MINUS_1:%.*]] = sub i64 [[N]], 1 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <16 x i64> poison, i64 [[TRIP_COUNT_MINUS_1]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT1]], <16 x i64> poison, <16 x i32> zeroinitializer +; IF-EVL-NEXT: br label [[VECTOR_BODY:%.*]] +; IF-EVL: vector.body: +; IF-EVL-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; IF-EVL-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; IF-EVL-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <16 x i64> poison, i64 [[INDEX]], i64 0 +; IF-EVL-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <16 x i64> [[BROADCAST_SPLATINSERT]], <16 x i64> poison, <16 x i32> zeroinitializer +; IF-EVL-NEXT: [[VEC_IV:%.*]] = add <16 x i64> [[BROADCAST_SPLAT]], +; IF-EVL-NEXT: [[TMP1:%.*]] = icmp ule <16 x i64> [[VEC_IV]], [[BROADCAST_SPLAT2]] +; IF-EVL-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] +; IF-EVL-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[TMP2]], i32 0 +; IF-EVL-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP3]], i32 4, <16 x i1> [[TMP1]], <16 x i32> poison) +; IF-EVL-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]] +; IF-EVL-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +; IF-EVL-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <16 x i32> @llvm.masked.load.v16i32.p0(ptr [[TMP5]], i32 4, <16 x i1> [[TMP1]], <16 x i32> poison) +; IF-EVL-NEXT: [[TMP6:%.*]] = add nsw <16 x i32> [[WIDE_MASKED_LOAD3]], [[WIDE_MASKED_LOAD]] +; IF-EVL-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] +; IF-EVL-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP7]], i32 0 +; IF-EVL-NEXT: call void @llvm.masked.store.v16i32.p0(<16 x i32> [[TMP6]], ptr [[TMP8]], i32 4, <16 x i1> [[TMP1]]) +; IF-EVL-NEXT: [[INDEX_NEXT]] = add i64 
[[INDEX]], 16 +; IF-EVL-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; IF-EVL-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; IF-EVL: middle.block: +; IF-EVL-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; IF-EVL: scalar.ph: +; IF-EVL-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP10]] +; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; IF-EVL: for.cond.cleanup: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @foo( +; NO-VP-NEXT: iter.check: +; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8 +; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]] +; NO-VP: vector.main.loop.iter.check: +; NO-VP-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[N]], 64 +; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]] +; NO-VP: vector.ph: +; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 64 +; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] +; NO-VP: vector.body: +; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[TMP0:%.*]] = add i64 [[INDEX]], 0 +; NO-VP-NEXT: [[TMP1:%.*]] = add i64 [[INDEX]], 16 +; NO-VP-NEXT: [[TMP2:%.*]] = add i64 [[INDEX]], 32 +; NO-VP-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 48 +; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP0]] +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP1]] +; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP2]] +; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP3]] +; NO-VP-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 16 +; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 32 +; NO-VP-NEXT: [[TMP11:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 48 +; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i32>, ptr [[TMP8]], align 4 +; NO-VP-NEXT: [[WIDE_LOAD2:%.*]] = load <16 x i32>, ptr [[TMP9]], align 4 +; NO-VP-NEXT: [[WIDE_LOAD3:%.*]] = load <16 x i32>, ptr [[TMP10]], align 4 +; NO-VP-NEXT: [[WIDE_LOAD4:%.*]] = load <16 x i32>, ptr [[TMP11]], align 4 +; NO-VP-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[TMP0]] +; NO-VP-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP1]] +; NO-VP-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 
[[TMP2]] +; NO-VP-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[TMP3]] +; NO-VP-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 0 +; NO-VP-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 16 +; NO-VP-NEXT: [[TMP18:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 32 +; NO-VP-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, ptr [[TMP12]], i32 48 +; NO-VP-NEXT: [[WIDE_LOAD5:%.*]] = load <16 x i32>, ptr [[TMP16]], align 4 +; NO-VP-NEXT: [[WIDE_LOAD6:%.*]] = load <16 x i32>, ptr [[TMP17]], align 4 +; NO-VP-NEXT: [[WIDE_LOAD7:%.*]] = load <16 x i32>, ptr [[TMP18]], align 4 +; NO-VP-NEXT: [[WIDE_LOAD8:%.*]] = load <16 x i32>, ptr [[TMP19]], align 4 +; NO-VP-NEXT: [[TMP20:%.*]] = add nsw <16 x i32> [[WIDE_LOAD5]], [[WIDE_LOAD]] +; NO-VP-NEXT: [[TMP21:%.*]] = add nsw <16 x i32> [[WIDE_LOAD6]], [[WIDE_LOAD2]] +; NO-VP-NEXT: [[TMP22:%.*]] = add nsw <16 x i32> [[WIDE_LOAD7]], [[WIDE_LOAD3]] +; NO-VP-NEXT: [[TMP23:%.*]] = add nsw <16 x i32> [[WIDE_LOAD8]], [[WIDE_LOAD4]] +; NO-VP-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP0]] +; NO-VP-NEXT: [[TMP25:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP1]] +; NO-VP-NEXT: [[TMP26:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP2]] +; NO-VP-NEXT: [[TMP27:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP3]] +; NO-VP-NEXT: [[TMP28:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 0 +; NO-VP-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 16 +; NO-VP-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 32 +; NO-VP-NEXT: [[TMP31:%.*]] = getelementptr inbounds i32, ptr [[TMP24]], i32 48 +; NO-VP-NEXT: store <16 x i32> [[TMP20]], ptr [[TMP28]], align 4 +; NO-VP-NEXT: store <16 x i32> [[TMP21]], ptr [[TMP29]], align 4 +; NO-VP-NEXT: store <16 x i32> [[TMP22]], ptr [[TMP30]], align 4 +; NO-VP-NEXT: store <16 x i32> [[TMP23]], ptr [[TMP31]], align 4 +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 64 +; NO-VP-NEXT: [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-VP: middle.block: +; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]] +; NO-VP: vec.epilog.iter.check: +; NO-VP-NEXT: [[N_VEC_REMAINING:%.*]] = sub i64 [[N]], [[N_VEC]] +; NO-VP-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_VEC_REMAINING]], 8 +; NO-VP-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]] +; NO-VP: vec.epilog.ph: +; NO-VP-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ] +; NO-VP-NEXT: [[N_MOD_VF9:%.*]] = urem i64 [[N]], 8 +; NO-VP-NEXT: [[N_VEC10:%.*]] = sub i64 [[N]], [[N_MOD_VF9]] +; NO-VP-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]] +; NO-VP: vec.epilog.vector.body: +; NO-VP-NEXT: [[INDEX12:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT15:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ] +; NO-VP-NEXT: [[TMP33:%.*]] = add i64 [[INDEX12]], 0 +; NO-VP-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[TMP33]] +; NO-VP-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, ptr [[TMP34]], i32 0 +; NO-VP-NEXT: [[WIDE_LOAD13:%.*]] = load <8 x i32>, ptr [[TMP35]], align 4 +; NO-VP-NEXT: [[TMP36:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 
[[TMP33]] +; NO-VP-NEXT: [[TMP37:%.*]] = getelementptr inbounds i32, ptr [[TMP36]], i32 0 +; NO-VP-NEXT: [[WIDE_LOAD14:%.*]] = load <8 x i32>, ptr [[TMP37]], align 4 +; NO-VP-NEXT: [[TMP38:%.*]] = add nsw <8 x i32> [[WIDE_LOAD14]], [[WIDE_LOAD13]] +; NO-VP-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[TMP33]] +; NO-VP-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, ptr [[TMP39]], i32 0 +; NO-VP-NEXT: store <8 x i32> [[TMP38]], ptr [[TMP40]], align 4 +; NO-VP-NEXT: [[INDEX_NEXT15]] = add nuw i64 [[INDEX12]], 8 +; NO-VP-NEXT: [[TMP41:%.*]] = icmp eq i64 [[INDEX_NEXT15]], [[N_VEC10]] +; NO-VP-NEXT: br i1 [[TMP41]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; NO-VP: vec.epilog.middle.block: +; NO-VP-NEXT: [[CMP_N11:%.*]] = icmp eq i64 [[N]], [[N_VEC10]] +; NO-VP-NEXT: br i1 [[CMP_N11]], label [[FOR_COND_CLEANUP]], label [[VEC_EPILOG_SCALAR_PH]] +; NO-VP: vec.epilog.scalar.ph: +; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC10]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[ITER_CHECK:%.*]] ] +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; NO-VP-NEXT: [[TMP42:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; NO-VP-NEXT: [[TMP43:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; NO-VP-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP43]], [[TMP42]] +; NO-VP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; NO-VP-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] +; NO-VP: for.cond.cleanup: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll b/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll new file mode 100644 index 0000000..9b49d44 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/X86/vplan-vp-intrinsics.ll @@ -0,0 +1,89 @@ +; REQUIRES: asserts + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -mtriple=x86_64 -mattr=+avx512f -disable-output < %s 2>&1 | FileCheck --check-prefix=IF-EVL %s + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize -force-vector-width=4 \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue \ +; RUN: 
-mtriple=x86_64 -mattr=+avx512f -disable-output < %s 2>&1 | FileCheck --check-prefix=NO-VP %s + +define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; IF-EVL: VPlan 'Initial VPlan for VF={4},UF>=1' { +; IF-EVL-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF +; IF-EVL-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count +; IF-EVL-NEXT: Live-in vp<[[BETC:%[0-9]+]]> = backedge-taken count +; IF-EVL-NEXT: Live-in ir<%N> = original trip-count +; IF-EVL-EMPTY: +; IF-EVL: vector.ph: +; IF-EVL-NEXT: Successor(s): vector loop +; IF-EVL-EMPTY: +; IF-EVL-NEXT: vector loop: { +; IF-EVL-NEXT: vector.body: +; IF-EVL-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; IF-EVL-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> +; IF-EVL-NEXT: EMIT vp<[[VIV:%[0-9]+]]> = WIDEN-CANONICAL-INDUCTION vp<[[IV]]> +; IF-EVL-NEXT: EMIT vp<[[MASK:%[0-9]+]]> = icmp ule vp<[[VIV]]>, vp<[[BETC]]> +; IF-EVL-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> +; IF-EVL-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]>, vp<[[MASK]]> +; IF-EVL-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> +; IF-EVL-NEXT: WIDEN ir<[[LD2:%.+]]> = load vp<[[PTR2]]>, vp<[[MASK]]> +; IF-EVL-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]> +; IF-EVL-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> +; IF-EVL-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> +; IF-EVL-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]>, vp<[[MASK]]> +; IF-EVL-NEXT: EMIT vp<[[IV_NEXT:%[0-9]+]]> = add vp<[[IV]]>, vp<[[VFUF]]> +; IF-EVL-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> +; IF-EVL-NEXT: No successors +; IF-EVL-NEXT: } + +; NO-VP: VPlan 'Initial VPlan for VF={4},UF>=1' { +; NO-VP-NEXT: Live-in vp<[[VFUF:%[0-9]+]]> = VF * UF +; NO-VP-NEXT: Live-in vp<[[VTC:%[0-9]+]]> = vector-trip-count +; NO-VP-NEXT: Live-in ir<%N> = original trip-count +; NO-VP-EMPTY: +; NO-VP: vector.ph: +; NO-VP-NEXT: Successor(s): vector loop +; NO-VP-EMPTY: +; NO-VP-NEXT: vector loop: { +; NO-VP-NEXT: vector.body: +; NO-VP-NEXT: EMIT vp<[[IV:%[0-9]+]]> = CANONICAL-INDUCTION +; NO-VP-NEXT: vp<[[ST:%[0-9]+]]> = SCALAR-STEPS vp<[[IV]]>, ir<1> +; NO-VP-NEXT: CLONE ir<[[GEP1:%.+]]> = getelementptr inbounds ir<%b>, vp<[[ST]]> +; NO-VP-NEXT: vp<[[PTR1:%[0-9]+]]> = vector-pointer ir<[[GEP1]]> +; NO-VP-NEXT: WIDEN ir<[[LD1:%.+]]> = load vp<[[PTR1]]> +; NO-VP-NEXT: CLONE ir<[[GEP2:%.+]]> = getelementptr inbounds ir<%c>, vp<[[ST]]> +; NO-VP-NEXT: vp<[[PTR2:%[0-9]+]]> = vector-pointer ir<[[GEP2]]> +; NO-VP-NEXT: WIDEN ir<[[LD2:%.+]]> = load vp<[[PTR2]]> +; NO-VP-NEXT: WIDEN ir<[[ADD:%.+]]> = add nsw ir<[[LD2]]>, ir<[[LD1]]> +; NO-VP-NEXT: CLONE ir<[[GEP3:%.+]]> = getelementptr inbounds ir<%a>, vp<[[ST]]> +; NO-VP-NEXT: vp<[[PTR3:%[0-9]+]]> = vector-pointer ir<[[GEP3]]> +; NO-VP-NEXT: WIDEN store vp<[[PTR3]]>, ir<[[ADD]]> +; NO-VP-NEXT: EMIT vp<[[IV_NEXT:%[0-9]+]]> = add nuw vp<[[IV]]>, vp<[[VFUF]]> +; NO-VP-NEXT: EMIT branch-on-count vp<[[IV_NEXT]]>, vp<[[VTC]]> +; NO-VP-NEXT: No successors +; NO-VP-NEXT: } + +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = 
getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll new file mode 100644 index 0000000..a90b38c --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/vectorize-force-tail-with-evl.ll @@ -0,0 +1,101 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -force-target-supports-scalable-vectors -scalable-vectorization=on -S < %s | FileCheck --check-prefix=IF-EVL %s + +; RUN: opt -passes=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-else-scalar-epilogue -force-vector-width=4 \ +; RUN: -force-target-supports-scalable-vectors -scalable-vectorization=on -S < %s | FileCheck --check-prefix=NO-VP %s + +; The target does not support predicated vectorization. +define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) { +; IF-EVL-LABEL: @foo( +; IF-EVL-NEXT: entry: +; IF-EVL-NEXT: br label [[FOR_BODY:%.*]] +; IF-EVL: for.body: +; IF-EVL-NEXT: [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; IF-EVL-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; IF-EVL-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 [[IV]] +; IF-EVL-NEXT: [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; IF-EVL-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP0]] +; IF-EVL-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[IV]] +; IF-EVL-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; IF-EVL-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; IF-EVL-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N:%.*]] +; IF-EVL-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]] +; IF-EVL: for.cond.cleanup: +; IF-EVL-NEXT: ret void +; +; NO-VP-LABEL: @foo( +; NO-VP-NEXT: entry: +; NO-VP-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP8:%.*]] = mul i64 [[TMP0]], 4 +; NO-VP-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP8]] +; NO-VP-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] +; NO-VP: vector.ph: +; NO-VP-NEXT: [[TMP1:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP14:%.*]] = mul i64 [[TMP1]], 4 +; NO-VP-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP14]] +; NO-VP-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]] +; NO-VP-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; NO-VP-NEXT: [[TMP15:%.*]] = mul i64 [[TMP2]], 4 +; NO-VP-NEXT: br label [[VECTOR_BODY:%.*]] +; NO-VP: vector.body: +; NO-VP-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] +; NO-VP-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0 +; NO-VP-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[B:%.*]], i64 [[TMP3]] +; NO-VP-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0 +; NO-VP-NEXT: [[WIDE_LOAD:%.*]] = load , ptr [[TMP5]], align 4 +; NO-VP-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i64 
[[TMP3]] +; NO-VP-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i32 0 +; NO-VP-NEXT: [[WIDE_LOAD1:%.*]] = load , ptr [[TMP7]], align 4 +; NO-VP-NEXT: [[TMP16:%.*]] = add nsw [[WIDE_LOAD1]], [[WIDE_LOAD]] +; NO-VP-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[TMP3]] +; NO-VP-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, ptr [[TMP9]], i32 0 +; NO-VP-NEXT: store [[TMP16]], ptr [[TMP10]], align 4 +; NO-VP-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]] +; NO-VP-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; NO-VP: middle.block: +; NO-VP-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]] +; NO-VP-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]] +; NO-VP: scalar.ph: +; NO-VP-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] +; NO-VP-NEXT: br label [[FOR_BODY:%.*]] +; NO-VP: for.body: +; NO-VP-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ] +; NO-VP-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[B]], i64 [[IV]] +; NO-VP-NEXT: [[TMP12:%.*]] = load i32, ptr [[ARRAYIDX]], align 4 +; NO-VP-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, ptr [[C]], i64 [[IV]] +; NO-VP-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX2]], align 4 +; NO-VP-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]] +; NO-VP-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[IV]] +; NO-VP-NEXT: store i32 [[ADD]], ptr [[ARRAYIDX4]], align 4 +; NO-VP-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 +; NO-VP-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] +; NO-VP-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]] +; NO-VP: for.cond.cleanup: +; NO-VP-NEXT: ret void +; +entry: + br label %for.body + +for.body: + %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv + %0 = load i32, ptr %arrayidx, align 4 + %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv + %1 = load i32, ptr %arrayidx2, align 4 + %add = add nsw i32 %1, %0 + %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv + store i32 %add, ptr %arrayidx4, align 4 + %iv.next = add nuw nsw i64 %iv, 1 + %exitcond.not = icmp eq i64 %iv.next, %N + br i1 %exitcond.not, label %for.cond.cleanup, label %for.body + +for.cond.cleanup: + ret void +} diff --git a/llvm/test/Transforms/LoopVectorize/vplan-force-tail-with-evl.ll b/llvm/test/Transforms/LoopVectorize/vplan-force-tail-with-evl.ll new file mode 100644 index 0000000..f510d47 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/vplan-force-tail-with-evl.ll @@ -0,0 +1,37 @@ +; REQUIRES: asserts + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \ +; RUN: -force-tail-folding-style=data-with-evl -force-vector-width=4 \ +; RUN: -force-target-supports-scalable-vectors -scalable-vectorization=on \ +; RUN: -disable-output < %s 2>&1 | FileCheck --check-prefixes=NO-VP %s + +; RUN: opt -passes=loop-vectorize -debug-only=loop-vectorize \ +; RUN: -force-tail-folding-style=none \ +; RUN: -prefer-predicate-over-epilogue=predicate-dont-vectorize \ +; RUN: -force-target-supports-scalable-vectors -scalable-vectorization=on \ +; RUN: -disable-output < %s 2>&1 | FileCheck --check-prefixes=NO-VP %s + +; The target does not support predicated vectorization. 
+define void @foo(ptr noalias %a, ptr noalias %b, ptr noalias %c, i64 %N) {
+; NO-VP-NOT: EXPLICIT-VECTOR-LENGTH-BASED-IV-PHI
+
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %arrayidx = getelementptr inbounds i32, ptr %b, i64 %iv
+  %0 = load i32, ptr %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32, ptr %c, i64 %iv
+  %1 = load i32, ptr %arrayidx2, align 4
+  %add = add nsw i32 %1, %0
+  %arrayidx4 = getelementptr inbounds i32, ptr %a, i64 %iv
+  store i32 %add, ptr %arrayidx4, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond.not = icmp eq i64 %iv.next, %N
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+  ret void
+}
+
--
cgit v1.1

From ca55ee88263e5b190965c3f14fd3b2647efab26a Mon Sep 17 00:00:00 2001
From: Shubham Rastogi
Date: Thu, 4 Apr 2024 15:37:07 -0700
Subject: Revert "Debuginfod Testing & fixes: 3rd times the charm? (#87676)"

This reverts commit d6713ad80d6907210c629f22babaf12177fa329c.

This change was reverted because of greendragon failures such as

Unresolved Tests (2):
  lldb-api :: debuginfod/Normal/TestDebuginfod.py
  lldb-api :: debuginfod/SplitDWARF/TestDebuginfodDWP.py
---
 .../Python/lldbsuite/test/make/Makefile.rules      | 26 +--
 .../Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp   | 38 ++--
 lldb/source/Plugins/SymbolLocator/CMakeLists.txt   | 7 +-
 .../Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp   | 29 +---
 lldb/test/API/debuginfod/Normal/Makefile           | 19 --
 lldb/test/API/debuginfod/Normal/TestDebuginfod.py  | 183 -------------------
 lldb/test/API/debuginfod/Normal/main.c             | 7 -
 lldb/test/API/debuginfod/SplitDWARF/Makefile       | 23 ---
 .../API/debuginfod/SplitDWARF/TestDebuginfodDWP.py | 193 ---------------------
 lldb/test/API/debuginfod/SplitDWARF/main.c         | 7 -
 10 files changed, 17 insertions(+), 515 deletions(-)
 delete mode 100644 lldb/test/API/debuginfod/Normal/Makefile
 delete mode 100644 lldb/test/API/debuginfod/Normal/TestDebuginfod.py
 delete mode 100644 lldb/test/API/debuginfod/Normal/main.c
 delete mode 100644 lldb/test/API/debuginfod/SplitDWARF/Makefile
 delete mode 100644 lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py
 delete mode 100644 lldb/test/API/debuginfod/SplitDWARF/main.c

diff --git a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
index ee8793f..bfd249c 100644
--- a/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
+++ b/lldb/packages/Python/lldbsuite/test/make/Makefile.rules
@@ -51,7 +51,7 @@ LLDB_BASE_DIR := $(THIS_FILE_DIR)/../../../../../
 #
 # GNUWin32 uname gives "windows32" or "server version windows32" while
 # some versions of MSYS uname return "MSYS_NT*", but most environments
-# standardize on "Windows_NT", so we'll make it consistent here.
+# standardize on "Windows_NT", so we'll make it consistent here.
 # When running tests from Visual Studio, the environment variable isn't
 # inherited all the way down to the process spawned for make.
#---------------------------------------------------------------------- @@ -210,12 +210,6 @@ else ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES" DSYM = $(EXE).debug endif - - ifeq "$(MAKE_DWP)" "YES" - MAKE_DWO := YES - DWP_NAME = $(EXE).dwp - DYLIB_DWP_NAME = $(DYLIB_NAME).dwp - endif endif LIMIT_DEBUG_INFO_FLAGS = @@ -363,7 +357,6 @@ ifneq "$(OS)" "Darwin" OBJCOPY ?= $(call replace_cc_with,objcopy) ARCHIVER ?= $(call replace_cc_with,ar) - DWP ?= $(call replace_cc_with,dwp) override AR = $(ARCHIVER) endif @@ -534,10 +527,6 @@ ifneq "$(CXX)" "" endif endif -ifeq "$(GEN_GNU_BUILD_ID)" "YES" - LDFLAGS += -Wl,--build-id -endif - #---------------------------------------------------------------------- # DYLIB_ONLY variable can be used to skip the building of a.out. # See the sections below regarding dSYM file as well as the building of @@ -576,17 +565,10 @@ else endif else ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES" -ifeq "$(SAVE_FULL_DEBUG_BINARY)" "YES" - cp "$(EXE)" "$(EXE).unstripped" -endif $(OBJCOPY) --only-keep-debug "$(EXE)" "$(DSYM)" $(OBJCOPY) --strip-debug --add-gnu-debuglink="$(DSYM)" "$(EXE)" "$(EXE)" endif -ifeq "$(MAKE_DWP)" "YES" - $(DWP) -o "$(DWP_NAME)" $(DWOS) endif -endif - #---------------------------------------------------------------------- # Make the dylib @@ -628,15 +610,9 @@ endif else $(LD) $(DYLIB_OBJECTS) $(LDFLAGS) -shared -o "$(DYLIB_FILENAME)" ifeq "$(SPLIT_DEBUG_SYMBOLS)" "YES" - ifeq "$(SAVE_FULL_DEBUG_BINARY)" "YES" - cp "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME).unstripped" - endif $(OBJCOPY) --only-keep-debug "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME).debug" $(OBJCOPY) --strip-debug --add-gnu-debuglink="$(DYLIB_FILENAME).debug" "$(DYLIB_FILENAME)" "$(DYLIB_FILENAME)" endif -ifeq "$(MAKE_DWP)" "YES" - $(DWP) -o $(DYLIB_DWP_FILE) $(DYLIB_DWOS) -endif endif #---------------------------------------------------------------------- diff --git a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp index dafdf24..49f13d2 100644 --- a/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp +++ b/lldb/source/Plugins/SymbolFile/DWARF/SymbolFileDWARF.cpp @@ -4378,38 +4378,26 @@ const std::shared_ptr &SymbolFileDWARF::GetDwpSymbolFile() { FileSpecList search_paths = Target::GetDefaultDebugFileSearchPaths(); ModuleSpec module_spec; module_spec.GetFileSpec() = m_objfile_sp->GetFileSpec(); - FileSpec dwp_filespec; for (const auto &symfile : symfiles.files()) { module_spec.GetSymbolFileSpec() = FileSpec(symfile.GetPath() + ".dwp", symfile.GetPathStyle()); LLDB_LOG(log, "Searching for DWP using: \"{0}\"", module_spec.GetSymbolFileSpec()); - dwp_filespec = + FileSpec dwp_filespec = PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); if (FileSystem::Instance().Exists(dwp_filespec)) { - break; - } - } - if (!FileSystem::Instance().Exists(dwp_filespec)) { - LLDB_LOG(log, "No DWP file found locally"); - // Fill in the UUID for the module we're trying to match for, so we can - // find the correct DWP file, as the Debuginfod plugin uses *only* this - // data to correctly match the DWP file with the binary. 
- module_spec.GetUUID() = m_objfile_sp->GetUUID(); - dwp_filespec = - PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); - } - if (FileSystem::Instance().Exists(dwp_filespec)) { - LLDB_LOG(log, "Found DWP file: \"{0}\"", dwp_filespec); - DataBufferSP dwp_file_data_sp; - lldb::offset_t dwp_file_data_offset = 0; - ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( - GetObjectFile()->GetModule(), &dwp_filespec, 0, - FileSystem::Instance().GetByteSize(dwp_filespec), dwp_file_data_sp, - dwp_file_data_offset); - if (dwp_obj_file) { - m_dwp_symfile = std::make_shared( - *this, dwp_obj_file, DIERef::k_file_index_mask); + LLDB_LOG(log, "Found DWP file: \"{0}\"", dwp_filespec); + DataBufferSP dwp_file_data_sp; + lldb::offset_t dwp_file_data_offset = 0; + ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( + GetObjectFile()->GetModule(), &dwp_filespec, 0, + FileSystem::Instance().GetByteSize(dwp_filespec), dwp_file_data_sp, + dwp_file_data_offset); + if (dwp_obj_file) { + m_dwp_symfile = std::make_shared( + *this, dwp_obj_file, DIERef::k_file_index_mask); + break; + } } } if (!m_dwp_symfile) { diff --git a/lldb/source/Plugins/SymbolLocator/CMakeLists.txt b/lldb/source/Plugins/SymbolLocator/CMakeLists.txt index 3367022..ca96962 100644 --- a/lldb/source/Plugins/SymbolLocator/CMakeLists.txt +++ b/lldb/source/Plugins/SymbolLocator/CMakeLists.txt @@ -1,10 +1,5 @@ -# Order matters here: the first symbol locator prevents further searching. -# For DWARF binaries that are both stripped and split, the Default plugin -# will return the stripped binary when asked for the ObjectFile, which then -# prevents an unstripped binary from being requested from the Debuginfod -# provider. -add_subdirectory(Debuginfod) add_subdirectory(Default) if (CMAKE_SYSTEM_NAME MATCHES "Darwin") add_subdirectory(DebugSymbols) endif() +add_subdirectory(Debuginfod) diff --git a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp index f296e65..b5fe35d 100644 --- a/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp +++ b/lldb/source/Plugins/SymbolVendor/ELF/SymbolVendorELF.cpp @@ -44,24 +44,6 @@ llvm::StringRef SymbolVendorELF::GetPluginDescriptionStatic() { "executables."; } -// If this is needed elsewhere, it can be exported/moved. -static bool IsDwpSymbolFile(const lldb::ModuleSP &module_sp, - const FileSpec &file_spec) { - DataBufferSP dwp_file_data_sp; - lldb::offset_t dwp_file_data_offset = 0; - // Try to create an ObjectFile from the file_spec. - ObjectFileSP dwp_obj_file = ObjectFile::FindPlugin( - module_sp, &file_spec, 0, FileSystem::Instance().GetByteSize(file_spec), - dwp_file_data_sp, dwp_file_data_offset); - // The presence of a debug_cu_index section is the key identifying feature of - // a DWP file. Make sure we don't fill in the section list on dwp_obj_file - // (by calling GetSectionList(false)) as this function could be called before - // we may have all the symbol files collected and available. 
- return dwp_obj_file && ObjectFileELF::classof(dwp_obj_file.get()) && - dwp_obj_file->GetSectionList(false)->FindSectionByType( - eSectionTypeDWARFDebugCuIndex, false); -} - // CreateInstance // // Platforms can register a callback to use when creating symbol vendors to @@ -105,15 +87,8 @@ SymbolVendorELF::CreateInstance(const lldb::ModuleSP &module_sp, FileSpecList search_paths = Target::GetDefaultDebugFileSearchPaths(); FileSpec dsym_fspec = PluginManager::LocateExecutableSymbolFile(module_spec, search_paths); - if (!dsym_fspec || IsDwpSymbolFile(module_sp, dsym_fspec)) { - // If we have a stripped binary or if we got a DWP file, we should prefer - // symbols in the executable acquired through a plugin. - ModuleSpec unstripped_spec = - PluginManager::LocateExecutableObjectFile(module_spec); - if (!unstripped_spec) - return nullptr; - dsym_fspec = unstripped_spec.GetFileSpec(); - } + if (!dsym_fspec) + return nullptr; DataBufferSP dsym_file_data_sp; lldb::offset_t dsym_file_data_offset = 0; diff --git a/lldb/test/API/debuginfod/Normal/Makefile b/lldb/test/API/debuginfod/Normal/Makefile deleted file mode 100644 index 54bd7ad..0000000 --- a/lldb/test/API/debuginfod/Normal/Makefile +++ /dev/null @@ -1,19 +0,0 @@ -C_SOURCES := main.c - -# For normal (non DWP) Debuginfod tests, we need: - -# * The full binary: a.out.unstripped -# Produced by Makefile.rules with SAVE_FULL_DEBUG_BINARY set to YES and -# SPLIT_DEBUG_SYMBOLS set to YES - -# * The stripped binary (a.out) -# Produced by Makefile.rules with SPLIT_DEBUG_SYMBOLS set to YES - -# * The 'only-keep-debug' binary (a.out.debug) -# Produced below - -SPLIT_DEBUG_SYMBOLS := YES -SAVE_FULL_DEBUG_BINARY := YES -GEN_GNU_BUILD_ID := YES - -include Makefile.rules diff --git a/lldb/test/API/debuginfod/Normal/TestDebuginfod.py b/lldb/test/API/debuginfod/Normal/TestDebuginfod.py deleted file mode 100644 index f1be1e7..0000000 --- a/lldb/test/API/debuginfod/Normal/TestDebuginfod.py +++ /dev/null @@ -1,183 +0,0 @@ -import os -import shutil -import tempfile - -import lldb -from lldbsuite.test.decorators import * -import lldbsuite.test.lldbutil as lldbutil -from lldbsuite.test.lldbtest import * - - -""" -Test support for the DebugInfoD network symbol acquisition protocol. -This one is for simple / no split-dwarf scenarios. - -For no-split-dwarf scenarios, there are 2 variations: -1 - A stripped binary with it's corresponding unstripped binary: -2 - A stripped binary with a corresponding --only-keep-debug symbols file -""" - - -# It looks like Linux-AArch64 doesn't support build-id's on the LLDB builtbots -class DebugInfodTests(TestBase): - # No need to try every flavor of debug inf. - NO_DEBUG_INFO_TESTCASE = True - - @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) - def test_normal_no_symbols(self): - """ - Validate behavior with no symbols or symbol locator. - ('baseline negative' behavior) - """ - test_root = self.config_test(["a.out"]) - self.try_breakpoint(False) - - @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) - def test_normal_default(self): - """ - Validate behavior with symbols, but no symbol locator. - ('baseline positive' behavior) - """ - test_root = self.config_test(["a.out", "a.out.debug"]) - self.try_breakpoint(True) - - @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) - def test_debuginfod_symbols(self): - """ - Test behavior with the full binary available from Debuginfod as - 'debuginfo' from the plug-in. 
- """ - test_root = self.config_test(["a.out"], "a.out.unstripped") - self.try_breakpoint(True) - - @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) - def test_debuginfod_executable(self): - """ - Test behavior with the full binary available from Debuginfod as - 'executable' from the plug-in. - """ - test_root = self.config_test(["a.out"], None, "a.out.unstripped") - self.try_breakpoint(True) - - @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) - def test_debuginfod_okd_symbols(self): - """ - Test behavior with the 'only-keep-debug' symbols available from Debuginfod. - """ - test_root = self.config_test(["a.out"], "a.out.debug") - self.try_breakpoint(True) - - def try_breakpoint(self, should_have_loc): - """ - This function creates a target from self.aout, sets a function-name - breakpoint, and checks to see if we have a file/line location, - as a way to validate that the symbols have been loaded. - should_have_loc specifies if we're testing that symbols have or - haven't been loaded. - """ - target = self.dbg.CreateTarget(self.aout) - self.assertTrue(target and target.IsValid(), "Target is valid") - - bp = target.BreakpointCreateByName("func") - self.assertTrue(bp and bp.IsValid(), "Breakpoint is valid") - self.assertEqual(bp.GetNumLocations(), 1) - - loc = bp.GetLocationAtIndex(0) - self.assertTrue(loc and loc.IsValid(), "Location is valid") - addr = loc.GetAddress() - self.assertTrue(addr and addr.IsValid(), "Loc address is valid") - line_entry = addr.GetLineEntry() - self.assertEqual( - should_have_loc, - line_entry != None and line_entry.IsValid(), - "Loc line entry is valid", - ) - if should_have_loc: - self.assertEqual(line_entry.GetLine(), 4) - self.assertEqual( - line_entry.GetFileSpec().GetFilename(), - self.main_source_file.GetFilename(), - ) - self.dbg.DeleteTarget(target) - shutil.rmtree(self.tmp_dir) - - def config_test(self, local_files, debuginfo=None, executable=None): - """ - Set up a test with local_files[] copied to a different location - so that we control which files are, or are not, found in the file system. - Also, create a stand-alone file-system 'hosted' debuginfod server with the - provided debuginfo and executable files (if they exist) - - Make the filesystem look like: - - /tmp//test/[local_files] - - /tmp//cache (for lldb to use as a temp cache) - - /tmp//buildid//executable -> - /tmp//buildid//debuginfo -> - Returns the /tmp/ path - """ - - self.build() - - uuid = self.getUUID("a.out") - if not uuid: - self.fail("Could not get UUID for a.out") - return - self.main_source_file = lldb.SBFileSpec("main.c") - self.tmp_dir = tempfile.mkdtemp() - test_dir = os.path.join(self.tmp_dir, "test") - os.makedirs(test_dir) - - self.aout = "" - # Copy the files used by the test: - for f in local_files: - shutil.copy(self.getBuildArtifact(f), test_dir) - # The first item is the binary to be used for the test - if self.aout == "": - self.aout = os.path.join(test_dir, f) - - use_debuginfod = debuginfo != None or executable != None - - # Populated the 'file://... 
mocked' Debuginfod server: - if use_debuginfod: - os.makedirs(os.path.join(self.tmp_dir, "cache")) - uuid_dir = os.path.join(self.tmp_dir, "buildid", uuid) - os.makedirs(uuid_dir) - if debuginfo: - shutil.copy( - self.getBuildArtifact(debuginfo), - os.path.join(uuid_dir, "debuginfo"), - ) - if executable: - shutil.copy( - self.getBuildArtifact(executable), - os.path.join(uuid_dir, "executable"), - ) - - # Configure LLDB for the test: - self.runCmd( - "settings set symbols.enable-external-lookup %s" - % str(use_debuginfod).lower() - ) - self.runCmd("settings clear plugin.symbol-locator.debuginfod.server-urls") - if use_debuginfod: - self.runCmd( - "settings set plugin.symbol-locator.debuginfod.cache-path %s/cache" - % self.tmp_dir - ) - self.runCmd( - "settings insert-before plugin.symbol-locator.debuginfod.server-urls 0 file://%s" - % self.tmp_dir - ) - - def getUUID(self, filename): - try: - target = self.dbg.CreateTarget(self.getBuildArtifact(filename)) - module = target.GetModuleAtIndex(0) - uuid = module.GetUUIDString().replace("-", "").lower() - self.dbg.DeleteTarget(target) - return uuid if len(uuid) == 40 else None - except: - return None diff --git a/lldb/test/API/debuginfod/Normal/main.c b/lldb/test/API/debuginfod/Normal/main.c deleted file mode 100644 index 4c71846..0000000 --- a/lldb/test/API/debuginfod/Normal/main.c +++ /dev/null @@ -1,7 +0,0 @@ -// This is a dump little pair of test files - -int func(int argc, const char *argv[]) { - return (argc + 1) * (argv[argc][0] + 2); -} - -int main(int argc, const char *argv[]) { return func(0, argv); } diff --git a/lldb/test/API/debuginfod/SplitDWARF/Makefile b/lldb/test/API/debuginfod/SplitDWARF/Makefile deleted file mode 100644 index 3ab9a96..0000000 --- a/lldb/test/API/debuginfod/SplitDWARF/Makefile +++ /dev/null @@ -1,23 +0,0 @@ -C_SOURCES := main.c - -# For split-dwarf Debuginfod tests, we need: - -# * A .DWP file (a.out.dwp) -# Produced by Makefile.rules with MAKE_DWP set to YES - -# * The "full" binary (missing things that live in .dwo's) (a.out.unstripped) -# Produced by Makefile.rules with SAVE_FULL_DEBUG_BINARY set to YES and -# SPLIT_DEBUG_SYMBOLS set to YES - -# * The stripped binary (a.out) -# Produced by Makefile.rules - -# * The 'only-keep-debug' binary (a.out.debug) -# Produced below - -MAKE_DWP := YES -SPLIT_DEBUG_SYMBOLS := YES -SAVE_FULL_DEBUG_BINARY := YES -GEN_GNU_BUILD_ID := YES - -include Makefile.rules diff --git a/lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py b/lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py deleted file mode 100644 index fec2fa1..0000000 --- a/lldb/test/API/debuginfod/SplitDWARF/TestDebuginfodDWP.py +++ /dev/null @@ -1,193 +0,0 @@ -""" -Test support for the DebugInfoD network symbol acquisition protocol. -""" -import os -import shutil -import tempfile - -import lldb -from lldbsuite.test.decorators import * -import lldbsuite.test.lldbutil as lldbutil -from lldbsuite.test.lldbtest import * - - -""" -Test support for the DebugInfoD network symbol acquisition protocol. -This file is for split-dwarf (dwp) scenarios. - -1 - A split binary target with it's corresponding DWP file -2 - A stripped, split binary target with an unstripped binary and a DWP file -3 - A stripped, split binary target with an --only-keep-debug symbols file and a DWP file -""" - - -# It looks like Linux-AArch64 doesn't support build-id's on the LLDB builtbots -class DebugInfodDWPTests(TestBase): - # No need to try every flavor of debug inf. 
- NO_DEBUG_INFO_TESTCASE = True - - @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) - def test_normal_stripped(self): - """ - Validate behavior with a stripped binary, no symbols or symbol locator. - """ - self.config_test(["a.out"]) - self.try_breakpoint(False) - - @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) - def test_normal_stripped_split_with_dwp(self): - """ - Validate behavior with symbols, but no symbol locator. - """ - self.config_test(["a.out", "a.out.debug", "a.out.dwp"]) - self.try_breakpoint(True) - - @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) - def test_normal_stripped_only_dwp(self): - """ - Validate behavior *with* dwp symbols only, but missing other symbols, - but no symbol locator. This shouldn't work: without the other symbols - DWO's appear mostly useless. - """ - self.config_test(["a.out", "a.out.dwp"]) - self.try_breakpoint(False) - - @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) - def test_debuginfod_dwp_from_service(self): - """ - Test behavior with the unstripped binary, and DWP from the service. - """ - self.config_test(["a.out.debug"], "a.out.dwp") - self.try_breakpoint(True) - - @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) - def test_debuginfod_both_symfiles_from_service(self): - """ - Test behavior with a stripped binary, with the unstripped binary and - dwp symbols from Debuginfod. - """ - self.config_test(["a.out"], "a.out.dwp", "a.out.unstripped") - self.try_breakpoint(True) - - @skipIf(oslist=no_match(["linux"]), archs=no_match(["i386", "x86_64"])) - def test_debuginfod_both_okd_symfiles_from_service(self): - """ - Test behavior with both the only-keep-debug symbols and the dwp symbols - from Debuginfod. - """ - self.config_test(["a.out"], "a.out.dwp", "a.out.debug") - self.try_breakpoint(True) - - def try_breakpoint(self, should_have_loc): - """ - This function creates a target from self.aout, sets a function-name - breakpoint, and checks to see if we have a file/line location, - as a way to validate that the symbols have been loaded. - should_have_loc specifies if we're testing that symbols have or - haven't been loaded. - """ - target = self.dbg.CreateTarget(self.aout) - self.assertTrue(target and target.IsValid(), "Target is valid") - - bp = target.BreakpointCreateByName("func") - self.assertTrue(bp and bp.IsValid(), "Breakpoint is valid") - self.assertEqual(bp.GetNumLocations(), 1) - - loc = bp.GetLocationAtIndex(0) - self.assertTrue(loc and loc.IsValid(), "Location is valid") - addr = loc.GetAddress() - self.assertTrue(addr and addr.IsValid(), "Loc address is valid") - line_entry = addr.GetLineEntry() - self.assertEqual( - should_have_loc, - line_entry != None and line_entry.IsValid(), - "Loc line entry is valid", - ) - if should_have_loc: - self.assertEqual(line_entry.GetLine(), 4) - self.assertEqual( - line_entry.GetFileSpec().GetFilename(), - self.main_source_file.GetFilename(), - ) - self.dbg.DeleteTarget(target) - shutil.rmtree(self.tmp_dir) - - def config_test(self, local_files, debuginfo=None, executable=None): - """ - Set up a test with local_files[] copied to a different location - so that we control which files are, or are not, found in the file system. 
- Also, create a stand-alone file-system 'hosted' debuginfod server with the - provided debuginfo and executable files (if they exist) - - Make the filesystem look like: - - /tmp//test/[local_files] - - /tmp//cache (for lldb to use as a temp cache) - - /tmp//buildid//executable -> - /tmp//buildid//debuginfo -> - Returns the /tmp/ path - """ - - self.build() - - uuid = self.getUUID("a.out") - if not uuid: - self.fail("Could not get UUID for a.out") - return - self.main_source_file = lldb.SBFileSpec("main.c") - self.tmp_dir = tempfile.mkdtemp() - self.test_dir = os.path.join(self.tmp_dir, "test") - os.makedirs(self.test_dir) - - self.aout = "" - # Copy the files used by the test: - for f in local_files: - shutil.copy(self.getBuildArtifact(f), self.test_dir) - if self.aout == "": - self.aout = os.path.join(self.test_dir, f) - - use_debuginfod = debuginfo != None or executable != None - - # Populated the 'file://... mocked' Debuginfod server: - if use_debuginfod: - os.makedirs(os.path.join(self.tmp_dir, "cache")) - uuid_dir = os.path.join(self.tmp_dir, "buildid", uuid) - os.makedirs(uuid_dir) - if debuginfo: - shutil.copy( - self.getBuildArtifact(debuginfo), - os.path.join(uuid_dir, "debuginfo"), - ) - if executable: - shutil.copy( - self.getBuildArtifact(executable), - os.path.join(uuid_dir, "executable"), - ) - os.remove(self.getBuildArtifact("main.dwo")) - # Configure LLDB for the test: - self.runCmd( - "settings set symbols.enable-external-lookup %s" - % str(use_debuginfod).lower() - ) - self.runCmd("settings clear plugin.symbol-locator.debuginfod.server-urls") - if use_debuginfod: - self.runCmd( - "settings set plugin.symbol-locator.debuginfod.cache-path %s/cache" - % self.tmp_dir - ) - self.runCmd( - "settings insert-before plugin.symbol-locator.debuginfod.server-urls 0 file://%s" - % self.tmp_dir - ) - - def getUUID(self, filename): - try: - target = self.dbg.CreateTarget(self.getBuildArtifact(filename)) - module = target.GetModuleAtIndex(0) - uuid = module.GetUUIDString().replace("-", "").lower() - self.dbg.DeleteTarget(target) - return uuid if len(uuid) == 40 else None - except: - return None diff --git a/lldb/test/API/debuginfod/SplitDWARF/main.c b/lldb/test/API/debuginfod/SplitDWARF/main.c deleted file mode 100644 index 4c71846..0000000 --- a/lldb/test/API/debuginfod/SplitDWARF/main.c +++ /dev/null @@ -1,7 +0,0 @@ -// This is a dump little pair of test files - -int func(int argc, const char *argv[]) { - return (argc + 1) * (argv[argc][0] + 2); -} - -int main(int argc, const char *argv[]) { return func(0, argv); } -- cgit v1.1 From 96a99a5e2f62475f13d7ba18b15acad733909e7f Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 4 Apr 2024 15:50:56 -0700 Subject: [NFC][UBSAN] Regenerate a test --- clang/test/CodeGen/remote-traps.c | 38 ++++++++++++++++++++++++++++++-------- 1 file changed, 30 insertions(+), 8 deletions(-) diff --git a/clang/test/CodeGen/remote-traps.c b/clang/test/CodeGen/remote-traps.c index 6983ddb..b12c2c6 100644 --- a/clang/test/CodeGen/remote-traps.c +++ b/clang/test/CodeGen/remote-traps.c @@ -1,15 +1,37 @@ -// RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow %s -o - | FileCheck %s +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 +// RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow %s -o - | FileCheck %s // RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow 
-fsanitize-trap=signed-integer-overflow -mllvm -remove-traps-random-rate=1 %s -o - | FileCheck %s --implicit-check-not="call void @llvm.ubsantrap" --check-prefixes=REMOVE +// CHECK-LABEL: define dso_local noundef i32 @test( +// CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X]], i32 123), !nosanitize [[META2:![0-9]+]] +// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1, !nosanitize [[META2]] +// CHECK-NEXT: br i1 [[TMP1]], label [[TRAP:%.*]], label [[CONT:%.*]], !nosanitize [[META2]] +// CHECK: trap: +// CHECK-NEXT: tail call void @llvm.ubsantrap(i8 0) #[[ATTR3:[0-9]+]], !nosanitize [[META2]] +// CHECK-NEXT: unreachable, !nosanitize [[META2]] +// CHECK: cont: +// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0, !nosanitize [[META2]] +// CHECK-NEXT: ret i32 [[TMP2]] +// +// REMOVE-LABEL: define dso_local noundef i32 @test( +// REMOVE-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { +// REMOVE-NEXT: entry: +// REMOVE-NEXT: [[TMP0:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X]], i32 123), !nosanitize [[META2:![0-9]+]] +// REMOVE-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1, !nosanitize [[META2]] +// REMOVE-NEXT: [[TMP2:%.*]] = xor i1 [[TMP1]], true +// REMOVE-NEXT: tail call void @llvm.assume(i1 [[TMP2]]) +// REMOVE-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0, !nosanitize [[META2]] +// REMOVE-NEXT: ret i32 [[TMP3]] +// int test(int x) { return x + 123; } -// CHECK-LABEL: define {{.*}}i32 @test( -// CHECK: call { i32, i1 } @llvm.sadd.with.overflow.i32( -// CHECK: trap: -// CHECK-NEXT: call void @llvm.ubsantrap(i8 0) -// CHECK-NEXT: unreachable -// REMOVE-LABEL: define {{.*}}i32 @test( -// REMOVE: call { i32, i1 } @llvm.sadd.with.overflow.i32( +//. +// CHECK: [[META2]] = !{} +//. +// REMOVE: [[META2]] = !{} +//. -- cgit v1.1 From 852eb20b4f091a535ef758407d8555798b0ad809 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Thu, 4 Apr 2024 16:17:19 -0700 Subject: [RISCV][GISel] Make register bank selection for unary and binary arithmetic ops more generic. (#87593) This is inspired by AArch64's getSameKindOfOperandsMapping, but based on what RISC-V currently needs. This removes the special vector case for G_ADD/SUB and unifies integer and FP operations into the same handler. G_SEXTLOAD/ZEXTLOAD have been separated from integer since they should only be scalar integer and never vector. 
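For readers unfamiliar with register bank assignment, the selection rule described above can be summarized outside of the LLVM RegisterBankInfo API. The following is a minimal standalone sketch only; the names Bank, TypeInfo and chooseBank are illustrative and do not exist in the patch.

  #include <iostream>

  // Sketch of the unified mapping choice: vector types go to the vector
  // register bank, FP opcodes to the FP bank, everything else to GPR.
  enum class Bank { GPR, FPR, VRB };

  struct TypeInfo {
    bool IsVector;
    unsigned SizeInBits;
  };

  static Bank chooseBank(const TypeInfo &Ty, bool IsFPOpcode) {
    if (Ty.IsVector)
      return Bank::VRB;   // vector G_ADD/G_FADD etc. -> vector bank
    if (IsFPOpcode)
      return Bank::FPR;   // scalar G_FADD, G_FSUB, ... -> FP bank
    return Bank::GPR;     // scalar integer ops -> GPR bank
  }

  int main() {
    std::cout << (chooseBank({true, 128}, false) == Bank::VRB)   // vector add
              << (chooseBank({false, 64}, true) == Bank::FPR)    // fadd
              << (chooseBank({false, 32}, false) == Bank::GPR)   // scalar add
              << '\n';                                           // prints "111"
  }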
--- .../Target/RISCV/GISel/RISCVRegisterBankInfo.cpp | 45 ++++++++++++++-------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp index 8534024..86e4434 100644 --- a/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp +++ b/llvm/lib/Target/RISCV/GISel/RISCVRegisterBankInfo.cpp @@ -290,16 +290,7 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { switch (Opc) { case TargetOpcode::G_ADD: - case TargetOpcode::G_SUB: { - if (MRI.getType(MI.getOperand(0).getReg()).isVector()) { - LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - return getInstructionMapping( - DefaultMappingID, /*Cost=*/1, - getVRBValueMapping(Ty.getSizeInBits().getKnownMinValue()), - NumOperands); - } - } - LLVM_FALLTHROUGH; + case TargetOpcode::G_SUB: case TargetOpcode::G_SHL: case TargetOpcode::G_ASHR: case TargetOpcode::G_LSHR: @@ -320,10 +311,6 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_PTR_ADD: case TargetOpcode::G_PTRTOINT: case TargetOpcode::G_INTTOPTR: - case TargetOpcode::G_SEXTLOAD: - case TargetOpcode::G_ZEXTLOAD: - return getInstructionMapping(DefaultMappingID, /*Cost=*/1, GPRValueMapping, - NumOperands); case TargetOpcode::G_FADD: case TargetOpcode::G_FSUB: case TargetOpcode::G_FMUL: @@ -334,10 +321,34 @@ RISCVRegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { case TargetOpcode::G_FMAXNUM: case TargetOpcode::G_FMINNUM: { LLT Ty = MRI.getType(MI.getOperand(0).getReg()); - return getInstructionMapping(DefaultMappingID, /*Cost=*/1, - getFPValueMapping(Ty.getSizeInBits()), - NumOperands); + TypeSize Size = Ty.getSizeInBits(); + + const ValueMapping *Mapping; + if (Ty.isVector()) + Mapping = getVRBValueMapping(Size.getKnownMinValue()); + else if (isPreISelGenericFloatingPointOpcode(Opc)) + Mapping = getFPValueMapping(Size.getFixedValue()); + else + Mapping = GPRValueMapping; + +#ifndef NDEBUG + // Make sure all the operands are using similar size and type. + for (unsigned Idx = 1; Idx != NumOperands; ++Idx) { + LLT OpTy = MRI.getType(MI.getOperand(Idx).getReg()); + assert(Ty.isVector() == OpTy.isVector() && + "Operand has incompatible type"); + // Don't check size for GPR. + if (OpTy.isVector() || isPreISelGenericFloatingPointOpcode(Opc)) + assert(Size == OpTy.getSizeInBits() && "Operand has incompatible size"); + } +#endif // End NDEBUG + + return getInstructionMapping(DefaultMappingID, 1, Mapping, NumOperands); } + case TargetOpcode::G_SEXTLOAD: + case TargetOpcode::G_ZEXTLOAD: + return getInstructionMapping(DefaultMappingID, /*Cost=*/1, GPRValueMapping, + NumOperands); case TargetOpcode::G_IMPLICIT_DEF: { Register Dst = MI.getOperand(0).getReg(); LLT DstTy = MRI.getType(Dst); -- cgit v1.1 From cfadf3f62230505c1156e07f46c06813271bb5ac Mon Sep 17 00:00:00 2001 From: Shilei Tian Date: Thu, 4 Apr 2024 20:29:26 -0400 Subject: [TableGen] Fix a potential crash when operand doesn't appear in the instruction pattern (#87663) We have a check of whether an operand is in the instruction pattern, and emit an error if it is not, but we simply continue execution, including directly dereferencing a point-like object `InVal`, which will be just created when accessing the map. It contains a `nullptr` so dereferencing it causes crash. This is a very trivial fix. 
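The crash pattern is the generic std::map pitfall: operator[] on a key that was never inserted default-constructs the mapped value, so looking up an operand that does not appear in the pattern yields a null smart pointer that is then dereferenced. A minimal standalone illustration follows (not TableGen code; the map and variable names merely echo the ones in the diff):

  #include <iostream>
  #include <map>
  #include <memory>
  #include <string>

  struct Node { int Value = 0; };
  using NodePtr = std::shared_ptr<Node>;

  int main() {
    std::map<std::string, NodePtr> InstInputs;  // hypothetical stand-in
    // operator[] inserts a default-constructed (null) NodePtr for a key that
    // was never populated -- the analogue of an operand missing from the
    // instruction pattern.
    NodePtr InVal = InstInputs["missing_operand"];
    // Dereferencing InVal unconditionally would crash; checking it (or
    // bailing out early, as the patch does by adding `continue` after
    // I.error) avoids that.
    std::cout << (InVal ? InVal->Value : -1) << '\n';  // prints -1
    return 0;
  }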
--- llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp index 076d042..7a5d2be 100644 --- a/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp +++ b/llvm/utils/TableGen/Common/CodeGenDAGPatterns.cpp @@ -3858,8 +3858,10 @@ void CodeGenDAGPatterns::parseInstructionPattern(CodeGenInstruction &CGI, for (unsigned i = NumResults, e = CGI.Operands.size(); i != e; ++i) { CGIOperandList::OperandInfo &Op = CGI.Operands[i]; const std::string &OpName = Op.Name; - if (OpName.empty()) + if (OpName.empty()) { I.error("Operand #" + Twine(i) + " in operands list has no name!"); + continue; + } if (!InstInputs.count(OpName)) { // If this is an operand with a DefaultOps set filled in, we can ignore @@ -3872,16 +3874,19 @@ void CodeGenDAGPatterns::parseInstructionPattern(CodeGenInstruction &CGI, } I.error("Operand $" + OpName + " does not appear in the instruction pattern"); + continue; } TreePatternNodePtr InVal = InstInputs[OpName]; InstInputs.erase(OpName); // It occurred, remove from map. if (InVal->isLeaf() && isa(InVal->getLeafValue())) { Record *InRec = cast(InVal->getLeafValue())->getDef(); - if (!checkOperandClass(Op, InRec)) + if (!checkOperandClass(Op, InRec)) { I.error("Operand $" + OpName + "'s register class disagrees" " between the operand and pattern"); + continue; + } } Operands.push_back(Op.Rec); -- cgit v1.1 From a9d93873f857963eeb9ef7f65a725e6aaf99c958 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 4 Apr 2024 17:40:02 -0700 Subject: [clang][ubsan] Switch UBSAN optimization to `llvm.allow.{runtime,ubsan}.check()` (#84858) Intrinsic introduced with #84850. Intrinsics improves performance by 3% comparing to removing traps (on "test-suite/MultiSource/Benchmarks" with PGO+ThinLTO). The pass will be renamed with #84853. 
RFC: https://discourse.llvm.org/t/rfc-add-llvm-experimental-hot-intrinsic-or-llvm-hot/77641 --- clang/test/CodeGen/remote-traps.c | 37 ------- .../Transforms/Instrumentation/RemoveTrapsPass.cpp | 27 +++-- llvm/test/Transforms/RemoveTraps/remove-traps.ll | 115 ++++++++++++++++----- 3 files changed, 107 insertions(+), 72 deletions(-) delete mode 100644 clang/test/CodeGen/remote-traps.c diff --git a/clang/test/CodeGen/remote-traps.c b/clang/test/CodeGen/remote-traps.c deleted file mode 100644 index b12c2c6..0000000 --- a/clang/test/CodeGen/remote-traps.c +++ /dev/null @@ -1,37 +0,0 @@ -// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 4 -// RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow %s -o - | FileCheck %s -// RUN: %clang_cc1 -O1 -emit-llvm -fsanitize=signed-integer-overflow -fsanitize-trap=signed-integer-overflow -mllvm -remove-traps-random-rate=1 %s -o - | FileCheck %s --implicit-check-not="call void @llvm.ubsantrap" --check-prefixes=REMOVE - -// CHECK-LABEL: define dso_local noundef i32 @test( -// CHECK-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// CHECK-NEXT: entry: -// CHECK-NEXT: [[TMP0:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X]], i32 123), !nosanitize [[META2:![0-9]+]] -// CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1, !nosanitize [[META2]] -// CHECK-NEXT: br i1 [[TMP1]], label [[TRAP:%.*]], label [[CONT:%.*]], !nosanitize [[META2]] -// CHECK: trap: -// CHECK-NEXT: tail call void @llvm.ubsantrap(i8 0) #[[ATTR3:[0-9]+]], !nosanitize [[META2]] -// CHECK-NEXT: unreachable, !nosanitize [[META2]] -// CHECK: cont: -// CHECK-NEXT: [[TMP2:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0, !nosanitize [[META2]] -// CHECK-NEXT: ret i32 [[TMP2]] -// -// REMOVE-LABEL: define dso_local noundef i32 @test( -// REMOVE-SAME: i32 noundef [[X:%.*]]) local_unnamed_addr #[[ATTR0:[0-9]+]] { -// REMOVE-NEXT: entry: -// REMOVE-NEXT: [[TMP0:%.*]] = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 [[X]], i32 123), !nosanitize [[META2:![0-9]+]] -// REMOVE-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1, !nosanitize [[META2]] -// REMOVE-NEXT: [[TMP2:%.*]] = xor i1 [[TMP1]], true -// REMOVE-NEXT: tail call void @llvm.assume(i1 [[TMP2]]) -// REMOVE-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP0]], 0, !nosanitize [[META2]] -// REMOVE-NEXT: ret i32 [[TMP3]] -// -int test(int x) { - return x + 123; -} - - -//. -// CHECK: [[META2]] = !{} -//. -// REMOVE: [[META2]] = !{} -//. 
diff --git a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp index b281468..6adc29f 100644 --- a/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp +++ b/llvm/lib/Transforms/Instrumentation/RemoveTrapsPass.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileSummaryInfo.h" +#include "llvm/IR/Constant.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" @@ -35,9 +36,11 @@ STATISTIC(NumChecksRemoved, "Number of removed checks"); static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, const ProfileSummaryInfo *PSI) { - SmallVector Remove; + SmallVector, 16> ReplaceWithValue; std::unique_ptr Rng; + // TODO: + // https://github.com/llvm/llvm-project/pull/84858#discussion_r1520603139 auto ShouldRemove = [&](bool IsHot) { if (!RandomRate.getNumOccurrences()) return IsHot; @@ -54,21 +57,23 @@ static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, continue; auto ID = II->getIntrinsicID(); switch (ID) { - case Intrinsic::ubsantrap: { + case Intrinsic::allow_ubsan_check: + case Intrinsic::allow_runtime_check: { ++NumChecksTotal; bool IsHot = false; if (PSI) { - uint64_t Count = 0; - for (const auto *PR : predecessors(&BB)) - Count += BFI.getBlockProfileCount(PR).value_or(0); + uint64_t Count = BFI.getBlockProfileCount(&BB).value_or(0); IsHot = PSI->isHotCountNthPercentile(HotPercentileCutoff, Count); } - if (ShouldRemove(IsHot)) { - Remove.push_back(II); + bool ToRemove = ShouldRemove(IsHot); + ReplaceWithValue.push_back({ + II, + ToRemove, + }); + if (ToRemove) ++NumChecksRemoved; - } break; } default: @@ -77,10 +82,12 @@ static bool removeUbsanTraps(Function &F, const BlockFrequencyInfo &BFI, } } - for (IntrinsicInst *I : Remove) + for (auto [I, V] : ReplaceWithValue) { + I->replaceAllUsesWith(ConstantInt::getBool(I->getType(), !V)); I->eraseFromParent(); + } - return !Remove.empty(); + return !ReplaceWithValue.empty(); } PreservedAnalyses RemoveTrapsPass::run(Function &F, diff --git a/llvm/test/Transforms/RemoveTraps/remove-traps.ll b/llvm/test/Transforms/RemoveTraps/remove-traps.ll index 4853149..c8d5fec 100644 --- a/llvm/test/Transforms/RemoveTraps/remove-traps.ll +++ b/llvm/test/Transforms/RemoveTraps/remove-traps.ll @@ -7,12 +7,15 @@ target triple = "x86_64-pc-linux-gnu" declare void @llvm.ubsantrap(i8 immarg) +declare i1 @llvm.allow.ubsan.check(i8 immarg) define dso_local noundef i32 @simple(ptr noundef readonly %0) { ; NOPROFILE-LABEL: define dso_local noundef i32 @simple( ; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) { ; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; NOPROFILE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NOPROFILE-NEXT: [[HOT:%.*]] = xor i1 true, true +; NOPROFILE-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; NOPROFILE-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; NOPROFILE: 3: ; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) ; NOPROFILE-NEXT: unreachable @@ -23,8 +26,11 @@ define dso_local noundef i32 @simple(ptr noundef readonly %0) { ; ALL-LABEL: define dso_local noundef i32 @simple( ; ALL-SAME: ptr noundef readonly [[TMP0:%.*]]) { ; ALL-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; ALL-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; ALL-NEXT: [[HOT:%.*]] = xor i1 false, true +; ALL-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; ALL-NEXT: br i1 [[TMP6]], label 
[[TMP3:%.*]], label [[TMP4:%.*]] ; ALL: 3: +; ALL-NEXT: tail call void @llvm.ubsantrap(i8 22) ; ALL-NEXT: unreachable ; ALL: 4: ; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 @@ -33,7 +39,9 @@ define dso_local noundef i32 @simple(ptr noundef readonly %0) { ; HOT99-LABEL: define dso_local noundef i32 @simple( ; HOT99-SAME: ptr noundef readonly [[TMP0:%.*]]) { ; HOT99-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT99-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT99-NEXT: [[HOT:%.*]] = xor i1 true, true +; HOT99-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; HOT99-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; HOT99: 3: ; HOT99-NEXT: tail call void @llvm.ubsantrap(i8 22) ; HOT99-NEXT: unreachable @@ -44,7 +52,9 @@ define dso_local noundef i32 @simple(ptr noundef readonly %0) { ; HOT70-LABEL: define dso_local noundef i32 @simple( ; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) { ; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT70-NEXT: [[HOT:%.*]] = xor i1 true, true +; HOT70-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; HOT70-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; HOT70: 3: ; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) ; HOT70-NEXT: unreachable @@ -52,7 +62,10 @@ define dso_local noundef i32 @simple(ptr noundef readonly %0) { ; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 ; HOT70-NEXT: ret i32 [[TMP5]] ; - %2 = icmp eq ptr %0, null + %chk = icmp eq ptr %0, null + %allow = call i1 @llvm.allow.ubsan.check(i8 22) + %hot = xor i1 %allow, true + %2 = or i1 %chk, %hot br i1 %2, label %3, label %4 3: @@ -69,7 +82,9 @@ define dso_local noundef i32 @hot(ptr noundef readonly %0) !prof !36 { ; NOPROFILE-LABEL: define dso_local noundef i32 @hot( ; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { ; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; NOPROFILE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NOPROFILE-NEXT: [[HOT:%.*]] = xor i1 true, true +; NOPROFILE-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; NOPROFILE-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; NOPROFILE: 3: ; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) ; NOPROFILE-NEXT: unreachable @@ -80,8 +95,11 @@ define dso_local noundef i32 @hot(ptr noundef readonly %0) !prof !36 { ; ALL-LABEL: define dso_local noundef i32 @hot( ; ALL-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { ; ALL-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; ALL-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; ALL-NEXT: [[HOT:%.*]] = xor i1 false, true +; ALL-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; ALL-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; ALL: 3: +; ALL-NEXT: tail call void @llvm.ubsantrap(i8 22) ; ALL-NEXT: unreachable ; ALL: 4: ; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 @@ -90,8 +108,11 @@ define dso_local noundef i32 @hot(ptr noundef readonly %0) !prof !36 { ; HOT99-LABEL: define dso_local noundef i32 @hot( ; HOT99-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { ; HOT99-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT99-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT99-NEXT: [[HOT:%.*]] = xor i1 false, true +; HOT99-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; HOT99-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; HOT99: 3: +; HOT99-NEXT: tail call 
void @llvm.ubsantrap(i8 22) ; HOT99-NEXT: unreachable ; HOT99: 4: ; HOT99-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 @@ -100,7 +121,9 @@ define dso_local noundef i32 @hot(ptr noundef readonly %0) !prof !36 { ; HOT70-LABEL: define dso_local noundef i32 @hot( ; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF16:![0-9]+]] { ; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT70-NEXT: [[HOT:%.*]] = xor i1 true, true +; HOT70-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; HOT70-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; HOT70: 3: ; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) ; HOT70-NEXT: unreachable @@ -108,7 +131,10 @@ define dso_local noundef i32 @hot(ptr noundef readonly %0) !prof !36 { ; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 ; HOT70-NEXT: ret i32 [[TMP5]] ; - %2 = icmp eq ptr %0, null + %chk = icmp eq ptr %0, null + %allow = call i1 @llvm.allow.ubsan.check(i8 22) + %hot = xor i1 %allow, true + %2 = or i1 %chk, %hot br i1 %2, label %3, label %4 3: @@ -124,7 +150,9 @@ define dso_local noundef i32 @veryHot(ptr noundef readonly %0) !prof !39 { ; NOPROFILE-LABEL: define dso_local noundef i32 @veryHot( ; NOPROFILE-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { ; NOPROFILE-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; NOPROFILE-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; NOPROFILE-NEXT: [[HOT:%.*]] = xor i1 true, true +; NOPROFILE-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; NOPROFILE-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; NOPROFILE: 3: ; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) ; NOPROFILE-NEXT: unreachable @@ -135,8 +163,11 @@ define dso_local noundef i32 @veryHot(ptr noundef readonly %0) !prof !39 { ; ALL-LABEL: define dso_local noundef i32 @veryHot( ; ALL-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { ; ALL-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; ALL-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; ALL-NEXT: [[HOT:%.*]] = xor i1 false, true +; ALL-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; ALL-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; ALL: 3: +; ALL-NEXT: tail call void @llvm.ubsantrap(i8 22) ; ALL-NEXT: unreachable ; ALL: 4: ; ALL-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 @@ -145,8 +176,11 @@ define dso_local noundef i32 @veryHot(ptr noundef readonly %0) !prof !39 { ; HOT99-LABEL: define dso_local noundef i32 @veryHot( ; HOT99-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { ; HOT99-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT99-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT99-NEXT: [[HOT:%.*]] = xor i1 false, true +; HOT99-NEXT: [[TMP6:%.*]] = or i1 [[TMP2]], [[HOT]] +; HOT99-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; HOT99: 3: +; HOT99-NEXT: tail call void @llvm.ubsantrap(i8 22) ; HOT99-NEXT: unreachable ; HOT99: 4: ; HOT99-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 @@ -155,14 +189,20 @@ define dso_local noundef i32 @veryHot(ptr noundef readonly %0) !prof !39 { ; HOT70-LABEL: define dso_local noundef i32 @veryHot( ; HOT70-SAME: ptr noundef readonly [[TMP0:%.*]]) !prof [[PROF17:![0-9]+]] { ; HOT70-NEXT: [[TMP2:%.*]] = icmp eq ptr [[TMP0]], null -; HOT70-NEXT: br i1 [[TMP2]], label [[TMP3:%.*]], label [[TMP4:%.*]] +; HOT70-NEXT: [[HOT:%.*]] = xor i1 false, true +; HOT70-NEXT: [[TMP6:%.*]] = or i1 
[[TMP2]], [[HOT]] +; HOT70-NEXT: br i1 [[TMP6]], label [[TMP3:%.*]], label [[TMP4:%.*]] ; HOT70: 3: +; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) ; HOT70-NEXT: unreachable ; HOT70: 4: ; HOT70-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP0]], align 4 ; HOT70-NEXT: ret i32 [[TMP5]] ; - %2 = icmp eq ptr %0, null + %chk = icmp eq ptr %0, null + %allow = call i1 @llvm.allow.ubsan.check(i8 22) + %hot = xor i1 %allow, true + %2 = or i1 %chk, %hot br i1 %2, label %3, label %4 3: @@ -182,7 +222,9 @@ define dso_local noundef i32 @branchColdFnHot(i32 noundef %0, ptr noundef readon ; NOPROFILE-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] ; NOPROFILE: 4: ; NOPROFILE-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; NOPROFILE-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; NOPROFILE-NEXT: [[HOT:%.*]] = xor i1 true, true +; NOPROFILE-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; NOPROFILE-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] ; NOPROFILE: 6: ; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) ; NOPROFILE-NEXT: unreachable @@ -199,8 +241,11 @@ define dso_local noundef i32 @branchColdFnHot(i32 noundef %0, ptr noundef readon ; ALL-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] ; ALL: 4: ; ALL-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; ALL-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; ALL-NEXT: [[HOT:%.*]] = xor i1 false, true +; ALL-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; ALL-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] ; ALL: 6: +; ALL-NEXT: tail call void @llvm.ubsantrap(i8 22) ; ALL-NEXT: unreachable ; ALL: 7: ; ALL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 @@ -215,7 +260,9 @@ define dso_local noundef i32 @branchColdFnHot(i32 noundef %0, ptr noundef readon ; HOT99-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] ; HOT99: 4: ; HOT99-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; HOT99-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT99-NEXT: [[HOT:%.*]] = xor i1 true, true +; HOT99-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; HOT99-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] ; HOT99: 6: ; HOT99-NEXT: tail call void @llvm.ubsantrap(i8 22) ; HOT99-NEXT: unreachable @@ -232,7 +279,9 @@ define dso_local noundef i32 @branchColdFnHot(i32 noundef %0, ptr noundef readon ; HOT70-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF18:![0-9]+]] ; HOT70: 4: ; HOT70-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; HOT70-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT70-NEXT: [[HOT:%.*]] = xor i1 true, true +; HOT70-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; HOT70-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] ; HOT70: 6: ; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) ; HOT70-NEXT: unreachable @@ -247,7 +296,10 @@ define dso_local noundef i32 @branchColdFnHot(i32 noundef %0, ptr noundef readon br i1 %3, label %9, label %4, !prof !38 4: - %5 = icmp eq ptr %1, null + %chk = icmp eq ptr %1, null + %allow = call i1 @llvm.allow.ubsan.check(i8 22) + %hot = xor i1 %allow, true + %5 = or i1 %chk, %hot br i1 %5, label %6, label %7 6: @@ -270,7 +322,9 @@ define dso_local noundef i32 @branchHotFnCold(i32 noundef %0, ptr noundef readon ; NOPROFILE-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] ; NOPROFILE: 4: ; NOPROFILE-NEXT: [[TMP5:%.*]] = icmp eq 
ptr [[TMP1]], null -; NOPROFILE-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; NOPROFILE-NEXT: [[HOT:%.*]] = xor i1 true, true +; NOPROFILE-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; NOPROFILE-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] ; NOPROFILE: 6: ; NOPROFILE-NEXT: tail call void @llvm.ubsantrap(i8 22) ; NOPROFILE-NEXT: unreachable @@ -287,8 +341,11 @@ define dso_local noundef i32 @branchHotFnCold(i32 noundef %0, ptr noundef readon ; ALL-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] ; ALL: 4: ; ALL-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; ALL-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; ALL-NEXT: [[HOT:%.*]] = xor i1 false, true +; ALL-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; ALL-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] ; ALL: 6: +; ALL-NEXT: tail call void @llvm.ubsantrap(i8 22) ; ALL-NEXT: unreachable ; ALL: 7: ; ALL-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 @@ -303,8 +360,11 @@ define dso_local noundef i32 @branchHotFnCold(i32 noundef %0, ptr noundef readon ; HOT99-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] ; HOT99: 4: ; HOT99-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; HOT99-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT99-NEXT: [[HOT:%.*]] = xor i1 false, true +; HOT99-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; HOT99-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] ; HOT99: 6: +; HOT99-NEXT: tail call void @llvm.ubsantrap(i8 22) ; HOT99-NEXT: unreachable ; HOT99: 7: ; HOT99-NEXT: [[TMP8:%.*]] = load i32, ptr [[TMP1]], align 4 @@ -319,7 +379,9 @@ define dso_local noundef i32 @branchHotFnCold(i32 noundef %0, ptr noundef readon ; HOT70-NEXT: br i1 [[TMP3]], label [[TMP9:%.*]], label [[TMP4:%.*]], !prof [[PROF19:![0-9]+]] ; HOT70: 4: ; HOT70-NEXT: [[TMP5:%.*]] = icmp eq ptr [[TMP1]], null -; HOT70-NEXT: br i1 [[TMP5]], label [[TMP6:%.*]], label [[TMP7:%.*]] +; HOT70-NEXT: [[HOT:%.*]] = xor i1 true, true +; HOT70-NEXT: [[TMP11:%.*]] = or i1 [[TMP5]], [[HOT]] +; HOT70-NEXT: br i1 [[TMP11]], label [[TMP6:%.*]], label [[TMP7:%.*]] ; HOT70: 6: ; HOT70-NEXT: tail call void @llvm.ubsantrap(i8 22) ; HOT70-NEXT: unreachable @@ -334,7 +396,10 @@ define dso_local noundef i32 @branchHotFnCold(i32 noundef %0, ptr noundef readon br i1 %3, label %9, label %4, !prof !37 4: - %5 = icmp eq ptr %1, null + %chk = icmp eq ptr %1, null + %allow = call i1 @llvm.allow.ubsan.check(i8 22) + %hot = xor i1 %allow, true + %5 = or i1 %chk, %hot br i1 %5, label %6, label %7 6: -- cgit v1.1 From ab80d00438a04248f9a2f62546ea34c294d08f01 Mon Sep 17 00:00:00 2001 From: Qizhi Hu <836744285@qq.com> Date: Fri, 5 Apr 2024 08:41:06 +0800 Subject: [clang][ASTImporter] fix variable inline of CXX17 (#87314) Fix crash in the testcase from https://github.com/llvm/llvm-project/issues/75114#issuecomment-1872595956 Forget to set inline of variable declaration would make `isThisDeclarationADefinition` get incorrect result and didn't get imported variable. This will lead to a new `VarTemplateDecl` being created and call `setDescribedVarTemplate` again which produces the crash. 
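The property the importer failed to carry over is standard C++17 behavior: a static constexpr data member (including a member variable template) is implicitly an inline variable, so its in-class declaration is already a definition. A small standalone illustration of that rule, independent of the ASTImporter code:

  // C++17: static constexpr data members are implicitly inline, so the
  // in-class declaration of X below is its definition; no out-of-class
  // definition is needed, and every translation unit refers to the same
  // entity -- which is what the importer must also conclude.
  struct S {
    template <typename T> static constexpr bool X = true;
  };

  static_assert(S::X<int>, "the in-class declaration is already a definition");
  static_assert(S::X<double>, "another specialization of the same template");

  int main() { return S::X<char> ? 0 : 1; }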
Co-authored-by: huqizhi <836744285@qq.com> --- clang/lib/AST/ASTImporter.cpp | 4 ++++ clang/unittests/AST/ASTImporterTest.cpp | 28 ++++++++++++++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/clang/lib/AST/ASTImporter.cpp b/clang/lib/AST/ASTImporter.cpp index 94a47a8..45d4c96 100644 --- a/clang/lib/AST/ASTImporter.cpp +++ b/clang/lib/AST/ASTImporter.cpp @@ -4541,6 +4541,10 @@ ExpectedDecl ASTNodeImporter::VisitVarDecl(VarDecl *D) { ToVar->setQualifierInfo(ToQualifierLoc); ToVar->setAccess(D->getAccess()); ToVar->setLexicalDeclContext(LexicalDC); + if (D->isInlineSpecified()) + ToVar->setInlineSpecified(); + if (D->isInline()) + ToVar->setImplicitlyInline(); if (FoundByLookup) { auto *Recent = const_cast(FoundByLookup->getMostRecentDecl()); diff --git a/clang/unittests/AST/ASTImporterTest.cpp b/clang/unittests/AST/ASTImporterTest.cpp index 35ab7e3..acc596f 100644 --- a/clang/unittests/AST/ASTImporterTest.cpp +++ b/clang/unittests/AST/ASTImporterTest.cpp @@ -5317,6 +5317,34 @@ TEST_P(ASTImporterOptionSpecificTestBase, EXPECT_FALSE(ToX); } +TEST_P(ASTImporterOptionSpecificTestBase, VarTemplateDeclInlineWithCXX17) { + Decl *FromTU = getTuDecl( + R"( + struct S { + template static constexpr bool X = true; + }; + )", + Lang_CXX17, "input1.cc"); + Decl *FromTU2 = getTuDecl( + R"( + struct S { + template static constexpr bool X = true; + template void get() { X; } + }; + template U qvariant_cast(const S &v) { return v.get; } + )", + Lang_CXX17, "input2.cc"); + auto *FromX = FirstDeclMatcher().match( + FromTU, varTemplateDecl(hasName("X"))); + auto *ToX = Import(FromX, Lang_CXX17); + ASSERT_TRUE(ToX); + auto *FromX2 = FirstDeclMatcher().match( + FromTU2, varTemplateDecl(hasName("X"))); + auto *ToX2 = Import(FromX2, Lang_CXX17); + EXPECT_TRUE(ToX2); + EXPECT_EQ(ToX, ToX2); +} + TEST_P(ASTImporterOptionSpecificTestBase, VarTemplateParameterDeclContext) { constexpr auto Code = R"( -- cgit v1.1 From 90453f4a9a8955ac612959504941153aa376cb0c Mon Sep 17 00:00:00 2001 From: Youngsuk Kim Date: Thu, 4 Apr 2024 20:45:18 -0400 Subject: [Clang][Sema] Warn unused cxx vardecl which entirely consists condition expr of if/while/for construct (#87348) Emit `-Wunused-but-set-variable` warning on C++ variables whose declaration (with initializer) entirely consist the condition expression of a if/while/for construct but are not actually used in the body of the if/while/for construct. Fixes #41447 --- clang/docs/ReleaseNotes.rst | 4 ++++ clang/include/clang/AST/Decl.h | 12 ++++++++++++ clang/lib/Sema/SemaDecl.cpp | 17 +++++++++++++++-- clang/lib/Sema/SemaDeclCXX.cpp | 3 +++ .../test/SemaCXX/warn-unused-but-set-variables-cpp.cpp | 8 ++++++++ 5 files changed, 42 insertions(+), 2 deletions(-) diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 8fc9253..92032c0 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -334,6 +334,10 @@ Improvements to Clang's diagnostics - Clang now emits ``unused argument`` warning when the -fmodule-output flag is used with an input that is not of type c++-module. +- Clang emits a ``-Wunused-but-set-variable`` warning on C++ variables whose declaration + (with initializer) entirely consist the condition expression of a if/while/for construct + but are not actually used in the body of the if/while/for construct. 
Fixes #GH41447 + Improvements to Clang's time-trace ---------------------------------- diff --git a/clang/include/clang/AST/Decl.h b/clang/include/clang/AST/Decl.h index a5879591f..5f1f83b 100644 --- a/clang/include/clang/AST/Decl.h +++ b/clang/include/clang/AST/Decl.h @@ -1100,6 +1100,9 @@ protected: LLVM_PREFERRED_TYPE(bool) unsigned EscapingByref : 1; + + LLVM_PREFERRED_TYPE(bool) + unsigned IsCXXCondDecl : 1; }; union { @@ -1589,6 +1592,15 @@ public: NonParmVarDeclBits.EscapingByref = true; } + bool isCXXCondDecl() const { + return isa(this) ? false : NonParmVarDeclBits.IsCXXCondDecl; + } + + void setCXXCondDecl() { + assert(!isa(this)); + NonParmVarDeclBits.IsCXXCondDecl = true; + } + /// Determines if this variable's alignment is dependent. bool hasDependentAlignment() const; diff --git a/clang/lib/Sema/SemaDecl.cpp b/clang/lib/Sema/SemaDecl.cpp index 5c11528..cbd84dd 100644 --- a/clang/lib/Sema/SemaDecl.cpp +++ b/clang/lib/Sema/SemaDecl.cpp @@ -2188,8 +2188,21 @@ void Sema::DiagnoseUnusedButSetDecl(const VarDecl *VD, assert(iter->getSecond() >= 0 && "Found a negative number of references to a VarDecl"); - if (iter->getSecond() != 0) - return; + if (int RefCnt = iter->getSecond(); RefCnt > 0) { + // Assume the given VarDecl is "used" if its ref count stored in + // `RefMinusAssignments` is positive, with one exception. + // + // For a C++ variable whose decl (with initializer) entirely consist the + // condition expression of a if/while/for construct, + // Clang creates a DeclRefExpr for the condition expression rather than a + // BinaryOperator of AssignmentOp. Thus, the C++ variable's ref + // count stored in `RefMinusAssignment` equals 1 when the variable is never + // used in the body of the if/while/for construct. + bool UnusedCXXCondDecl = VD->isCXXCondDecl() && (RefCnt == 1); + if (!UnusedCXXCondDecl) + return; + } + unsigned DiagID = isa(VD) ? diag::warn_unused_but_set_parameter : diag::warn_unused_but_set_variable; DiagReceiver(VD->getLocation(), PDiag(DiagID) << VD); diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index f32ff39..068a2e4 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -18564,6 +18564,9 @@ DeclResult Sema::ActOnCXXConditionDeclaration(Scope *S, Declarator &D) { return true; } + if (auto *VD = dyn_cast(Dcl)) + VD->setCXXCondDecl(); + return Dcl; } diff --git a/clang/test/SemaCXX/warn-unused-but-set-variables-cpp.cpp b/clang/test/SemaCXX/warn-unused-but-set-variables-cpp.cpp index 418baa7..eaedb53 100644 --- a/clang/test/SemaCXX/warn-unused-but-set-variables-cpp.cpp +++ b/clang/test/SemaCXX/warn-unused-but-set-variables-cpp.cpp @@ -69,3 +69,11 @@ template void f5() { SWarnUnused swu; ++swu; } + +void f6() { + if (int x = 123) {} // expected-warning{{variable 'x' set but not used}} + + while (int x = 123) {} // expected-warning{{variable 'x' set but not used}} + + for (; int x = 123;) {} // expected-warning{{variable 'x' set but not used}} +} -- cgit v1.1 From b76eb1ddfbacda273b8e6a9940f1da6812fdc2e0 Mon Sep 17 00:00:00 2001 From: Vitaly Buka Date: Thu, 4 Apr 2024 17:47:16 -0700 Subject: [clang][CodeGen] Remove SimplifyCFGPass preceding RemoveTrapsPass (#84852) There is no performance difference after switching to `llvm.experimental.hot`. 
--- clang/lib/CodeGen/BackendUtil.cpp | 6 ------ 1 file changed, 6 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index e25a176..ee4bd80 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -84,7 +84,6 @@ #include "llvm/Transforms/Scalar/EarlyCSE.h" #include "llvm/Transforms/Scalar/GVN.h" #include "llvm/Transforms/Scalar/JumpThreading.h" -#include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Utils/Debugify.h" #include "llvm/Transforms/Utils/EntryExitInstrumenter.h" #include "llvm/Transforms/Utils/ModuleUtils.h" @@ -753,11 +752,6 @@ static void addSanitizers(const Triple &TargetTriple, // from `buildInlinerPipeline`, which called after profile matching. PB.registerScalarOptimizerLateEPCallback( [](FunctionPassManager &FPM, OptimizationLevel Level) { - // RemoveTrapsPass expects trap blocks preceded by conditional - // branches, which usually is not the case without SimplifyCFG. - // TODO: Remove `SimplifyCFGPass` after switching to dedicated - // intrinsic. - FPM.addPass(SimplifyCFGPass()); FPM.addPass(RemoveTrapsPass()); }); } -- cgit v1.1