diff options
author | Aiden Grossman <aidengrossman@google.com> | 2025-07-14 15:25:06 +0000 |
---|---|---|
committer | Aiden Grossman <aidengrossman@google.com> | 2025-07-14 15:25:06 +0000 |
commit | 46e896afdbfeb4228229059c0a854e8dd23473fa (patch) | |
tree | 9a2b6194f95a500ac869e797d76d2f7e7cfb7977 | |
parent | 80a1fee56c3a66d0fee90c00008f52f3ea271cc2 (diff) | |
parent | 3e43915be62ef355029b61bebe78637a885b8b6c (diff) | |
download | llvm-users/boomanaiden154/main.cigithub-version-pin-packages-in-windows-container.zip llvm-users/boomanaiden154/main.cigithub-version-pin-packages-in-windows-container.tar.gz llvm-users/boomanaiden154/main.cigithub-version-pin-packages-in-windows-container.tar.bz2 |
[𝘀𝗽𝗿] changes introduced through rebase users/boomanaiden154/main.cigithub-version-pin-packages-in-windows-container
Created using spr 1.3.4
[skip ci]
18 files changed, 393 insertions, 26 deletions
diff --git a/libc/shared/math.h b/libc/shared/math.h index 7ad6b7e..ba6b1c2 100644 --- a/libc/shared/math.h +++ b/libc/shared/math.h @@ -17,5 +17,6 @@ #include "math/frexpf128.h" #include "math/frexpf16.h" #include "math/ldexpf128.h" +#include "math/ldexpf16.h" #endif // LLVM_LIBC_SHARED_MATH_H diff --git a/libc/shared/math/ldexpf16.h b/libc/shared/math/ldexpf16.h new file mode 100644 index 0000000..4c98c4c --- /dev/null +++ b/libc/shared/math/ldexpf16.h @@ -0,0 +1,31 @@ +//===-- Shared ldexpf16 function --------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SHARED_MATH_LDEXPF16_H +#define LLVM_LIBC_SHARED_MATH_LDEXPF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "shared/libc_common.h" +#include "src/__support/math/ldexpf16.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace shared { + +using math::ldexpf16; + +} // namespace shared + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SHARED_MATH_LDEXPF16_H diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt index 4adfc33..2215695 100644 --- a/libc/src/__support/math/CMakeLists.txt +++ b/libc/src/__support/math/CMakeLists.txt @@ -92,3 +92,13 @@ add_header_library( libc.src.__support.FPUtil.manipulation_functions libc.include.llvm-libc-types.float128 ) + +add_header_library( + ldexpf16 + HDRS + ldexpf16.h + DEPENDS + libc.src.__support.macros.properties.types + libc.src.__support.FPUtil.manipulation_functions + libc.include.llvm-libc-macros.float16_macros +) diff --git a/libc/src/__support/math/ldexpf16.h b/libc/src/__support/math/ldexpf16.h new file mode 100644 index 
0000000..fbead87 --- /dev/null +++ b/libc/src/__support/math/ldexpf16.h @@ -0,0 +1,34 @@ +//===-- Implementation header for ldexpf16 ----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LDEXPF16_H +#define LLVM_LIBC_SRC___SUPPORT_MATH_LDEXPF16_H + +#include "include/llvm-libc-macros/float16-macros.h" + +#ifdef LIBC_TYPES_HAS_FLOAT16 + +#include "src/__support/FPUtil/ManipulationFunctions.h" +#include "src/__support/common.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +namespace math { + +static constexpr float16 ldexpf16(float16 x, int exp) { + return fputil::ldexp(x, exp); +} + +} // namespace math + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LIBC_TYPES_HAS_FLOAT16 + +#endif // LLVM_LIBC_SRC___SUPPORT_MATH_LDEXPF16_H diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt index 4ad7e71..8be57cd 100644 --- a/libc/src/math/generic/CMakeLists.txt +++ b/libc/src/math/generic/CMakeLists.txt @@ -1933,8 +1933,7 @@ add_entrypoint_object( HDRS ../ldexpf16.h DEPENDS - libc.src.__support.macros.properties.types - libc.src.__support.FPUtil.manipulation_functions + libc.src.__support.math.ldexpf16 ) add_entrypoint_object( diff --git a/libc/src/math/generic/ldexpf16.cpp b/libc/src/math/generic/ldexpf16.cpp index caa344b..ecf1633 100644 --- a/libc/src/math/generic/ldexpf16.cpp +++ b/libc/src/math/generic/ldexpf16.cpp @@ -7,14 +7,13 @@ //===----------------------------------------------------------------------===// #include "src/math/ldexpf16.h" -#include "src/__support/FPUtil/ManipulationFunctions.h" -#include "src/__support/common.h" -#include "src/__support/macros/config.h" + +#include 
"src/__support/math/ldexpf16.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(float16, ldexpf16, (float16 x, int exp)) { - return fputil::ldexp(x, exp); + return math::ldexpf16(x, exp); } } // namespace LIBC_NAMESPACE_DECL diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td index 1f3d619..1b0e90b 100644 --- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td +++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td @@ -2387,6 +2387,9 @@ def : RWSysReg<"TRBSR_EL3", 0b11, 0b110, 0b1001, 0b1011, 0b011>; // v9.6 FEAT_PoPS // let Requires = [{ {AArch64::FeaturePoPS} }] in { -def : DC<"CIGDVAPS", 0b000, 0b0111, 0b1111, 0b101>; def : DC<"CIVAPS", 0b000, 0b0111, 0b1111, 0b001>; } + +let Requires = [{ {AArch64::FeaturePoPS, AArch64::FeatureMTE} }] in { +def : DC<"CIGDVAPS", 0b000, 0b0111, 0b1111, 0b101>; +} diff --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp index 6ae529e..31b4f11 100644 --- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp +++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp @@ -327,6 +327,8 @@ DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address, raw_ostream &CS) const { + CommentStream = &CS; + // Get the first two bytes of the instruction. 
Size = 0; if (Bytes.size() < 2) diff --git a/llvm/test/MC/AArch64/armv9.6a-ras.s b/llvm/test/MC/AArch64/armv9.6a-ras.s index 80fcb6b..3cf240c 100644 --- a/llvm/test/MC/AArch64/armv9.6a-ras.s +++ b/llvm/test/MC/AArch64/armv9.6a-ras.s @@ -1,9 +1,9 @@ // RUN: not llvm-mc -triple aarch64 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix NO-POPS -// RUN: llvm-mc -triple aarch64 -mattr=+pops -show-encoding < %s 2>&1 | FileCheck %s --check-prefix HAS-POPS +// RUN: llvm-mc -triple aarch64 -mattr=+pops,+mte -show-encoding < %s 2>&1 | FileCheck %s --check-prefix HAS-POPS dc CIGDVAPS, x3 dc CIVAPS, x3 -// NO-POPS: error: DC CIGDVAPS requires: pops +// NO-POPS: error: DC CIGDVAPS requires: mte, memtag, pops // NO-POPS: error: DC CIVAPS requires: pops # HAS-POPS: dc cigdvaps, x3 // encoding: [0xa3,0x7f,0x08,0xd5] diff --git a/llvm/test/MC/Disassembler/AArch64/armv9.6a-ras.txt b/llvm/test/MC/Disassembler/AArch64/armv9.6a-ras.txt index d6bf657..f2971bc 100644 --- a/llvm/test/MC/Disassembler/AArch64/armv9.6a-ras.txt +++ b/llvm/test/MC/Disassembler/AArch64/armv9.6a-ras.txt @@ -1,4 +1,4 @@ -# RUN: llvm-mc -triple aarch64 -mattr=+pops -disassemble < %s | FileCheck %s +# RUN: llvm-mc -triple aarch64 -mattr=+pops,+mte -disassemble < %s | FileCheck %s #------------------------------------------------------------------------------ # ARMV9.6-A RAS Extensions diff --git a/llvm/unittests/MC/SystemZ/CMakeLists.txt b/llvm/unittests/MC/SystemZ/CMakeLists.txt index 3b7af4a..6c10778 100644 --- a/llvm/unittests/MC/SystemZ/CMakeLists.txt +++ b/llvm/unittests/MC/SystemZ/CMakeLists.txt @@ -4,12 +4,14 @@ include_directories( set(LLVM_LINK_COMPONENTS SystemZ + MCDisassembler MCParser MC Support TargetParser ) -add_llvm_unittest(SystemZAsmLexerTests +add_llvm_unittest(SystemZMCTests SystemZAsmLexerTest.cpp + SystemZMCDisassemblerTest.cpp ) diff --git a/llvm/unittests/MC/SystemZ/SystemZMCDisassemblerTest.cpp b/llvm/unittests/MC/SystemZ/SystemZMCDisassemblerTest.cpp new file mode 100644 index 
0000000..df59fcb --- /dev/null +++ b/llvm/unittests/MC/SystemZ/SystemZMCDisassemblerTest.cpp @@ -0,0 +1,103 @@ +//===- SystemZMCDisassemblerTest.cpp - Tests for SystemZ MCDisassembler ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCDisassembler/MCDisassembler.h" +#include "llvm/MC/MCDisassembler/MCSymbolizer.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCTargetOptions.h" +#include "llvm/MC/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "gtest/gtest.h" + +using namespace llvm; + +namespace { + +struct Context { + const char *TripleName = "systemz-unknown"; + std::unique_ptr<MCRegisterInfo> MRI; + std::unique_ptr<MCAsmInfo> MAI; + std::unique_ptr<MCContext> Ctx; + std::unique_ptr<MCSubtargetInfo> STI; + std::unique_ptr<MCDisassembler> DisAsm; + + Context() { + LLVMInitializeSystemZTargetInfo(); + LLVMInitializeSystemZTargetMC(); + LLVMInitializeSystemZDisassembler(); + + // If we didn't build SystemZ, do not run the test. 
+ std::string Error; + const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error); + if (!TheTarget) + return; + + MRI.reset(TheTarget->createMCRegInfo(TripleName)); + MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions())); + STI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", "")); + Ctx = std::make_unique<MCContext>(Triple(TripleName), MAI.get(), MRI.get(), + STI.get()); + + DisAsm.reset(TheTarget->createMCDisassembler(*STI, *Ctx)); + } + + operator MCContext &() { return *Ctx; }; +}; + +Context &getContext() { + static Context Ctxt; + return Ctxt; +} + +class SystemZMCSymbolizerTest : public MCSymbolizer { +public: + SystemZMCSymbolizerTest(MCContext &MC) : MCSymbolizer(MC, nullptr) {} + ~SystemZMCSymbolizerTest() {} + + bool tryAddingSymbolicOperand([[maybe_unused]] MCInst &Inst, + [[maybe_unused]] raw_ostream &CStream, + [[maybe_unused]] int64_t Value, + [[maybe_unused]] uint64_t Address, + [[maybe_unused]] bool IsBranch, + [[maybe_unused]] uint64_t Offset, + [[maybe_unused]] uint64_t OpSize, + [[maybe_unused]] uint64_t InstSize) override { + return true; + } + + void + tryAddingPcLoadReferenceComment([[maybe_unused]] raw_ostream &cStream, + [[maybe_unused]] int64_t Value, + [[maybe_unused]] uint64_t Address) override {} +}; + +} // namespace + +TEST(SystemZDisassembler, SystemZMCSymbolizerTest) { + SystemZMCSymbolizerTest *TestSymbolizer = + new SystemZMCSymbolizerTest(getContext()); + getContext().DisAsm->setSymbolizer( + std::unique_ptr<MCSymbolizer>(TestSymbolizer)); + + MCInst Inst; + uint64_t InstSize; + + // Check that the SystemZ disassembler sets the comment stream before calling + // MCDisassembler::tryAddingSymbolicOperand. This will fail an assert if it + // does not do that. 
+ MCDisassembler::DecodeStatus Status = getContext().DisAsm->getInstruction( + Inst, InstSize, + // lgrl %r1, 0x1234 + {0xc4, 0x18, 0x00, 0x00, 0x9a, 0x1a}, 0, nulls()); + ASSERT_TRUE(Status == MCDisassembler::Success); + EXPECT_EQ(InstSize, uint64_t{6}); +} diff --git a/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp b/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp index d2a0326..ec2bc95 100644 --- a/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp +++ b/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp @@ -323,19 +323,28 @@ struct ConvertMemRefLoad final : OpConversionPattern<memref::LoadOp> { // It is not clear if this case actually happens in practice, but we keep // the operations just in case. Otherwise, if the arith computation bitwidth // is different from the emulated bitwidth we truncate the result. - Operation *result; + Value result; auto resultTy = getTypeConverter()->convertType(oldElementType); - if (resultTy == convertedElementType) { + auto conversionTy = + resultTy.isInteger() + ? 
resultTy + : IntegerType::get(rewriter.getContext(), + resultTy.getIntOrFloatBitWidth()); + if (conversionTy == convertedElementType) { auto mask = rewriter.create<arith::ConstantOp>( loc, convertedElementType, rewriter.getIntegerAttr(convertedElementType, (1 << srcBits) - 1)); result = rewriter.create<arith::AndIOp>(loc, bitsLoad, mask); } else { - result = rewriter.create<arith::TruncIOp>(loc, resultTy, bitsLoad); + result = rewriter.create<arith::TruncIOp>(loc, conversionTy, bitsLoad); } - rewriter.replaceOp(op, result->getResult(0)); + if (conversionTy != resultTy) { + result = rewriter.create<arith::BitcastOp>(loc, resultTy, result); + } + + rewriter.replaceOp(op, result); return success(); } }; @@ -415,8 +424,18 @@ struct ConvertMemrefStore final : OpConversionPattern<memref::StoreOp> { } Location loc = op.getLoc(); - Value extendedInput = rewriter.create<arith::ExtUIOp>(loc, dstIntegerType, - adaptor.getValue()); + + // Pad the input value with 0s on the left. + Value input = adaptor.getValue(); + if (!input.getType().isInteger()) { + input = rewriter.create<arith::BitcastOp>( + loc, + IntegerType::get(rewriter.getContext(), + input.getType().getIntOrFloatBitWidth()), + input); + } + Value extendedInput = + rewriter.create<arith::ExtUIOp>(loc, dstIntegerType, input); // Special case 0-rank memref stores. No need for masking. 
if (convertedType.getRank() == 0) { @@ -619,11 +638,11 @@ void memref::populateMemRefNarrowTypeEmulationConversions( arith::NarrowTypeEmulationConverter &typeConverter) { typeConverter.addConversion( [&typeConverter](MemRefType ty) -> std::optional<Type> { - auto intTy = dyn_cast<IntegerType>(ty.getElementType()); - if (!intTy) + Type elementType = ty.getElementType(); + if (!elementType.isIntOrFloat()) return ty; - unsigned width = intTy.getWidth(); + unsigned width = elementType.getIntOrFloatBitWidth(); unsigned loadStoreWidth = typeConverter.getLoadStoreBitwidth(); if (width >= loadStoreWidth) return ty; @@ -636,8 +655,11 @@ void memref::populateMemRefNarrowTypeEmulationConversions( if (!strides.empty() && strides.back() != 1) return nullptr; - auto newElemTy = IntegerType::get(ty.getContext(), loadStoreWidth, - intTy.getSignedness()); + auto newElemTy = IntegerType::get( + ty.getContext(), loadStoreWidth, + elementType.isInteger() + ? cast<IntegerType>(elementType).getSignedness() + : IntegerType::SignednessSemantics::Signless); if (!newElemTy) return nullptr; diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp index 004bead..0fe0841 100644 --- a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp +++ b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp @@ -1268,8 +1268,18 @@ struct ConvertVectorTransferRead final bool isDivisibleInSize = fitsInMultiByteContainerTy(op.getVectorType(), containerElemTy); - auto newPadding = rewriter.create<arith::ExtUIOp>(loc, containerElemTy, - adaptor.getPadding()); + // Pad the padding value with 0s on the left. These bits are discarded and + // thus their values don't matter. 
+ Value padding = adaptor.getPadding(); + if (!padding.getType().isInteger()) { + padding = rewriter.create<arith::BitcastOp>( + loc, + IntegerType::get(rewriter.getContext(), + padding.getType().getIntOrFloatBitWidth()), + padding); + } + auto newPadding = + rewriter.create<arith::ExtUIOp>(loc, containerElemTy, padding); auto stridedMetadata = rewriter.create<memref::ExtractStridedMetadataOp>(loc, op.getBase()); diff --git a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir index 3378d32..0cce8c1 100644 --- a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir +++ b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir @@ -61,6 +61,41 @@ func.func @memref_load_i4(%arg0: index) -> i4 { // ----- +func.func @memref_load_f4(%arg0: index) -> f4E2M1FN { + %0 = memref.alloc() : memref<5xf4E2M1FN> + %1 = memref.load %0[%arg0] : memref<5xf4E2M1FN> + return %1 : f4E2M1FN +} +// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 floordiv 2)> +// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 2) * 8) +// CHECK: func @memref_load_f4( +// CHECK-SAME: %[[ARG0:.+]]: index +// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<3xi8> +// CHECK: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] +// CHECK: %[[LOADVAL:.+]] = memref.load %[[ALLOC]][%[[INDEX]]] +// CHECK: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]]] +// CHECK: %[[CAST:.+]] = arith.index_cast %[[BITOFFSET]] : index to i8 +// CHECK: %[[SHIFTRT:.+]] = arith.shrsi %[[LOADVAL]], %[[CAST]] +// CHECK: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i8 to i4 +// CHECK: %[[BC:.+]] = arith.bitcast %[[TRUNC]] : i4 to f4E2M1FN +// CHECK: return %[[BC]] + +// CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 floordiv 8)> +// CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 8) * 32) +// CHECK32: func @memref_load_f4( +// CHECK32-SAME: %[[ARG0:.+]]: index +// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<1xi32> +// CHECK32: 
%[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]] +// CHECK32: %[[LOADVAL:.+]] = memref.load %[[ALLOC]][%[[INDEX]]] +// CHECK32: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]]] +// CHECK32: %[[CAST:.+]] = arith.index_cast %[[BITOFFSET]] : index to i32 +// CHECK32: %[[SHIFTRT:.+]] = arith.shrsi %[[LOADVAL]], %[[CAST]] +// CHECK32: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i32 to i4 +// CHECK32: %[[BC:.+]] = arith.bitcast %[[TRUNC]] : i4 to f4E2M1FN +// CHECK32: return %[[BC]] + +// ----- + func.func @memref_load_i4_rank2(%arg0: index, %arg1: index) -> i4 { %0 = memref.alloc() : memref<3x125xi4> %align0 = memref.assume_alignment %0, 64 : memref<3x125xi4> @@ -470,6 +505,29 @@ func.func @rank_zero_memref_store(%arg0: i4) -> () { // ----- +func.func @rank_zero_memref_store_f4(%arg0: f4E2M1FN) -> () { + %0 = memref.alloc() : memref<f4E2M1FN> + memref.store %arg0, %0[] : memref<f4E2M1FN> + return +} +// CHECK-LABEL: func @rank_zero_memref +// CHECK-SAME: %[[ARG0:.+]]: f4E2M1FN +// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<i8> +// CHECK: %[[BC:.+]] = arith.bitcast %[[ARG0]] : f4E2M1FN to i4 +// CHECK: %[[EXTUI:.+]] = arith.extui %[[BC]] : i4 to i8 +// CHECK: %[[WRITE_RMW:.+]] = memref.atomic_rmw assign %[[EXTUI]], %[[ALLOC]][] : (i8, memref<i8>) -> i8 +// CHECK: return + +// CHECK32-LABEL: func @rank_zero_memref +// CHECK32-SAME: %[[ARG0:.+]]: f4E2M1FN +// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<i32> +// CHECK32: %[[BC:.+]] = arith.bitcast %[[ARG0]] : f4E2M1FN to i4 +// CHECK32: %[[EXTUI:.+]] = arith.extui %[[BC]] : i4 to i32 +// CHECK32: %[[WRITE_RMW:.+]] = memref.atomic_rmw assign %[[EXTUI]], %[[ALLOC]][] : (i32, memref<i32>) -> i32 +// CHECK32: return + +// ----- + func.func @memref_collapse_shape_i4(%idx0 : index, %idx1 : index) -> i4 { %arr = memref.alloc() : memref<32x8x128xi4> %collapse = memref.collapse_shape %arr[[0, 1], [2]] : memref<32x8x128xi4> into memref<256x128xi4> diff --git 
a/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir b/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir index 6c92449..98b1f07 100644 --- a/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir +++ b/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir @@ -53,6 +53,31 @@ func.func @vector_load_i4(%arg1: index, %arg2: index) -> vector<3x8xi4> { // ----- +func.func @vector_load_f4(%arg1: index, %arg2: index) -> vector<3x8xf4E2M1FN> { + %0 = memref.alloc() : memref<3x8xf4E2M1FN> + %cst = arith.constant dense<0.0> : vector<3x8xf4E2M1FN> + %1 = vector.load %0[%arg1, %arg2] : memref<3x8xf4E2M1FN>, vector<8xf4E2M1FN> + %2 = vector.insert %1, %cst [0] : vector<8xf4E2M1FN> into vector<3x8xf4E2M1FN> + return %2 : vector<3x8xf4E2M1FN> +} +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)> +// CHECK: func @vector_load_f4 +// CHECK-SAME: (%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index) +// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<12xi8> +// CHECK: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG0]], %[[ARG1]]] +// CHECK: %[[VEC:.+]] = vector.load %[[ALLOC]][%[[INDEX]]] : memref<12xi8>, vector<4xi8> +// CHECK: %[[VEC_F4:.+]] = vector.bitcast %[[VEC]] : vector<4xi8> to vector<8xf4E2M1FN> + +// CHECK32-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1 floordiv 8)> +// CHECK32: func @vector_load_f4 +// CHECK32-SAME: (%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index) +// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<3xi32> +// CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG0]], %[[ARG1]]] +// CHECK32: %[[VEC:.+]] = vector.load %[[ALLOC]][%[[INDEX]]] : memref<3xi32>, vector<1xi32> +// CHECK32: %[[VEC_F4:.+]] = vector.bitcast %[[VEC]] : vector<1xi32> to vector<8xf4E2M1FN> + +// ----- + func.func @vector_load_i4_dynamic(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) -> vector<8xi4> { %0 = memref.alloc(%arg0, %arg1) : memref<?x?xi4> %1 = vector.load %0[%arg2, %arg3] : 
memref<?x?xi4>, vector<8xi4> @@ -119,6 +144,37 @@ func.func @vector_transfer_read_i4(%arg1: index, %arg2: index) -> vector<8xi4> { // ----- +func.func @vector_transfer_read_f4(%arg1: index, %arg2: index) -> vector<8xf4E2M1FN> { + %c0 = arith.constant 0.0 : f4E2M1FN + %0 = memref.alloc() : memref<3x8xf4E2M1FN> + %1 = vector.transfer_read %0[%arg1, %arg2], %c0 {in_bounds = [true]} : + memref<3x8xf4E2M1FN>, vector<8xf4E2M1FN> + return %1 : vector<8xf4E2M1FN> +} +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)> +// CHECK: func @vector_transfer_read_f4 +// CHECK-SAME: (%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index) +// CHECK: %[[CONST:.+]] = arith.constant 0.{{0+}}e+00 : f4E2M1FN +// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<12xi8> +// CHECK: %[[BC:.+]] = arith.bitcast %[[CONST]] : f4E2M1FN to i4 +// CHECK: %[[PAD:.+]] = arith.extui %[[BC]] : i4 to i8 +// CHECK: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG0]], %[[ARG1]]] +// CHECK: %[[VEC:.+]] = vector.transfer_read %[[ALLOC]][%[[INDEX]]], %[[PAD]] : memref<12xi8>, vector<4xi8> +// CHECK: %[[VEC_F4:.+]] = vector.bitcast %[[VEC]] : vector<4xi8> to vector<8xf4E2M1FN> + +// CHECK32-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1 floordiv 8)> +// CHECK32: func @vector_transfer_read_f4 +// CHECK32-SAME: (%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index) +// CHECK32: %[[CONST:.+]] = arith.constant 0.{{0+}}e+00 : f4E2M1FN +// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<3xi32> +// CHECK32: %[[BC:.+]] = arith.bitcast %[[CONST]] : f4E2M1FN to i4 +// CHECK32: %[[PAD:.+]] = arith.extui %[[BC]] : i4 to i32 +// CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG0]], %[[ARG1]]] +// CHECK32: %[[VEC:.+]] = vector.transfer_read %[[ALLOC]][%[[INDEX]]], %[[PAD]] : memref<3xi32>, vector<1xi32> +// CHECK32: %[[VEC_F4:.+]] = vector.bitcast %[[VEC]] : vector<1xi32> to vector<8xf4E2M1FN> + +// ----- + 
///---------------------------------------------------------------------------------------- /// vector.maskedload ///---------------------------------------------------------------------------------------- @@ -439,6 +495,28 @@ func.func @vector_store_i4(%arg0: vector<8xi4>, %arg1: index, %arg2: index) { // ----- +func.func @vector_store_f4(%arg0: vector<8xf4E2M1FN>, %arg1: index, %arg2: index) { + %0 = memref.alloc() : memref<4x8xf4E2M1FN> + vector.store %arg0, %0[%arg1, %arg2] :memref<4x8xf4E2M1FN>, vector<8xf4E2M1FN> + return +} + +// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)> +// CHECK: func @vector_store_f4 +// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<16xi8> +// CHECK: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]]] +// CHECK: %[[VEC_I8:.+]] = vector.bitcast %[[ARG0]] : vector<8xf4E2M1FN> to vector<4xi8> +// CHECK: vector.store %[[VEC_I8:.+]], %[[ALLOC:.+]][%[[INDEX:.+]]] : memref<16xi8>, vector<4xi8> + +// CHECK32-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1 floordiv 8)> +// CHECK32: func @vector_store_f4 +// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<4xi32> +// CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]]] +// CHECK32: %[[VEC_I32:.+]] = vector.bitcast %[[ARG0]] : vector<8xf4E2M1FN> to vector<1xi32> +// CHECK32: vector.store %[[VEC_I32:.+]], %[[ALLOC:.+]][%[[INDEX:.+]]] : memref<4xi32>, vector<1xi32> + +// ----- + // FIXME: This example assumes that the store happens at a byte boundary, but // that's not guaranteed. 
Below is a counter-example with specific dimensions: // vector.store %arg0, %0[0, 3] : memref<2x13xi4>, vector<8xi4> diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp index 17a2b00c..c4e7f96 100644 --- a/offload/liboffload/src/OffloadImpl.cpp +++ b/offload/liboffload/src/OffloadImpl.cpp @@ -231,7 +231,7 @@ Error olShutDown_impl() { for (auto &P : OldContext->Platforms) { // Host plugin is nullptr and has no deinit - if (!P.Plugin) + if (!P.Plugin || !P.Plugin->is_initialized()) continue; if (auto Res = P.Plugin->deinit()) diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel index b429d662..5476a17 100644 --- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel +++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel @@ -2187,6 +2187,16 @@ libc_support_library( ], ) +libc_support_library( + name = "__support_math_ldexpf16", + hdrs = ["src/__support/math/ldexpf16.h"], + deps = [ + ":__support_macros_properties_types", + ":__support_fputil_manipulation_functions", + ":llvm_libc_macros_float16_macros" + ], +) + ############################### complex targets ################################ libc_function( @@ -3347,7 +3357,12 @@ libc_math_function( ], ) -libc_math_function(name = "ldexpf16") +libc_math_function( + name = "ldexpf16", + additional_deps = [ + ":__support_math_ldexpf16", + ], +) libc_math_function(name = "llogb") |