aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAiden Grossman <aidengrossman@google.com>2025-07-14 15:25:06 +0000
committerAiden Grossman <aidengrossman@google.com>2025-07-14 15:25:06 +0000
commit46e896afdbfeb4228229059c0a854e8dd23473fa (patch)
tree9a2b6194f95a500ac869e797d76d2f7e7cfb7977
parent80a1fee56c3a66d0fee90c00008f52f3ea271cc2 (diff)
parent3e43915be62ef355029b61bebe78637a885b8b6c (diff)
downloadllvm-users/boomanaiden154/main.cigithub-version-pin-packages-in-windows-container.zip
llvm-users/boomanaiden154/main.cigithub-version-pin-packages-in-windows-container.tar.gz
llvm-users/boomanaiden154/main.cigithub-version-pin-packages-in-windows-container.tar.bz2
Created using spr 1.3.4 [skip ci]
-rw-r--r--libc/shared/math.h1
-rw-r--r--libc/shared/math/ldexpf16.h31
-rw-r--r--libc/src/__support/math/CMakeLists.txt10
-rw-r--r--libc/src/__support/math/ldexpf16.h34
-rw-r--r--libc/src/math/generic/CMakeLists.txt3
-rw-r--r--libc/src/math/generic/ldexpf16.cpp7
-rw-r--r--llvm/lib/Target/AArch64/AArch64SystemOperands.td5
-rw-r--r--llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp2
-rw-r--r--llvm/test/MC/AArch64/armv9.6a-ras.s4
-rw-r--r--llvm/test/MC/Disassembler/AArch64/armv9.6a-ras.txt2
-rw-r--r--llvm/unittests/MC/SystemZ/CMakeLists.txt4
-rw-r--r--llvm/unittests/MC/SystemZ/SystemZMCDisassemblerTest.cpp103
-rw-r--r--mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp44
-rw-r--r--mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp14
-rw-r--r--mlir/test/Dialect/MemRef/emulate-narrow-type.mlir58
-rw-r--r--mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir78
-rw-r--r--offload/liboffload/src/OffloadImpl.cpp2
-rw-r--r--utils/bazel/llvm-project-overlay/libc/BUILD.bazel17
18 files changed, 393 insertions, 26 deletions
diff --git a/libc/shared/math.h b/libc/shared/math.h
index 7ad6b7e..ba6b1c2 100644
--- a/libc/shared/math.h
+++ b/libc/shared/math.h
@@ -17,5 +17,6 @@
#include "math/frexpf128.h"
#include "math/frexpf16.h"
#include "math/ldexpf128.h"
+#include "math/ldexpf16.h"
#endif // LLVM_LIBC_SHARED_MATH_H
diff --git a/libc/shared/math/ldexpf16.h b/libc/shared/math/ldexpf16.h
new file mode 100644
index 0000000..4c98c4c
--- /dev/null
+++ b/libc/shared/math/ldexpf16.h
@@ -0,0 +1,31 @@
+//===-- Shared ldexpf16 function --------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_MATH_LDEXPF16_H
+#define LLVM_LIBC_SHARED_MATH_LDEXPF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "shared/libc_common.h"
+#include "src/__support/math/ldexpf16.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace shared {
+
+using math::ldexpf16;
+
+} // namespace shared
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SHARED_MATH_LDEXPF16_H
diff --git a/libc/src/__support/math/CMakeLists.txt b/libc/src/__support/math/CMakeLists.txt
index 4adfc33..2215695 100644
--- a/libc/src/__support/math/CMakeLists.txt
+++ b/libc/src/__support/math/CMakeLists.txt
@@ -92,3 +92,13 @@ add_header_library(
libc.src.__support.FPUtil.manipulation_functions
libc.include.llvm-libc-types.float128
)
+
+add_header_library(
+ ldexpf16
+ HDRS
+ ldexpf16.h
+ DEPENDS
+ libc.src.__support.macros.properties.types
+ libc.src.__support.FPUtil.manipulation_functions
+ libc.include.llvm-libc-macros.float16_macros
+)
diff --git a/libc/src/__support/math/ldexpf16.h b/libc/src/__support/math/ldexpf16.h
new file mode 100644
index 0000000..fbead87
--- /dev/null
+++ b/libc/src/__support/math/ldexpf16.h
@@ -0,0 +1,34 @@
+//===-- Implementation header for ldexpf16 ----------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_MATH_LDEXPF16_H
+#define LLVM_LIBC_SRC___SUPPORT_MATH_LDEXPF16_H
+
+#include "include/llvm-libc-macros/float16-macros.h"
+
+#ifdef LIBC_TYPES_HAS_FLOAT16
+
+#include "src/__support/FPUtil/ManipulationFunctions.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+
+namespace LIBC_NAMESPACE_DECL {
+
+namespace math {
+
+// Header-only implementation of ldexpf16: forwards both arguments unchanged to
+// the generic fputil::ldexp helper and returns its result.
+static constexpr float16 ldexpf16(float16 x, int exp) {
+ return fputil::ldexp(x, exp);
+}
+
+} // namespace math
+
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LIBC_TYPES_HAS_FLOAT16
+
+#endif // LLVM_LIBC_SRC___SUPPORT_MATH_LDEXPF16_H
diff --git a/libc/src/math/generic/CMakeLists.txt b/libc/src/math/generic/CMakeLists.txt
index 4ad7e71..8be57cd 100644
--- a/libc/src/math/generic/CMakeLists.txt
+++ b/libc/src/math/generic/CMakeLists.txt
@@ -1933,8 +1933,7 @@ add_entrypoint_object(
HDRS
../ldexpf16.h
DEPENDS
- libc.src.__support.macros.properties.types
- libc.src.__support.FPUtil.manipulation_functions
+ libc.src.__support.math.ldexpf16
)
add_entrypoint_object(
diff --git a/libc/src/math/generic/ldexpf16.cpp b/libc/src/math/generic/ldexpf16.cpp
index caa344b..ecf1633 100644
--- a/libc/src/math/generic/ldexpf16.cpp
+++ b/libc/src/math/generic/ldexpf16.cpp
@@ -7,14 +7,13 @@
//===----------------------------------------------------------------------===//
#include "src/math/ldexpf16.h"
-#include "src/__support/FPUtil/ManipulationFunctions.h"
-#include "src/__support/common.h"
-#include "src/__support/macros/config.h"
+
+#include "src/__support/math/ldexpf16.h"
namespace LIBC_NAMESPACE_DECL {
LLVM_LIBC_FUNCTION(float16, ldexpf16, (float16 x, int exp)) {
- return fputil::ldexp(x, exp);
+ return math::ldexpf16(x, exp);
}
} // namespace LIBC_NAMESPACE_DECL
diff --git a/llvm/lib/Target/AArch64/AArch64SystemOperands.td b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
index 1f3d619..1b0e90b 100644
--- a/llvm/lib/Target/AArch64/AArch64SystemOperands.td
+++ b/llvm/lib/Target/AArch64/AArch64SystemOperands.td
@@ -2387,6 +2387,9 @@ def : RWSysReg<"TRBSR_EL3", 0b11, 0b110, 0b1001, 0b1011, 0b011>;
// v9.6 FEAT_PoPS
//
let Requires = [{ {AArch64::FeaturePoPS} }] in {
-def : DC<"CIGDVAPS", 0b000, 0b0111, 0b1111, 0b101>;
def : DC<"CIVAPS", 0b000, 0b0111, 0b1111, 0b001>;
}
+
+let Requires = [{ {AArch64::FeaturePoPS, AArch64::FeatureMTE} }] in {
+def : DC<"CIGDVAPS", 0b000, 0b0111, 0b1111, 0b101>;
+}
diff --git a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
index 6ae529e..31b4f11 100644
--- a/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
+++ b/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
@@ -327,6 +327,8 @@ DecodeStatus SystemZDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
ArrayRef<uint8_t> Bytes,
uint64_t Address,
raw_ostream &CS) const {
+ CommentStream = &CS;
+
// Get the first two bytes of the instruction.
Size = 0;
if (Bytes.size() < 2)
diff --git a/llvm/test/MC/AArch64/armv9.6a-ras.s b/llvm/test/MC/AArch64/armv9.6a-ras.s
index 80fcb6b..3cf240c 100644
--- a/llvm/test/MC/AArch64/armv9.6a-ras.s
+++ b/llvm/test/MC/AArch64/armv9.6a-ras.s
@@ -1,9 +1,9 @@
// RUN: not llvm-mc -triple aarch64 -show-encoding < %s 2>&1 | FileCheck %s --check-prefix NO-POPS
-// RUN: llvm-mc -triple aarch64 -mattr=+pops -show-encoding < %s 2>&1 | FileCheck %s --check-prefix HAS-POPS
+// RUN: llvm-mc -triple aarch64 -mattr=+pops,+mte -show-encoding < %s 2>&1 | FileCheck %s --check-prefix HAS-POPS
dc CIGDVAPS, x3
dc CIVAPS, x3
-// NO-POPS: error: DC CIGDVAPS requires: pops
+// NO-POPS: error: DC CIGDVAPS requires: mte, memtag, pops
// NO-POPS: error: DC CIVAPS requires: pops
# HAS-POPS: dc cigdvaps, x3 // encoding: [0xa3,0x7f,0x08,0xd5]
diff --git a/llvm/test/MC/Disassembler/AArch64/armv9.6a-ras.txt b/llvm/test/MC/Disassembler/AArch64/armv9.6a-ras.txt
index d6bf657..f2971bc 100644
--- a/llvm/test/MC/Disassembler/AArch64/armv9.6a-ras.txt
+++ b/llvm/test/MC/Disassembler/AArch64/armv9.6a-ras.txt
@@ -1,4 +1,4 @@
-# RUN: llvm-mc -triple aarch64 -mattr=+pops -disassemble < %s | FileCheck %s
+# RUN: llvm-mc -triple aarch64 -mattr=+pops,+mte -disassemble < %s | FileCheck %s
#------------------------------------------------------------------------------
# ARMV9.6-A RAS Extensions
diff --git a/llvm/unittests/MC/SystemZ/CMakeLists.txt b/llvm/unittests/MC/SystemZ/CMakeLists.txt
index 3b7af4a..6c10778 100644
--- a/llvm/unittests/MC/SystemZ/CMakeLists.txt
+++ b/llvm/unittests/MC/SystemZ/CMakeLists.txt
@@ -4,12 +4,14 @@ include_directories(
set(LLVM_LINK_COMPONENTS
SystemZ
+ MCDisassembler
MCParser
MC
Support
TargetParser
)
-add_llvm_unittest(SystemZAsmLexerTests
+add_llvm_unittest(SystemZMCTests
SystemZAsmLexerTest.cpp
+ SystemZMCDisassemblerTest.cpp
)
diff --git a/llvm/unittests/MC/SystemZ/SystemZMCDisassemblerTest.cpp b/llvm/unittests/MC/SystemZ/SystemZMCDisassemblerTest.cpp
new file mode 100644
index 0000000..df59fcb
--- /dev/null
+++ b/llvm/unittests/MC/SystemZ/SystemZMCDisassemblerTest.cpp
@@ -0,0 +1,103 @@
+//===- SystemZMCDisassemblerTest.cpp - Tests for SystemZ MCDisassembler ---===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+/// Bundles the MC-layer objects needed to drive the SystemZ disassembler.
+///
+/// The constructor initializes the SystemZ target info, MC and disassembler
+/// components and then looks the target up in the TargetRegistry. If the
+/// lookup fails, construction stops early and every unique_ptr member stays
+/// null, so users must check DisAsm before dereferencing anything.
+struct Context {
+  const char *TripleName = "systemz-unknown";
+  std::unique_ptr<MCRegisterInfo> MRI;
+  std::unique_ptr<MCAsmInfo> MAI;
+  std::unique_ptr<MCContext> Ctx;
+  std::unique_ptr<MCSubtargetInfo> STI;
+  std::unique_ptr<MCDisassembler> DisAsm;
+
+  Context() {
+    LLVMInitializeSystemZTargetInfo();
+    LLVMInitializeSystemZTargetMC();
+    LLVMInitializeSystemZDisassembler();
+
+    // If we didn't build SystemZ, do not run the test.
+    std::string Error;
+    const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+    if (!TheTarget)
+      return;
+
+    // MCContext keeps raw pointers to MAI, MRI and STI, so those are created
+    // first and owned by this struct alongside the context itself.
+    MRI.reset(TheTarget->createMCRegInfo(TripleName));
+    MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
+    STI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
+    Ctx = std::make_unique<MCContext>(Triple(TripleName), MAI.get(), MRI.get(),
+                                      STI.get());
+
+    DisAsm.reset(TheTarget->createMCDisassembler(*STI, *Ctx));
+  }
+
+  /// Lets a Context be passed wherever an MCContext reference is expected.
+  /// Dereferences Ctx, so it must only be used after a successful target
+  /// lookup in the constructor.
+  operator MCContext &() { return *Ctx; }
+};
+
+// Returns the Context instance shared by the tests in this file. The instance
+// is a function-local static, so it is constructed on the first call and the
+// same object is returned on every later call.
+Context &getContext() {
+ static Context Ctxt;
+ return Ctxt;
+}
+
+/// Minimal MCSymbolizer whose hooks perform no real symbolization.
+///
+/// NOTE(review): presumably installed so that the disassembler goes through
+/// the symbolizer path that touches the comment stream -- confirm against
+/// MCDisassembler::tryAddingSymbolicOperand.
+class SystemZMCSymbolizerTest : public MCSymbolizer {
+public:
+  /// Forwards \p MC to the base class and passes no relocation info.
+  explicit SystemZMCSymbolizerTest(MCContext &MC)
+      : MCSymbolizer(MC, nullptr) {}
+  ~SystemZMCSymbolizerTest() override = default;
+
+  /// Ignores every argument and unconditionally returns true.
+  bool tryAddingSymbolicOperand([[maybe_unused]] MCInst &Inst,
+                                [[maybe_unused]] raw_ostream &CStream,
+                                [[maybe_unused]] int64_t Value,
+                                [[maybe_unused]] uint64_t Address,
+                                [[maybe_unused]] bool IsBranch,
+                                [[maybe_unused]] uint64_t Offset,
+                                [[maybe_unused]] uint64_t OpSize,
+                                [[maybe_unused]] uint64_t InstSize) override {
+    return true;
+  }
+
+  /// Intentionally a no-op: no comment is ever emitted.
+  void
+  tryAddingPcLoadReferenceComment([[maybe_unused]] raw_ostream &cStream,
+                                  [[maybe_unused]] int64_t Value,
+                                  [[maybe_unused]] uint64_t Address) override {}
+};
+
+} // namespace
+
+// Decodes a single instruction with a custom symbolizer installed and checks
+// that decoding succeeds and consumes all six bytes.
+TEST(SystemZDisassembler, SystemZMCSymbolizerTest) {
+  // Context leaves DisAsm (and Ctx) null when the SystemZ target is not
+  // registered; skip the test instead of dereferencing a null pointer.
+  if (!getContext().DisAsm)
+    GTEST_SKIP();
+
+  // The disassembler takes ownership of the symbolizer.
+  getContext().DisAsm->setSymbolizer(
+      std::make_unique<SystemZMCSymbolizerTest>(getContext()));
+
+  MCInst Inst;
+  uint64_t InstSize = 0;
+
+  // Check that the SystemZ disassembler sets the comment stream before calling
+  // MCDisassembler::tryAddingSymbolicOperand. This will fail an assert if it
+  // does not do that.
+  MCDisassembler::DecodeStatus Status = getContext().DisAsm->getInstruction(
+      Inst, InstSize,
+      // lgrl %r1, 0x1234
+      {0xc4, 0x18, 0x00, 0x00, 0x9a, 0x1a}, 0, nulls());
+  ASSERT_EQ(Status, MCDisassembler::Success);
+  EXPECT_EQ(InstSize, uint64_t{6});
+}
diff --git a/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp b/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp
index d2a0326..ec2bc95 100644
--- a/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp
+++ b/mlir/lib/Dialect/MemRef/Transforms/EmulateNarrowType.cpp
@@ -323,19 +323,28 @@ struct ConvertMemRefLoad final : OpConversionPattern<memref::LoadOp> {
// It is not clear if this case actually happens in practice, but we keep
// the operations just in case. Otherwise, if the arith computation bitwidth
// is different from the emulated bitwidth we truncate the result.
- Operation *result;
+ Value result;
auto resultTy = getTypeConverter()->convertType(oldElementType);
- if (resultTy == convertedElementType) {
+ auto conversionTy =
+ resultTy.isInteger()
+ ? resultTy
+ : IntegerType::get(rewriter.getContext(),
+ resultTy.getIntOrFloatBitWidth());
+ if (conversionTy == convertedElementType) {
auto mask = rewriter.create<arith::ConstantOp>(
loc, convertedElementType,
rewriter.getIntegerAttr(convertedElementType, (1 << srcBits) - 1));
result = rewriter.create<arith::AndIOp>(loc, bitsLoad, mask);
} else {
- result = rewriter.create<arith::TruncIOp>(loc, resultTy, bitsLoad);
+ result = rewriter.create<arith::TruncIOp>(loc, conversionTy, bitsLoad);
}
- rewriter.replaceOp(op, result->getResult(0));
+ if (conversionTy != resultTy) {
+ result = rewriter.create<arith::BitcastOp>(loc, resultTy, result);
+ }
+
+ rewriter.replaceOp(op, result);
return success();
}
};
@@ -415,8 +424,18 @@ struct ConvertMemrefStore final : OpConversionPattern<memref::StoreOp> {
}
Location loc = op.getLoc();
- Value extendedInput = rewriter.create<arith::ExtUIOp>(loc, dstIntegerType,
- adaptor.getValue());
+
+ // Pad the input value with 0s on the left.
+ Value input = adaptor.getValue();
+ if (!input.getType().isInteger()) {
+ input = rewriter.create<arith::BitcastOp>(
+ loc,
+ IntegerType::get(rewriter.getContext(),
+ input.getType().getIntOrFloatBitWidth()),
+ input);
+ }
+ Value extendedInput =
+ rewriter.create<arith::ExtUIOp>(loc, dstIntegerType, input);
// Special case 0-rank memref stores. No need for masking.
if (convertedType.getRank() == 0) {
@@ -619,11 +638,11 @@ void memref::populateMemRefNarrowTypeEmulationConversions(
arith::NarrowTypeEmulationConverter &typeConverter) {
typeConverter.addConversion(
[&typeConverter](MemRefType ty) -> std::optional<Type> {
- auto intTy = dyn_cast<IntegerType>(ty.getElementType());
- if (!intTy)
+ Type elementType = ty.getElementType();
+ if (!elementType.isIntOrFloat())
return ty;
- unsigned width = intTy.getWidth();
+ unsigned width = elementType.getIntOrFloatBitWidth();
unsigned loadStoreWidth = typeConverter.getLoadStoreBitwidth();
if (width >= loadStoreWidth)
return ty;
@@ -636,8 +655,11 @@ void memref::populateMemRefNarrowTypeEmulationConversions(
if (!strides.empty() && strides.back() != 1)
return nullptr;
- auto newElemTy = IntegerType::get(ty.getContext(), loadStoreWidth,
- intTy.getSignedness());
+ auto newElemTy = IntegerType::get(
+ ty.getContext(), loadStoreWidth,
+ elementType.isInteger()
+ ? cast<IntegerType>(elementType).getSignedness()
+ : IntegerType::SignednessSemantics::Signless);
if (!newElemTy)
return nullptr;
diff --git a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
index 004bead..0fe0841 100644
--- a/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
+++ b/mlir/lib/Dialect/Vector/Transforms/VectorEmulateNarrowType.cpp
@@ -1268,8 +1268,18 @@ struct ConvertVectorTransferRead final
bool isDivisibleInSize =
fitsInMultiByteContainerTy(op.getVectorType(), containerElemTy);
- auto newPadding = rewriter.create<arith::ExtUIOp>(loc, containerElemTy,
- adaptor.getPadding());
+ // Pad the padding value with 0s on the left. These bits are discarded and
+ // thus their values don't matter.
+ Value padding = adaptor.getPadding();
+ if (!padding.getType().isInteger()) {
+ padding = rewriter.create<arith::BitcastOp>(
+ loc,
+ IntegerType::get(rewriter.getContext(),
+ padding.getType().getIntOrFloatBitWidth()),
+ padding);
+ }
+ auto newPadding =
+ rewriter.create<arith::ExtUIOp>(loc, containerElemTy, padding);
auto stridedMetadata =
rewriter.create<memref::ExtractStridedMetadataOp>(loc, op.getBase());
diff --git a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
index 3378d32..0cce8c1 100644
--- a/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
+++ b/mlir/test/Dialect/MemRef/emulate-narrow-type.mlir
@@ -61,6 +61,41 @@ func.func @memref_load_i4(%arg0: index) -> i4 {
// -----
+func.func @memref_load_f4(%arg0: index) -> f4E2M1FN {
+ %0 = memref.alloc() : memref<5xf4E2M1FN>
+ %1 = memref.load %0[%arg0] : memref<5xf4E2M1FN>
+ return %1 : f4E2M1FN
+}
+// CHECK-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 floordiv 2)>
+// CHECK-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 2) * 8)
+// CHECK: func @memref_load_f4(
+// CHECK-SAME: %[[ARG0:.+]]: index
+// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<3xi8>
+// CHECK: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]]
+// CHECK: %[[LOADVAL:.+]] = memref.load %[[ALLOC]][%[[INDEX]]]
+// CHECK: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]]]
+// CHECK: %[[CAST:.+]] = arith.index_cast %[[BITOFFSET]] : index to i8
+// CHECK: %[[SHIFTRT:.+]] = arith.shrsi %[[LOADVAL]], %[[CAST]]
+// CHECK: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i8 to i4
+// CHECK: %[[BC:.+]] = arith.bitcast %[[TRUNC]] : i4 to f4E2M1FN
+// CHECK: return %[[BC]]
+
+// CHECK32-DAG: #[[MAP0:.+]] = affine_map<()[s0] -> (s0 floordiv 8)>
+// CHECK32-DAG: #[[MAP1:.+]] = affine_map<()[s0] -> (s0 * 4 - (s0 floordiv 8) * 32)
+// CHECK32: func @memref_load_f4(
+// CHECK32-SAME: %[[ARG0:.+]]: index
+// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<1xi32>
+// CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP0]]()[%[[ARG0]]]
+// CHECK32: %[[LOADVAL:.+]] = memref.load %[[ALLOC]][%[[INDEX]]]
+// CHECK32: %[[BITOFFSET:.+]] = affine.apply #[[MAP1]]()[%[[ARG0]]]
+// CHECK32: %[[CAST:.+]] = arith.index_cast %[[BITOFFSET]] : index to i32
+// CHECK32: %[[SHIFTRT:.+]] = arith.shrsi %[[LOADVAL]], %[[CAST]]
+// CHECK32: %[[TRUNC:.+]] = arith.trunci %[[SHIFTRT]] : i32 to i4
+// CHECK32: %[[BC:.+]] = arith.bitcast %[[TRUNC]] : i4 to f4E2M1FN
+// CHECK32: return %[[BC]]
+
+// -----
+
func.func @memref_load_i4_rank2(%arg0: index, %arg1: index) -> i4 {
%0 = memref.alloc() : memref<3x125xi4>
%align0 = memref.assume_alignment %0, 64 : memref<3x125xi4>
@@ -470,6 +505,29 @@ func.func @rank_zero_memref_store(%arg0: i4) -> () {
// -----
+func.func @rank_zero_memref_store_f4(%arg0: f4E2M1FN) -> () {
+ %0 = memref.alloc() : memref<f4E2M1FN>
+ memref.store %arg0, %0[] : memref<f4E2M1FN>
+ return
+}
+// CHECK-LABEL: func @rank_zero_memref
+// CHECK-SAME: %[[ARG0:.+]]: f4E2M1FN
+// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<i8>
+// CHECK: %[[BC:.+]] = arith.bitcast %[[ARG0]] : f4E2M1FN to i4
+// CHECK: %[[EXTUI:.+]] = arith.extui %[[BC]] : i4 to i8
+// CHECK: %[[WRITE_RMW:.+]] = memref.atomic_rmw assign %[[EXTUI]], %[[ALLOC]][] : (i8, memref<i8>) -> i8
+// CHECK: return
+
+// CHECK32-LABEL: func @rank_zero_memref
+// CHECK32-SAME: %[[ARG0:.+]]: f4E2M1FN
+// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<i32>
+// CHECK32: %[[BC:.+]] = arith.bitcast %[[ARG0]] : f4E2M1FN to i4
+// CHECK32: %[[EXTUI:.+]] = arith.extui %[[BC]] : i4 to i32
+// CHECK32: %[[WRITE_RMW:.+]] = memref.atomic_rmw assign %[[EXTUI]], %[[ALLOC]][] : (i32, memref<i32>) -> i32
+// CHECK32: return
+
+// -----
+
func.func @memref_collapse_shape_i4(%idx0 : index, %idx1 : index) -> i4 {
%arr = memref.alloc() : memref<32x8x128xi4>
%collapse = memref.collapse_shape %arr[[0, 1], [2]] : memref<32x8x128xi4> into memref<256x128xi4>
diff --git a/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir b/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir
index 6c92449..98b1f07 100644
--- a/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir
+++ b/mlir/test/Dialect/Vector/vector-emulate-narrow-type.mlir
@@ -53,6 +53,31 @@ func.func @vector_load_i4(%arg1: index, %arg2: index) -> vector<3x8xi4> {
// -----
+func.func @vector_load_f4(%arg1: index, %arg2: index) -> vector<3x8xf4E2M1FN> {
+ %0 = memref.alloc() : memref<3x8xf4E2M1FN>
+ %cst = arith.constant dense<0.0> : vector<3x8xf4E2M1FN>
+ %1 = vector.load %0[%arg1, %arg2] : memref<3x8xf4E2M1FN>, vector<8xf4E2M1FN>
+ %2 = vector.insert %1, %cst [0] : vector<8xf4E2M1FN> into vector<3x8xf4E2M1FN>
+ return %2 : vector<3x8xf4E2M1FN>
+}
+// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)>
+// CHECK: func @vector_load_f4
+// CHECK-SAME: (%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index)
+// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<12xi8>
+// CHECK: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG0]], %[[ARG1]]]
+// CHECK: %[[VEC:.+]] = vector.load %[[ALLOC]][%[[INDEX]]] : memref<12xi8>, vector<4xi8>
+// CHECK: %[[VEC_F4:.+]] = vector.bitcast %[[VEC]] : vector<4xi8> to vector<8xf4E2M1FN>
+
+// CHECK32-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1 floordiv 8)>
+// CHECK32: func @vector_load_f4
+// CHECK32-SAME: (%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index)
+// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<3xi32>
+// CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG0]], %[[ARG1]]]
+// CHECK32: %[[VEC:.+]] = vector.load %[[ALLOC]][%[[INDEX]]] : memref<3xi32>, vector<1xi32>
+// CHECK32: %[[VEC_F4:.+]] = vector.bitcast %[[VEC]] : vector<1xi32> to vector<8xf4E2M1FN>
+
+// -----
+
func.func @vector_load_i4_dynamic(%arg0 : index, %arg1 : index, %arg2 : index, %arg3 : index) -> vector<8xi4> {
%0 = memref.alloc(%arg0, %arg1) : memref<?x?xi4>
%1 = vector.load %0[%arg2, %arg3] : memref<?x?xi4>, vector<8xi4>
@@ -119,6 +144,37 @@ func.func @vector_transfer_read_i4(%arg1: index, %arg2: index) -> vector<8xi4> {
// -----
+func.func @vector_transfer_read_f4(%arg1: index, %arg2: index) -> vector<8xf4E2M1FN> {
+ %c0 = arith.constant 0.0 : f4E2M1FN
+ %0 = memref.alloc() : memref<3x8xf4E2M1FN>
+ %1 = vector.transfer_read %0[%arg1, %arg2], %c0 {in_bounds = [true]} :
+ memref<3x8xf4E2M1FN>, vector<8xf4E2M1FN>
+ return %1 : vector<8xf4E2M1FN>
+}
+// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)>
+// CHECK: func @vector_transfer_read_f4
+// CHECK-SAME: (%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index)
+// CHECK: %[[CONST:.+]] = arith.constant 0.{{0+}}e+00 : f4E2M1FN
+// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<12xi8>
+// CHECK: %[[BC:.+]] = arith.bitcast %[[CONST]] : f4E2M1FN to i4
+// CHECK: %[[PAD:.+]] = arith.extui %[[BC]] : i4 to i8
+// CHECK: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG0]], %[[ARG1]]]
+// CHECK: %[[VEC:.+]] = vector.transfer_read %[[ALLOC]][%[[INDEX]]], %[[PAD]] : memref<12xi8>, vector<4xi8>
+// CHECK: %[[VEC_F4:.+]] = vector.bitcast %[[VEC]] : vector<4xi8> to vector<8xf4E2M1FN>
+
+// CHECK32-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1 floordiv 8)>
+// CHECK32: func @vector_transfer_read_f4
+// CHECK32-SAME: (%[[ARG0:[a-zA-Z0-9]+]]: index, %[[ARG1:[a-zA-Z0-9]+]]: index)
+// CHECK32: %[[CONST:.+]] = arith.constant 0.{{0+}}e+00 : f4E2M1FN
+// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<3xi32>
+// CHECK32: %[[BC:.+]] = arith.bitcast %[[CONST]] : f4E2M1FN to i4
+// CHECK32: %[[PAD:.+]] = arith.extui %[[BC]] : i4 to i32
+// CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG0]], %[[ARG1]]]
+// CHECK32: %[[VEC:.+]] = vector.transfer_read %[[ALLOC]][%[[INDEX]]], %[[PAD]] : memref<3xi32>, vector<1xi32>
+// CHECK32: %[[VEC_F4:.+]] = vector.bitcast %[[VEC]] : vector<1xi32> to vector<8xf4E2M1FN>
+
+// -----
+
///----------------------------------------------------------------------------------------
/// vector.maskedload
///----------------------------------------------------------------------------------------
@@ -439,6 +495,28 @@ func.func @vector_store_i4(%arg0: vector<8xi4>, %arg1: index, %arg2: index) {
// -----
+func.func @vector_store_f4(%arg0: vector<8xf4E2M1FN>, %arg1: index, %arg2: index) {
+ %0 = memref.alloc() : memref<4x8xf4E2M1FN>
+ vector.store %arg0, %0[%arg1, %arg2] :memref<4x8xf4E2M1FN>, vector<8xf4E2M1FN>
+ return
+}
+
+// CHECK-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 * 4 + s1 floordiv 2)>
+// CHECK: func @vector_store_f4
+// CHECK: %[[ALLOC:.+]] = memref.alloc() : memref<16xi8>
+// CHECK: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]]]
+// CHECK: %[[VEC_I8:.+]] = vector.bitcast %[[ARG0]] : vector<8xf4E2M1FN> to vector<4xi8>
+// CHECK: vector.store %[[VEC_I8:.+]], %[[ALLOC:.+]][%[[INDEX:.+]]] : memref<16xi8>, vector<4xi8>
+
+// CHECK32-DAG: #[[MAP:.+]] = affine_map<()[s0, s1] -> (s0 + s1 floordiv 8)>
+// CHECK32: func @vector_store_f4
+// CHECK32: %[[ALLOC:.+]] = memref.alloc() : memref<4xi32>
+// CHECK32: %[[INDEX:.+]] = affine.apply #[[MAP]]()[%[[ARG1]], %[[ARG2]]]
+// CHECK32: %[[VEC_I32:.+]] = vector.bitcast %[[ARG0]] : vector<8xf4E2M1FN> to vector<1xi32>
+// CHECK32: vector.store %[[VEC_I32:.+]], %[[ALLOC:.+]][%[[INDEX:.+]]] : memref<4xi32>, vector<1xi32>
+
+// -----
+
// FIXME: This example assumes that the store happens at a byte boundary, but
// that's not guaranteed. Below is a counter-example with specific dimensions:
// vector.store %arg0, %0[0, 3] : memref<2x13xi4>, vector<8xi4>
diff --git a/offload/liboffload/src/OffloadImpl.cpp b/offload/liboffload/src/OffloadImpl.cpp
index 17a2b00c..c4e7f96 100644
--- a/offload/liboffload/src/OffloadImpl.cpp
+++ b/offload/liboffload/src/OffloadImpl.cpp
@@ -231,7 +231,7 @@ Error olShutDown_impl() {
for (auto &P : OldContext->Platforms) {
// Host plugin is nullptr and has no deinit
- if (!P.Plugin)
+ if (!P.Plugin || !P.Plugin->is_initialized())
continue;
if (auto Res = P.Plugin->deinit())
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index b429d662..5476a17 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -2187,6 +2187,16 @@ libc_support_library(
],
)
+# Header-only ldexpf16 implementation shared with the libc_math_function
+# target of the same name.
+libc_support_library(
+    name = "__support_math_ldexpf16",
+    hdrs = ["src/__support/math/ldexpf16.h"],
+    deps = [
+        ":__support_fputil_manipulation_functions",
+        ":__support_macros_properties_types",
+        ":llvm_libc_macros_float16_macros",
+    ],
+)
+
############################### complex targets ################################
libc_function(
@@ -3347,7 +3357,12 @@ libc_math_function(
],
)
-libc_math_function(name = "ldexpf16")
+libc_math_function(
+ name = "ldexpf16",
+ additional_deps = [
+ ":__support_math_ldexpf16",
+ ],
+)
libc_math_function(name = "llogb")