diff options
author | R <rqou@berkeley.edu> | 2025-03-11 02:01:57 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-03-11 02:01:57 +0000 |
commit | 1dffe8f364a4d5ed0b1efc981a1d9a30166aae05 (patch) | |
tree | e668f5f576caf123d404987209ebd5010813552b | |
parent | 3226617f023d46708414f867c726695a1a2dd562 (diff) | |
download | llvm-1dffe8f364a4d5ed0b1efc981a1d9a30166aae05.zip llvm-1dffe8f364a4d5ed0b1efc981a1d9a30166aae05.tar.gz llvm-1dffe8f364a4d5ed0b1efc981a1d9a30166aae05.tar.bz2 |
Reland [flang] In AllocMemOp lowering, convert types for calling malloc on 32-bit (#130386)
Previous PR: https://github.com/llvm/llvm-project/pull/129308
Changes since the previous PR:
* The alloc-32.fir test is now marked as requiring the X86 target.
* Drive-by fixes for issues uncovered while fixing tests involving malloc
-rw-r--r-- | flang/lib/Optimizer/CodeGen/CodeGen.cpp | 22 | ||||
-rw-r--r-- | flang/lib/Optimizer/CodeGen/TypeConverter.cpp | 19 | ||||
-rw-r--r-- | flang/test/Fir/alloc-32.fir | 30 | ||||
-rw-r--r-- | flang/test/Fir/alloc.fir | 4 | ||||
-rw-r--r-- | flang/test/Integration/OpenMP/private-global.f90 | 10 | ||||
-rw-r--r-- | flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 | 2 | ||||
-rw-r--r-- | flang/test/Lower/forall/character-1.f90 | 4 | ||||
-rw-r--r-- | llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp | 4 |
8 files changed, 79 insertions, 16 deletions
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index a2743ed..b5b2f39 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -982,7 +982,8 @@ struct EmboxCharOpConversion : public fir::FIROpConversion<fir::EmboxCharOp> { template <typename ModuleOp> static mlir::SymbolRefAttr getMallocInModule(ModuleOp mod, fir::AllocMemOp op, - mlir::ConversionPatternRewriter &rewriter) { + mlir::ConversionPatternRewriter &rewriter, + mlir::Type indexType) { static constexpr char mallocName[] = "malloc"; if (auto mallocFunc = mod.template lookupSymbol<mlir::LLVM::LLVMFuncOp>(mallocName)) @@ -992,7 +993,6 @@ getMallocInModule(ModuleOp mod, fir::AllocMemOp op, return mlir::SymbolRefAttr::get(userMalloc); mlir::OpBuilder moduleBuilder(mod.getBodyRegion()); - auto indexType = mlir::IntegerType::get(op.getContext(), 64); auto mallocDecl = moduleBuilder.create<mlir::LLVM::LLVMFuncOp>( op.getLoc(), mallocName, mlir::LLVM::LLVMFunctionType::get(getLlvmPtrType(op.getContext()), @@ -1002,12 +1002,13 @@ getMallocInModule(ModuleOp mod, fir::AllocMemOp op, } /// Return the LLVMFuncOp corresponding to the standard malloc call. 
-static mlir::SymbolRefAttr -getMalloc(fir::AllocMemOp op, mlir::ConversionPatternRewriter &rewriter) { +static mlir::SymbolRefAttr getMalloc(fir::AllocMemOp op, + mlir::ConversionPatternRewriter &rewriter, + mlir::Type indexType) { if (auto mod = op->getParentOfType<mlir::gpu::GPUModuleOp>()) - return getMallocInModule(mod, op, rewriter); + return getMallocInModule(mod, op, rewriter, indexType); auto mod = op->getParentOfType<mlir::ModuleOp>(); - return getMallocInModule(mod, op, rewriter); + return getMallocInModule(mod, op, rewriter, indexType); } /// Helper function for generating the LLVM IR that computes the distance @@ -1067,7 +1068,12 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> { for (mlir::Value opnd : adaptor.getOperands()) size = rewriter.create<mlir::LLVM::MulOp>( loc, ity, size, integerCast(loc, rewriter, ity, opnd)); - heap->setAttr("callee", getMalloc(heap, rewriter)); + auto mallocTyWidth = lowerTy().getIndexTypeBitwidth(); + auto mallocTy = + mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth); + if (mallocTyWidth != ity.getIntOrFloatBitWidth()) + size = integerCast(loc, rewriter, mallocTy, size); + heap->setAttr("callee", getMalloc(heap, rewriter, mallocTy)); rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>( heap, ::getLlvmPtrType(heap.getContext()), size, addLLVMOpBundleAttrs(rewriter, heap->getAttrs(), 1)); @@ -2116,7 +2122,7 @@ private: unsigned dim = iter.index(); mlir::Value lb = one; if (!lbounds.empty()) { - lb = lbounds[dim]; + lb = integerCast(loc, rewriter, lowerTy().indexType(), lbounds[dim]); auto extentIsEmpty = rewriter.create<mlir::LLVM::ICmpOp>( loc, mlir::LLVM::ICmpPredicate::eq, extent, zero); lb = rewriter.create<mlir::LLVM::SelectOp>(loc, extentIsEmpty, one, lb); diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp index 89f4984..1a1d3a8 100644 --- a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp +++ 
b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp @@ -28,10 +28,27 @@ namespace fir { +static mlir::LowerToLLVMOptions MakeLowerOptions(mlir::ModuleOp module) { + llvm::StringRef dataLayoutString; + auto dataLayoutAttr = module->template getAttrOfType<mlir::StringAttr>( + mlir::LLVM::LLVMDialect::getDataLayoutAttrName()); + if (dataLayoutAttr) + dataLayoutString = dataLayoutAttr.getValue(); + + auto options = mlir::LowerToLLVMOptions(module.getContext()); + auto llvmDL = llvm::DataLayout(dataLayoutString); + if (llvmDL.getPointerSizeInBits(0) == 32) { + // FIXME: Should translateDataLayout in the MLIR layer be doing this? + options.overrideIndexBitwidth(32); + } + options.dataLayout = llvmDL; + return options; +} + LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA, bool forceUnifiedTBAATree, const mlir::DataLayout &dl) - : mlir::LLVMTypeConverter(module.getContext()), + : mlir::LLVMTypeConverter(module.getContext(), MakeLowerOptions(module)), kindMapping(getKindMapping(module)), specifics(CodeGenSpecifics::get( module.getContext(), getTargetTriple(module), getKindMapping(module), diff --git a/flang/test/Fir/alloc-32.fir b/flang/test/Fir/alloc-32.fir new file mode 100644 index 0000000..3eefc32 --- /dev/null +++ b/flang/test/Fir/alloc-32.fir @@ -0,0 +1,30 @@ +// RUN: %flang_fc1 -triple i686 -emit-llvm %s -o - | FileCheck %s +// REQUIRES: x86-registered-target + +// This is a check for calling malloc using i32 when on a 32-bit target (only). +// It doesn't contain the comprehensive tests that alloc.fir has, and +// that file should be used to exercise most code paths. 
+ +module attributes { + fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "i686" +} { + +// CHECK-LABEL: define ptr @allocmem_scalar_nonchar( +// CHECK: call ptr @malloc(i32 4) +func.func @allocmem_scalar_nonchar() -> !fir.heap<i32> { + %1 = fir.allocmem i32 + return %1 : !fir.heap<i32> +} + +// CHECK-LABEL: define ptr @allocmem_scalar_dynchar( +// CHECK-SAME: i32 %[[len:.*]]) +// CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64 +// CHECK: %[[mul2:.*]] = mul i64 1, %[[mul1]] +// CHECK: %[[trunc:.*]] = trunc i64 %[[mul2]] to i32 +// CHECK: call ptr @malloc(i32 %[[trunc]]) +func.func @allocmem_scalar_dynchar(%l : i32) -> !fir.heap<!fir.char<1,?>> { + %1 = fir.allocmem !fir.char<1,?>(%l : i32) + return %1 : !fir.heap<!fir.char<1,?>> +} + +} diff --git a/flang/test/Fir/alloc.fir b/flang/test/Fir/alloc.fir index ba9b08d..5b4930b 100644 --- a/flang/test/Fir/alloc.fir +++ b/flang/test/Fir/alloc.fir @@ -2,6 +2,10 @@ // RUN: %flang_fc1 -emit-llvm %s -o - | FileCheck %s // UNSUPPORTED: system-windows +// Disabled on 32-bit targets due to the additional `trunc` opcodes required +// UNSUPPORTED: target-x86 +// UNSUPPORTED: target=sparc-{{.*}} +// UNSUPPORTED: target=sparcel-{{.*}} // CHECK-LABEL: define ptr @alloca_scalar_nonchar() // CHECK: alloca i32, i64 1 diff --git a/flang/test/Integration/OpenMP/private-global.f90 b/flang/test/Integration/OpenMP/private-global.f90 index 1aacfb4..8f8de8c 100644 --- a/flang/test/Integration/OpenMP/private-global.f90 +++ b/flang/test/Integration/OpenMP/private-global.f90 @@ -31,14 +31,14 @@ End Program ! CHECK: %[[FIFTY:.*]] = alloca i32, i64 1, align 4 ! CHECK: %[[INTERMEDIATE:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8 ! CHECK: %[[TABLE_BOX_ADDR2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8 -! CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[INTERMEDIATE]], ptr %[[PRIV_BOX_ALLOC]], i32 48, i1 false) +! 
CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[INTERMEDIATE]], ptr %[[PRIV_BOX_ALLOC]], i32 {{4[48]}}, i1 false) ! CHECK: store i32 50, ptr %[[FIFTY]], align 4 ! CHECK: %[[FIFTY_BOX_VAL:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 } { ptr undef, i64 4, i32 20240719, i8 0, i8 9, i8 0, i8 0 }, ptr %[[FIFTY]], 0 -! CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[FIFTY_BOX_VAL]], ptr %[[BOXED_FIFTY]], align 8 -! CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR2]], ptr %[[INTERMEDIATE]], i32 48, i1 false) +! CHECK: store { ptr, i64, i32, i8, i8, i8, i8 } %[[FIFTY_BOX_VAL]], ptr %[[BOXED_FIFTY]], align {{[48]}} +! CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR2]], ptr %[[INTERMEDIATE]], i32 {{4[48]}}, i1 false) ! CHECK: call void @_FortranAAssign(ptr %[[TABLE_BOX_ADDR2]], ptr %[[BOXED_FIFTY]], ptr @{{.*}}, i32 9) -! CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR]], ptr %[[PRIV_BOX_ALLOC]], i32 48, i1 false) -! CHECK: %[[PRIV_TABLE:.*]] = call ptr @malloc(i64 40) +! CHECK: call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR]], ptr %[[PRIV_BOX_ALLOC]], i32 {{4[48]}}, i1 false) +! CHECK: %[[PRIV_TABLE:.*]] = call ptr @malloc(i{{(32)|(64)}} 40) ! ... ! check that we use the private copy of table for table/=50 ! 
CHECK: omp.par.region3: diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 index be25169..f769fd3 100644 --- a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 +++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 @@ -36,7 +36,7 @@ end subroutine proc !CHECK: [[MALLOC_BB]]: !CHECK-NOT: omp.par.{{.*}}: -!CHECK: call ptr @malloc(i64 80) +!CHECK: call ptr @malloc(i{{(32)|(64)}} 80) !CHECK: %[[RED_ARR_0:.*]] = getelementptr inbounds [2 x ptr], ptr %red.array, i64 0, i64 0 !CHECK: store ptr %[[F_priv]], ptr %[[RED_ARR_0:.*]] diff --git a/flang/test/Lower/forall/character-1.f90 b/flang/test/Lower/forall/character-1.f90 index d5f968b..69064dd 100644 --- a/flang/test/Lower/forall/character-1.f90 +++ b/flang/test/Lower/forall/character-1.f90 @@ -2,6 +2,10 @@ ! RUN: %flang -emit-llvm -flang-deprecated-no-hlfir -S -mmlir -disable-external-name-interop %s -o - | FileCheck %s ! Test from Fortran source through to LLVM IR. ! UNSUPPORTED: system-windows +! Disabled on 32-bit targets due to the additional `trunc` opcodes required +! UNSUPPORTED: target-x86 +! UNSUPPORTED: target=sparc-{{.*}} +! UNSUPPORTED: target=sparcel-{{.*}} ! Assumed size array of assumed length character. program test diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp index 014b1c8..7718d8a 100644 --- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp +++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp @@ -3771,6 +3771,8 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc, // Emit a call to the runtime function that orchestrates the reduction. // Declare the reduction function in the process. 
+ Type *IndexTy = Builder.getIndexTy( + M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace()); Function *Func = Builder.GetInsertBlock()->getParent(); Module *Module = Func->getParent(); uint32_t SrcLocStrSize; @@ -3786,7 +3788,7 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc, Constant *NumVariables = Builder.getInt32(NumReductions); const DataLayout &DL = Module->getDataLayout(); unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy); - Constant *RedArraySize = Builder.getInt64(RedArrayByteSize); + Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize); Function *ReductionFunc = getFreshReductionFunc(*Module); Value *Lock = getOMPCriticalRegionLock(".reduction"); Function *ReduceFunc = getOrCreateRuntimeFunctionPtr( |