Reland [flang] In AllocMemOp lowering, convert types for calling malloc on 32-bit (#130386)

Previous PR: https://github.com/llvm/llvm-project/pull/129308 Changes: * The alloc-32.fir test is now marked as requiring the X86 target. * Drive-by fixes uncovered when fixing tests involving malloc
author: R <rqou@berkeley.edu> 2025-03-11 02:01:57 +0000
committer: GitHub <noreply@github.com> 2025-03-11 02:01:57 +0000
commit: 1dffe8f364a4d5ed0b1efc981a1d9a30166aae05 (patch)
tree: e668f5f576caf123d404987209ebd5010813552b
parent: 3226617f023d46708414f867c726695a1a2dd562 (diff)
download: llvm-1dffe8f364a4d5ed0b1efc981a1d9a30166aae05.zip
llvm-1dffe8f364a4d5ed0b1efc981a1d9a30166aae05.tar.gz
llvm-1dffe8f364a4d5ed0b1efc981a1d9a30166aae05.tar.bz2
8 files changed, 79 insertions, 16 deletions
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index a2743ed..b5b2f39 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -982,7 +982,8 @@ struct EmboxCharOpConversion : public fir::FIROpConversion<fir::EmboxCharOp> {
 template <typename ModuleOp>
 static mlir::SymbolRefAttr
 getMallocInModule(ModuleOp mod, fir::AllocMemOp op,
-                  mlir::ConversionPatternRewriter &rewriter) {
+                  mlir::ConversionPatternRewriter &rewriter,
+                  mlir::Type indexType) {
   static constexpr char mallocName[] = "malloc";
   if (auto mallocFunc =
           mod.template lookupSymbol<mlir::LLVM::LLVMFuncOp>(mallocName))
@@ -992,7 +993,6 @@ getMallocInModule(ModuleOp mod, fir::AllocMemOp op,
     return mlir::SymbolRefAttr::get(userMalloc);
 
   mlir::OpBuilder moduleBuilder(mod.getBodyRegion());
-  auto indexType = mlir::IntegerType::get(op.getContext(), 64);
   auto mallocDecl = moduleBuilder.create<mlir::LLVM::LLVMFuncOp>(
       op.getLoc(), mallocName,
       mlir::LLVM::LLVMFunctionType::get(getLlvmPtrType(op.getContext()),
@@ -1002,12 +1002,13 @@ getMallocInModule(ModuleOp mod, fir::AllocMemOp op,
 }
 
 /// Return the LLVMFuncOp corresponding to the standard malloc call.
-static mlir::SymbolRefAttr
-getMalloc(fir::AllocMemOp op, mlir::ConversionPatternRewriter &rewriter) {
+static mlir::SymbolRefAttr getMalloc(fir::AllocMemOp op,
+                                     mlir::ConversionPatternRewriter &rewriter,
+                                     mlir::Type indexType) {
   if (auto mod = op->getParentOfType<mlir::gpu::GPUModuleOp>())
-    return getMallocInModule(mod, op, rewriter);
+    return getMallocInModule(mod, op, rewriter, indexType);
   auto mod = op->getParentOfType<mlir::ModuleOp>();
-  return getMallocInModule(mod, op, rewriter);
+  return getMallocInModule(mod, op, rewriter, indexType);
 }
 
 /// Helper function for generating the LLVM IR that computes the distance
@@ -1067,7 +1068,12 @@ struct AllocMemOpConversion : public fir::FIROpConversion<fir::AllocMemOp> {
     for (mlir::Value opnd : adaptor.getOperands())
       size = rewriter.create<mlir::LLVM::MulOp>(
           loc, ity, size, integerCast(loc, rewriter, ity, opnd));
-    heap->setAttr("callee", getMalloc(heap, rewriter));
+    auto mallocTyWidth = lowerTy().getIndexTypeBitwidth();
+    auto mallocTy =
+        mlir::IntegerType::get(rewriter.getContext(), mallocTyWidth);
+    if (mallocTyWidth != ity.getIntOrFloatBitWidth())
+      size = integerCast(loc, rewriter, mallocTy, size);
+    heap->setAttr("callee", getMalloc(heap, rewriter, mallocTy));
     rewriter.replaceOpWithNewOp<mlir::LLVM::CallOp>(
         heap, ::getLlvmPtrType(heap.getContext()), size,
         addLLVMOpBundleAttrs(rewriter, heap->getAttrs(), 1));
@@ -2116,7 +2122,7 @@ private:
       unsigned dim = iter.index();
       mlir::Value lb = one;
       if (!lbounds.empty()) {
-        lb = lbounds[dim];
+        lb = integerCast(loc, rewriter, lowerTy().indexType(), lbounds[dim]);
         auto extentIsEmpty = rewriter.create<mlir::LLVM::ICmpOp>(
             loc, mlir::LLVM::ICmpPredicate::eq, extent, zero);
         lb = rewriter.create<mlir::LLVM::SelectOp>(loc, extentIsEmpty, one, lb);
diff --git a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
index 89f4984..1a1d3a8 100644
--- a/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
+++ b/flang/lib/Optimizer/CodeGen/TypeConverter.cpp
@@ -28,10 +28,27 @@
 
 namespace fir {
 
+static mlir::LowerToLLVMOptions MakeLowerOptions(mlir::ModuleOp module) {
+  llvm::StringRef dataLayoutString;
+  auto dataLayoutAttr = module->template getAttrOfType<mlir::StringAttr>(
+      mlir::LLVM::LLVMDialect::getDataLayoutAttrName());
+  if (dataLayoutAttr)
+    dataLayoutString = dataLayoutAttr.getValue();
+
+  auto options = mlir::LowerToLLVMOptions(module.getContext());
+  auto llvmDL = llvm::DataLayout(dataLayoutString);
+  if (llvmDL.getPointerSizeInBits(0) == 32) {
+    // FIXME: Should translateDataLayout in the MLIR layer be doing this?
+    options.overrideIndexBitwidth(32);
+  }
+  options.dataLayout = llvmDL;
+  return options;
+}
+
 LLVMTypeConverter::LLVMTypeConverter(mlir::ModuleOp module, bool applyTBAA,
                                      bool forceUnifiedTBAATree,
                                      const mlir::DataLayout &dl)
-    : mlir::LLVMTypeConverter(module.getContext()),
+    : mlir::LLVMTypeConverter(module.getContext(), MakeLowerOptions(module)),
       kindMapping(getKindMapping(module)),
       specifics(CodeGenSpecifics::get(
           module.getContext(), getTargetTriple(module), getKindMapping(module),
diff --git a/flang/test/Fir/alloc-32.fir b/flang/test/Fir/alloc-32.fir
new file mode 100644
index 0000000..3eefc32
--- /dev/null
+++ b/flang/test/Fir/alloc-32.fir
@@ -0,0 +1,30 @@
+// RUN: %flang_fc1 -triple i686 -emit-llvm  %s -o - | FileCheck %s
+// REQUIRES: x86-registered-target
+
+// This is a check for calling malloc using i32 when on a 32-bit target (only).
+// It doesn't contain the comprehensive tests that alloc.fir has, and
+// that file should be used to exercise most code paths.
+
+module attributes {
+    fir.defaultkind = "a1c4d8i4l4r4", fir.kindmap = "", llvm.target_triple = "i686"
+} {
+
+// CHECK-LABEL: define ptr @allocmem_scalar_nonchar(
+// CHECK: call ptr @malloc(i32 4)
+func.func @allocmem_scalar_nonchar() -> !fir.heap<i32> {
+  %1 = fir.allocmem i32
+  return %1 : !fir.heap<i32>
+}
+
+// CHECK-LABEL: define ptr @allocmem_scalar_dynchar(
+// CHECK-SAME: i32 %[[len:.*]])
+// CHECK: %[[mul1:.*]] = sext i32 %[[len]] to i64
+// CHECK: %[[mul2:.*]] = mul i64 1, %[[mul1]]
+// CHECK: %[[trunc:.*]] = trunc i64 %[[mul2]] to i32
+// CHECK: call ptr @malloc(i32 %[[trunc]])
+func.func @allocmem_scalar_dynchar(%l : i32) -> !fir.heap<!fir.char<1,?>> {
+  %1 = fir.allocmem !fir.char<1,?>(%l : i32)
+  return %1 : !fir.heap<!fir.char<1,?>>
+}
+
+}
diff --git a/flang/test/Fir/alloc.fir b/flang/test/Fir/alloc.fir
index ba9b08d..5b4930b 100644
--- a/flang/test/Fir/alloc.fir
+++ b/flang/test/Fir/alloc.fir
@@ -2,6 +2,10 @@
 // RUN: %flang_fc1 -emit-llvm  %s -o - | FileCheck %s
 
 // UNSUPPORTED: system-windows
+// Disabled on 32-bit targets due to the additional `trunc` opcodes required
+// UNSUPPORTED: target-x86
+// UNSUPPORTED: target=sparc-{{.*}}
+// UNSUPPORTED: target=sparcel-{{.*}}
 
 // CHECK-LABEL: define ptr @alloca_scalar_nonchar()
 // CHECK: alloca i32, i64 1
diff --git a/flang/test/Integration/OpenMP/private-global.f90 b/flang/test/Integration/OpenMP/private-global.f90
index 1aacfb4..8f8de8c 100644
--- a/flang/test/Integration/OpenMP/private-global.f90
+++ b/flang/test/Integration/OpenMP/private-global.f90
@@ -31,14 +31,14 @@ End Program
 ! CHECK:         %[[FIFTY:.*]] = alloca i32, i64 1, align 4
 ! CHECK:         %[[INTERMEDIATE:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, align 8
 ! CHECK:         %[[TABLE_BOX_ADDR2:.*]] = alloca { ptr, i64, i32, i8, i8, i8, i8, [1 x [3 x i64]] }, i64 1, align 8
-! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[INTERMEDIATE]], ptr %[[PRIV_BOX_ALLOC]], i32 48, i1 false)
+! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[INTERMEDIATE]], ptr %[[PRIV_BOX_ALLOC]], i32 {{4[48]}}, i1 false)
 ! CHECK:         store i32 50, ptr %[[FIFTY]], align 4
 ! CHECK:         %[[FIFTY_BOX_VAL:.*]] = insertvalue { ptr, i64, i32, i8, i8, i8, i8 } { ptr undef, i64 4, i32 20240719, i8 0, i8 9, i8 0, i8 0 }, ptr %[[FIFTY]], 0
-! CHECK:         store { ptr, i64, i32, i8, i8, i8, i8 } %[[FIFTY_BOX_VAL]], ptr %[[BOXED_FIFTY]], align 8
-! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR2]], ptr %[[INTERMEDIATE]], i32 48, i1 false)
+! CHECK:         store { ptr, i64, i32, i8, i8, i8, i8 } %[[FIFTY_BOX_VAL]], ptr %[[BOXED_FIFTY]], align {{[48]}}
+! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR2]], ptr %[[INTERMEDIATE]], i32 {{4[48]}}, i1 false)
 ! CHECK:         call void @_FortranAAssign(ptr %[[TABLE_BOX_ADDR2]], ptr %[[BOXED_FIFTY]], ptr @{{.*}}, i32 9)
-! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR]], ptr %[[PRIV_BOX_ALLOC]], i32 48, i1 false)
-! CHECK:         %[[PRIV_TABLE:.*]] = call ptr @malloc(i64 40)
+! CHECK:         call void @llvm.memcpy.p0.p0.i32(ptr %[[TABLE_BOX_ADDR]], ptr %[[PRIV_BOX_ALLOC]], i32 {{4[48]}}, i1 false)
+! CHECK:         %[[PRIV_TABLE:.*]] = call ptr @malloc(i{{(32)|(64)}} 40)
 ! ...
 ! check that we use the private copy of table for table/=50
 ! CHECK:       omp.par.region3:
diff --git a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90 b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
index be25169..f769fd3 100644
--- a/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
+++ b/flang/test/Lower/OpenMP/parallel-reduction-mixed.f90
@@ -36,7 +36,7 @@ end subroutine proc
 
 !CHECK: [[MALLOC_BB]]:
 !CHECK-NOT: omp.par.{{.*}}:
-!CHECK: call ptr @malloc(i64 80)
+!CHECK: call ptr @malloc(i{{(32)|(64)}} 80)
 
 !CHECK: %[[RED_ARR_0:.*]] = getelementptr inbounds [2 x ptr], ptr %red.array, i64 0, i64 0
 !CHECK: store ptr %[[F_priv]], ptr %[[RED_ARR_0:.*]]
diff --git a/flang/test/Lower/forall/character-1.f90 b/flang/test/Lower/forall/character-1.f90
index d5f968b..69064dd 100644
--- a/flang/test/Lower/forall/character-1.f90
+++ b/flang/test/Lower/forall/character-1.f90
@@ -2,6 +2,10 @@
 ! RUN: %flang -emit-llvm -flang-deprecated-no-hlfir -S -mmlir -disable-external-name-interop %s -o - | FileCheck %s
 ! Test from Fortran source through to LLVM IR.
 ! UNSUPPORTED: system-windows
+! Disabled on 32-bit targets due to the additional `trunc` opcodes required
+! UNSUPPORTED: target-x86
+! UNSUPPORTED: target=sparc-{{.*}}
+! UNSUPPORTED: target=sparcel-{{.*}}
 
 ! Assumed size array of assumed length character.
 program test
diff --git a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
index 014b1c8..7718d8a 100644
--- a/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
+++ b/llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp
@@ -3771,6 +3771,8 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc,
 
   // Emit a call to the runtime function that orchestrates the reduction.
   // Declare the reduction function in the process.
+  Type *IndexTy = Builder.getIndexTy(
+      M.getDataLayout(), M.getDataLayout().getDefaultGlobalsAddressSpace());
   Function *Func = Builder.GetInsertBlock()->getParent();
   Module *Module = Func->getParent();
   uint32_t SrcLocStrSize;
@@ -3786,7 +3788,7 @@ OpenMPIRBuilder::createReductions(const LocationDescription &Loc,
   Constant *NumVariables = Builder.getInt32(NumReductions);
   const DataLayout &DL = Module->getDataLayout();
   unsigned RedArrayByteSize = DL.getTypeStoreSize(RedArrayTy);
-  Constant *RedArraySize = Builder.getInt64(RedArrayByteSize);
+  Constant *RedArraySize = ConstantInt::get(IndexTy, RedArrayByteSize);
   Function *ReductionFunc = getFreshReductionFunc(*Module);
   Value *Lock = getOMPCriticalRegionLock(".reduction");
   Function *ReduceFunc = getOrCreateRuntimeFunctionPtr(
author	R <rqou@berkeley.edu>	2025-03-11 02:01:57 +0000
committer	GitHub <noreply@github.com>	2025-03-11 02:01:57 +0000
commit	1dffe8f364a4d5ed0b1efc981a1d9a30166aae05 (patch)
tree	e668f5f576caf123d404987209ebd5010813552b
parent	3226617f023d46708414f867c726695a1a2dd562 (diff)
download	llvm-1dffe8f364a4d5ed0b1efc981a1d9a30166aae05.zip llvm-1dffe8f364a4d5ed0b1efc981a1d9a30166aae05.tar.gz llvm-1dffe8f364a4d5ed0b1efc981a1d9a30166aae05.tar.bz2