diff options
Diffstat (limited to 'flang/lib')
-rw-r--r-- | flang/lib/Lower/IO.cpp | 8 | ||||
-rw-r--r-- | flang/lib/Optimizer/Builder/IntrinsicCall.cpp | 15 | ||||
-rw-r--r-- | flang/lib/Optimizer/CodeGen/CodeGen.cpp | 4 | ||||
-rw-r--r-- | flang/lib/Optimizer/Transforms/CUFOpConversion.cpp | 7 |
4 files changed, 32 insertions, 2 deletions
diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp index 98dc78f..604b137 100644 --- a/flang/lib/Lower/IO.cpp +++ b/flang/lib/Lower/IO.cpp @@ -524,12 +524,18 @@ getNamelistGroup(Fortran::lower::AbstractConverter &converter, descAddr = builder.createConvert(loc, builder.getRefType(symType), varAddr); } else { + fir::BaseBoxType boxType; const auto expr = Fortran::evaluate::AsGenericExpr(s); fir::ExtendedValue exv = converter.genExprAddr(*expr, stmtCtx); mlir::Type type = fir::getBase(exv).getType(); + bool isClassType = mlir::isa<fir::ClassType>(type); if (mlir::Type baseTy = fir::dyn_cast_ptrOrBoxEleTy(type)) type = baseTy; - fir::BoxType boxType = fir::BoxType::get(fir::PointerType::get(type)); + + if (isClassType) + boxType = fir::ClassType::get(fir::PointerType::get(type)); + else + boxType = fir::BoxType::get(fir::PointerType::get(type)); descAddr = builder.createTemporary(loc, boxType); fir::MutableBoxValue box = fir::MutableBoxValue(descAddr, {}, {}); fir::factory::associateMutableBox(builder, loc, box, exv, diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp index 5fe2a76..e07baaf 100644 --- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp +++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp @@ -1027,6 +1027,10 @@ static constexpr IntrinsicHandler handlers[]{ {"dst", asAddr}, {"nbytes", asValue}}}, /*isElemental=*/false}, + {"tma_bulk_s2g", + &I::genTMABulkS2G, + {{{"src", asAddr}, {"dst", asAddr}, {"nbytes", asValue}}}, + /*isElemental=*/false}, {"tma_bulk_wait_group", &I::genTMABulkWaitGroup, {{}}, @@ -9227,6 +9231,17 @@ void IntrinsicLibrary::genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue> args) { builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {}); } +// TMA_BULK_S2G (CUDA) +void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) { + assert(args.size() == 3); + mlir::Value src = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[0]), + mlir::NVVM::NVVMMemorySpace::Shared); + mlir::Value dst = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[1]), + mlir::NVVM::NVVMMemorySpace::Global); + mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create( + builder, loc, dst, src, fir::getBase(args[2]), {}, {}); +} + // TMA_BULK_WAIT_GROUP (CUDA) void IntrinsicLibrary::genTMABulkWaitGroup( llvm::ArrayRef<fir::ExtendedValue> args) { diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp index 4a05cd9..0afb295 100644 --- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp +++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp @@ -3229,6 +3229,10 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> { g.setAddrSpace( static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Shared)); + if (global.getDataAttr() && + *global.getDataAttr() == cuf::DataAttribute::Constant) + TODO(global.getLoc(), "CUDA Fortran CONSTANT variable code generation"); + rewriter.eraseOp(global); return mlir::success(); } diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp index 609a1fc..759e3a65d 100644 --- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp +++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp @@ -558,6 +558,7 @@ static mlir::Value emboxSrc(mlir::PatternRewriter &rewriter, if (srcTy.isInteger(1)) { // i1 is not a supported type in the descriptor and it is actually coming // from a LOGICAL constant. Use the destination type to avoid mismatch. + assert(dstEleTy && "expect dst element type to be set"); srcTy = dstEleTy; src = createConvertOp(rewriter, loc, srcTy, src); addr = builder.createTemporary(loc, srcTy); @@ -652,7 +653,8 @@ struct CUFDataTransferOpConversion // Initialization of an array from a scalar value should be implemented // via a kernel launch. Use the flang runtime via the Assign function // until we have more infrastructure. - mlir::Value src = emboxSrc(rewriter, op, symtab); + mlir::Type dstEleTy = fir::unwrapInnerType(fir::unwrapRefType(dstTy)); + mlir::Value src = emboxSrc(rewriter, op, symtab, dstEleTy); mlir::Value dst = emboxDst(rewriter, op, symtab); mlir::func::FuncOp func = fir::runtime::getRuntimeFunc<mkRTKey(CUFDataTransferCstDesc)>( @@ -739,6 +741,9 @@ struct CUFDataTransferOpConversion fir::StoreOp::create(builder, loc, val, box); return box; } + if (mlir::isa<fir::BaseBoxType>(val.getType())) + if (auto loadOp = mlir::dyn_cast<fir::LoadOp>(val.getDefiningOp())) + return loadOp.getMemref(); return val; }; |