diff options
author | khaki3 <47756807+khaki3@users.noreply.github.com> | 2025-08-26 21:26:58 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2025-08-26 21:26:58 -0700 |
commit | 9a81d853cee219af05ee58959ea9c5e9ac69980e (patch) | |
tree | 7edb470a0ee9d7d41422c97019d1b5c6902ba0c5 /flang/lib/Lower/OpenACC.cpp | |
parent | f44eaf47dc1357959d14cbf5f278c82e554689b2 (diff) | |
download | llvm-9a81d853cee219af05ee58959ea9c5e9ac69980e.zip llvm-9a81d853cee219af05ee58959ea9c5e9ac69980e.tar.gz llvm-9a81d853cee219af05ee58959ea9c5e9ac69980e.tar.bz2 |
[flang][acc] Fix the indexing of the reduction combiner for multidimensional static arrays (#155536)
In the following example, which reduces a static 2D array, the reduction
combiner accesses the array with incorrect coordinates. This
PR reverses the order of the induction variables used for such array
indexing. For the other cases of static arrays, we reverse the loop order as
well so that the innermost loop iterates over the innermost dimension.
```Fortran
! Reproducer: OpenACC "+" reduction over a statically shaped 2D array.
program main
implicit none
! Extents of r: n is the first dimension, m is the second.
integer, parameter :: m = 2
integer, parameter :: n = 10
integer :: r(n,m), i
r = 0
! The reduction covers the whole array through the section r(:n,:m).
! Inside the loop only r(:,1) is assigned; r(:,2) is never written there.
!$acc parallel loop reduction(+:r(:n,:m))
do i = 1, n
r(i, 1) = i
enddo
print *, r
end program main
```
Currently, we have (the induction variables are passed to `fir.coordinate_of` in the wrong order):
```mlir
fir.do_loop %arg2 = %c0 to %c1 step %c1 {
fir.do_loop %arg3 = %c0 to %c9 step %c1 {
%0 = fir.coordinate_of %arg0, %arg2, %arg3 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
%1 = fir.coordinate_of %arg1, %arg2, %arg3 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
```
With this PR, we obtain:
```mlir
fir.do_loop %arg2 = %c0 to %c1 step %c1 {
fir.do_loop %arg3 = %c0 to %c9 step %c1 {
%0 = fir.coordinate_of %arg0, %arg3, %arg2 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
%1 = fir.coordinate_of %arg1, %arg3, %arg2 : (!fir.ref<!fir.array<10x2xi32>>, index, index) -> !fir.ref<i32>
```
Diffstat (limited to 'flang/lib/Lower/OpenACC.cpp')
-rw-r--r-- | flang/lib/Lower/OpenACC.cpp | 14 |
1 files changed, 8 insertions, 6 deletions
diff --git a/flang/lib/Lower/OpenACC.cpp b/flang/lib/Lower/OpenACC.cpp index 35edcb0..7a84b21 100644 --- a/flang/lib/Lower/OpenACC.cpp +++ b/flang/lib/Lower/OpenACC.cpp @@ -1575,7 +1575,7 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, if (bounds.empty()) { llvm::SmallVector<mlir::Value> extents; mlir::Type idxTy = builder.getIndexType(); - for (auto extent : seqTy.getShape()) { + for (auto extent : llvm::reverse(seqTy.getShape())) { mlir::Value lb = mlir::arith::ConstantOp::create( builder, loc, idxTy, builder.getIntegerAttr(idxTy, 0)); mlir::Value ub = mlir::arith::ConstantOp::create( @@ -1607,12 +1607,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, } } else { // Lowerbound, upperbound and step are passed as block arguments. - [[maybe_unused]] unsigned nbRangeArgs = + unsigned nbRangeArgs = recipe.getCombinerRegion().getArguments().size() - 2; assert((nbRangeArgs / 3 == seqTy.getDimension()) && "Expect 3 block arguments per dimension"); - for (unsigned i = 2; i < recipe.getCombinerRegion().getArguments().size(); - i += 3) { + for (int i = nbRangeArgs - 1; i >= 2; i -= 3) { mlir::Value lb = recipe.getCombinerRegion().getArgument(i); mlir::Value ub = recipe.getCombinerRegion().getArgument(i + 1); mlir::Value step = recipe.getCombinerRegion().getArgument(i + 2); @@ -1623,8 +1622,11 @@ static void genCombiner(fir::FirOpBuilder &builder, mlir::Location loc, ivs.push_back(loop.getInductionVar()); } } - auto addr1 = fir::CoordinateOp::create(builder, loc, refTy, value1, ivs); - auto addr2 = fir::CoordinateOp::create(builder, loc, refTy, value2, ivs); + llvm::SmallVector<mlir::Value> reversedIvs(ivs.rbegin(), ivs.rend()); + auto addr1 = + fir::CoordinateOp::create(builder, loc, refTy, value1, reversedIvs); + auto addr2 = + fir::CoordinateOp::create(builder, loc, refTy, value2, reversedIvs); auto load1 = fir::LoadOp::create(builder, loc, addr1); auto load2 = fir::LoadOp::create(builder, loc, addr2); mlir::Value res 
= |