diff options
Diffstat (limited to 'flang/test/Lower')
| -rw-r--r-- | flang/test/Lower/CUDA/cuda-device-proc.cuf | 264 | ||||
| -rw-r--r-- | flang/test/Lower/OpenMP/atomic-read-complex.f90 | 34 | ||||
| -rw-r--r-- | flang/test/Lower/OpenMP/atomic-write-complex.f90 | 34 | ||||
| -rw-r--r-- | flang/test/Lower/forall-pointer-assignment.f90 (renamed from flang/test/Lower/forall-polymorphic.f90) | 87 | ||||
| -rw-r--r-- | flang/test/Lower/inline_directive.f90 | 61 |
5 files changed, 477 insertions, 3 deletions
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf index 5c4c3c6..09b4302 100644 --- a/flang/test/Lower/CUDA/cuda-device-proc.cuf +++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf @@ -431,7 +431,7 @@ end subroutine ! CHECK: %[[COUNT:.*]] = arith.constant 256 : i32 ! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr ! CHECK: %[[SHARED_PTR:.*]] = llvm.addrspacecast %[[LLVM_PTR]] : !llvm.ptr to !llvm.ptr<3> -! CHECK: nvvm.mbarrier.init.shared %[[SHARED_PTR]], %[[COUNT]] : !llvm.ptr<3>, i32 +! CHECK: nvvm.mbarrier.init %[[SHARED_PTR]], %[[COUNT]] : !llvm.ptr<3>, i32 ! CHECK: nvvm.fence.proxy {kind = #nvvm.proxy_kind<async.shared>, space = #nvvm.shared_space<cta>} ! CHECK: %[[LLVM_PTR:.*]] = fir.convert %[[DECL_SHARED]]#0 : (!fir.ref<i64>) -> !llvm.ptr @@ -468,7 +468,18 @@ attributes(global) subroutine test_bulk_g2s(a) end subroutine ! CHECK-LABEL: func.func @_QPtest_bulk_g2s -! CHECK: nvvm.cp.async.bulk.shared.cluster.global %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : <7>, <1> +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %4 {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_bulk_g2sEbarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[DST:.*]]:2 = hlfir.declare %16(%17) {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_bulk_g2sEtmpa"} : (!fir.ref<!fir.array<1024xf64>>, !fir.shape<1>) -> (!fir.ref<!fir.array<1024xf64>>, !fir.ref<!fir.array<1024xf64>>) +! CHECK: %[[COUNT:.*]]:2 = hlfir.declare %19 {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_bulk_g2sEtx_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[SRC:.*]] = hlfir.designate %{{.*}} (%{{.*}}) : (!fir.box<!fir.array<?xf64>>, i64) -> !fir.ref<f64> +! CHECK: %[[COUNT_LOAD:.*]] = fir.load %20#0 : !fir.ref<i32> +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: %[[BARRIER_3:.*]] = llvm.addrspacecast %[[BARRIER_PTR]] : !llvm.ptr to !llvm.ptr<3> +! CHECK: %[[DST_PTR:.*]] = fir.convert %[[DST]]#0 : (!fir.ref<!fir.array<1024xf64>>) -> !llvm.ptr +! CHECK: %[[DST_7:.*]] = llvm.addrspacecast %[[DST_PTR]] : !llvm.ptr to !llvm.ptr<7> +! CHECK: %[[SRC_PTR:.*]] = fir.convert %[[SRC]] : (!fir.ref<f64>) -> !llvm.ptr +! CHECK: %[[SRC_3:.*]] = llvm.addrspacecast %[[SRC_PTR]] : !llvm.ptr to !llvm.ptr<1> +! CHECK: nvvm.cp.async.bulk.shared.cluster.global %[[DST_7]], %[[SRC_3]], %[[BARRIER_3]], %[[COUNT_LOAD]] : <7>, <1> attributes(global) subroutine test_bulk_s2g(a) real(8), device :: a(*) @@ -479,6 +490,8 @@ end subroutine ! CHECK-LABEL: func.func @_QPtest_bulk_s2g ! CHECL: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 attributes(device) subroutine testAtomicCasLoop(aa, n) integer :: a @@ -492,3 +505,250 @@ end subroutine ! CHECK: %[[CASTED_CMP_XCHG_EV:.*]] = fir.convert %[[CMP_XCHG_EV]] : (i1) -> i32 ! CHECK: %{{.*}} = arith.constant 1 : i32 ! CHECK: %19 = arith.cmpi eq, %[[CASTED_CMP_XCHG_EV]], %{{.*}} : i32 + +attributes(global) subroutine test_barrier_try_wait() + integer :: istat + integer(8), shared :: barrier1 + integer(8) :: token + istat = barrier_try_wait(barrier1, token) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_barrier_try_wait() +! CHECK: scf.while +! CHECK: %{{.*}} = nvvm.inline_ptx ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}}; selp.b32 %{{.*}}, 1, 0, p;" ro(%{{.*}}, %{{.*}}, %c1000000{{.*}} : !llvm.ptr, i64, i32) -> i32 + +attributes(global) subroutine test_barrier_try_wait_sleep() + integer :: istat + integer(8), shared :: barrier1 + integer(8) :: token + integer(4) :: sleep_time + istat = barrier_try_wait_sleep(barrier1, token, sleep_time) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_barrier_try_wait_sleep() +! CHECK: %{{.*}} = nvvm.inline_ptx ".reg .pred p; mbarrier.try_wait.shared.b64 p, [%{{.*}}], %{{.*}}, %{{.*}}; selp.b32 %0, 1, 0, p;" ro(%{{.*}}, %{{.*}}, %{{.*}} : !llvm.ptr, i64, i32) -> i32 + +attributes(global) subroutine test_tma_bulk_load_c4(a, n) + integer(8), shared :: barrier1 + integer, value :: n + complex(4), device :: r8(n) + complex(4), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_c4 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_c4Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_c4Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_load_c8(a, n) + integer(8), shared :: barrier1 + integer, value :: n + complex(8), device :: r8(n) + complex(8), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_c8 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_c8Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_c8Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 16 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_load_i4(a, n) + integer(8), shared :: barrier1 + integer, value :: n + integer(4), device :: r8(n) + integer(4), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_i4 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_i4Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_i4Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 4 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_load_i8(a, n) + integer(8), shared :: barrier1 + integer, value :: n + integer(8), device :: r8(n) + integer(8), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_i8 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_i8Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_i8Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_load_r2(a, n) + integer(8), shared :: barrier1 + integer, value :: n + real(2), device :: r8(n) + real(2), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r2 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_r2Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_r2Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 2 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_load_r4(a, n) + integer(8), shared :: barrier1 + integer, value :: n + real(4), device :: r8(n) + real(4), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r4 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_r4Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_r4Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 4 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_load_r8(a, n) + integer(8), shared :: barrier1 + integer, value :: n + real(8), device :: r8(n) + real(8), shared :: tmp(1024) + integer(4) :: j, elem_count + call tma_bulk_load(barrier1, r8(j), tmp, elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_load_r8 +! CHECK: %[[BARRIER:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<shared>, uniq_name = "_QFtest_tma_bulk_load_r8Ebarrier1"} : (!fir.ref<i64>) -> (!fir.ref<i64>, !fir.ref<i64>) +! CHECK: %[[ELEM_COUNT:.*]]:2 = hlfir.declare %{{.*}} {data_attr = #cuf.cuda<device>, uniq_name = "_QFtest_tma_bulk_load_r8Eelem_count"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>) +! CHECK: %[[COUNT:.*]] = fir.load %[[ELEM_COUNT]]#0 : !fir.ref<i32> +! CHECK: %[[ELEM_SIZE:.*]] = arith.constant 8 : i32 +! CHECK: %[[SIZE:.*]] = arith.muli %[[COUNT]], %[[ELEM_SIZE]] : i32 +! CHECK: %[[BARRIER_PTR:.*]] = fir.convert %[[BARRIER]]#0 : (!fir.ref<i64>) -> !llvm.ptr +! CHECK: nvvm.inline_ptx "cp.async.bulk.shared::cluster.global.mbarrier::complete_tx::bytes [%0], [%1], %2, [%3];" ro(%{{.*}}, %{{.*}}, %[[SIZE]], %[[BARRIER_PTR]] : !llvm.ptr, !llvm.ptr, i32, !llvm.ptr) +! CHECK: nvvm.inline_ptx "mbarrier.expect_tx.relaxed.cta.shared::cta.b64 [%0], %1;" ro(%[[BARRIER_PTR]], %[[SIZE]] : !llvm.ptr, i32) + +attributes(global) subroutine test_tma_bulk_store_c4(c, n) + integer, value :: n + complex(4), device :: c(n) + complex(4), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_c4 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 + +attributes(global) subroutine test_tma_bulk_store_c8(c, n) + integer, value :: n + complex(8), device :: c(n) + complex(8), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_c8 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 + +attributes(global) subroutine test_tma_bulk_store_i4(c, n) + integer, value :: n + integer(4), device :: c(n) + integer(4), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_i4 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 + +attributes(global) subroutine test_tma_bulk_store_i8(c, n) + integer, value :: n + integer(8), device :: c(n) + integer(8), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_i8 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 + + +attributes(global) subroutine test_tma_bulk_store_r2(c, n) + integer, value :: n + real(2), device :: c(n) + real(2), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r2 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 + +attributes(global) subroutine test_tma_bulk_store_r4(c, n) + integer, value :: n + real(4), device :: c(n) + real(4), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r4 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 + +attributes(global) subroutine test_tma_bulk_store_r8(c, n) + integer, value :: n + real(8), device :: c(n) + real(8), shared :: tmpa(1024) + integer(4) :: j, elem_count + call tma_bulk_store(tmpa, c(j), elem_count) +end subroutine + +! CHECK-LABEL: func.func @_QPtest_tma_bulk_store_r8 +! CHECK: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3> +! CHECK: nvvm.inline_ptx "cp.async.bulk.commit_group" +! CHECK: nvvm.cp.async.bulk.wait_group 0 diff --git a/flang/test/Lower/OpenMP/atomic-read-complex.f90 b/flang/test/Lower/OpenMP/atomic-read-complex.f90 new file mode 100644 index 0000000..2f51f03 --- /dev/null +++ b/flang/test/Lower/OpenMP/atomic-read-complex.f90 @@ -0,0 +1,34 @@ +! Test lowering of atomic read to LLVM IR for complex types. +! This is a regression test for issue #165184. + +! RUN: %flang_fc1 -emit-llvm -fopenmp -o - %s | FileCheck %s + +! Test that atomic read operations with complex types emit the correct +! size parameter to __atomic_load: +! - complex(4) (8 bytes total): should call __atomic_load(i64 8, ...) +! - complex(8) (16 bytes total): should call __atomic_load(i64 16, ...) + +program atomic_read_complex + implicit none + + ! Test complex(4) - single precision (8 bytes) + complex(4) :: c41, c42 + ! Test complex(8) - double precision (16 bytes) + complex(8) :: c81, c82 + + c42 = (1.0_4, 1.0_4) + c82 = (1.0_8, 1.0_8) + + ! CHECK-LABEL: define {{.*}} @_QQmain + + ! Single precision complex: 8 bytes + ! CHECK: call void @__atomic_load(i64 8, ptr {{.*}}, ptr {{.*}}, i32 {{.*}}) +!$omp atomic read + c41 = c42 + + ! Double precision complex: 16 bytes (this was broken before the fix) + ! CHECK: call void @__atomic_load(i64 16, ptr {{.*}}, ptr {{.*}}, i32 {{.*}}) +!$omp atomic read + c81 = c82 + +end program atomic_read_complex diff --git a/flang/test/Lower/OpenMP/atomic-write-complex.f90 b/flang/test/Lower/OpenMP/atomic-write-complex.f90 new file mode 100644 index 0000000..48cfe26 --- /dev/null +++ b/flang/test/Lower/OpenMP/atomic-write-complex.f90 @@ -0,0 +1,34 @@ +! Test lowering of atomic write to LLVM IR for complex types. +! This is a regression test for issue #165184. + +! RUN: %flang_fc1 -emit-llvm -fopenmp -o - %s | FileCheck %s + +! Test that atomic write operations with complex types emit the correct +! size parameter to __atomic_store: +! - complex(4) (8 bytes total): should call __atomic_store(i64 8, ...) +! - complex(8) (16 bytes total): should call __atomic_store(i64 16, ...) + +program atomic_write_complex + implicit none + + ! Test complex(4) - single precision (8 bytes) + complex(4) :: c41, c42 + ! Test complex(8) - double precision (16 bytes) + complex(8) :: c81, c82 + + c42 = (1.0_4, 1.0_4) + c82 = (1.0_8, 1.0_8) + + ! CHECK-LABEL: define {{.*}} @_QQmain + + ! Single precision complex: 8 bytes + ! CHECK: call void @__atomic_store(i64 8, ptr {{.*}}, ptr {{.*}}, i32 {{.*}}) +!$omp atomic write + c41 = c42 + + ! Double precision complex: 16 bytes (this was broken before the fix) + ! CHECK: call void @__atomic_store(i64 16, ptr {{.*}}, ptr {{.*}}, i32 {{.*}}) +!$omp atomic write + c81 = c82 + +end program atomic_write_complex diff --git a/flang/test/Lower/forall-polymorphic.f90 b/flang/test/Lower/forall-pointer-assignment.f90 index 2b7a51f..ec142e3 100644 --- a/flang/test/Lower/forall-polymorphic.f90 +++ b/flang/test/Lower/forall-pointer-assignment.f90 @@ -1,6 +1,7 @@ -! Test lower of FORALL polymorphic pointer assignment +! Test lower of FORALL pointer assignment ! RUN: bbc -emit-fir %s -o - | FileCheck %s + !! Test when LHS is polymorphic and RHS is not polymorphic ! CHECK-LABEL: c.func @_QPforallpolymorphic subroutine forallPolymorphic() @@ -46,6 +47,7 @@ end subroutine forallPolymorphic + !! Test when LHS is not polymorphic but RHS is polymorphic ! CHECK-LABEL: c.func @_QPforallpolymorphic2( ! CHECK-SAME: %arg0: !fir.ref<!fir.class<!fir.heap<!fir.array<?x!fir.type<_QFforallpolymorphic2Tdt{ptr:!fir.box<!fir.ptr<!fir.array<?x!fir.type<_QFforallpolymorphic2Tdt>>>>}>>>>> {fir.bindc_name = "tar1", fir.target}) { @@ -87,3 +89,86 @@ end subroutine forallPolymorphic2 + +!! Test when LHS is unlimited polymorphic and RHS non-polymorphic intrinsic +!! type target. +! CHECK-LABEL: c.func @_QPforallpolymorphic3 +subroutine forallPolymorphic3() + TYPE :: DT + CLASS(*), POINTER :: Ptr => NULL() + END TYPE + + TYPE(DT) :: D1(10) + CHARACTER*1, TARGET :: TAR1(10) + INTEGER :: I + + FORALL (I=1:10) + D1(I)%Ptr => Tar1(I) + END FORALL + +! CHECK: %[[V_7:[0-9]+]] = fir.alloca !fir.array<10x!fir.type<_QFforallpolymorphic3Tdt{ptr:!fir.class<!fir.ptr<none>>}>> {bindc_name = "d1", uniq_name = "_QFforallpolymorphic3Ed1"} +! CHECK: %[[V_8:[0-9]+]] = fir.shape %c10 : (index) -> !fir.shape<1> +! CHECK: %[[V_9:[0-9]+]] = fir.declare %[[V_7]](%[[V_8]]) {uniq_name = "_QFforallpolymorphic3Ed1"} : (!fir.ref<!fir.array<10x!fir.type<_QFforallpolymorphic3Tdt{ptr:!fir.class<!fir.ptr<none>>}>>>, !fir.shape<1>) -> !fir.ref<!fir.array<10x!fir.type<_QFforallpolymorphic3Tdt{ptr:!fir.class<!fir.ptr<none>>}>>> +! CHECK: %[[V_16:[0-9]+]] = fir.alloca !fir.array<10x!fir.char<1>> {bindc_name = "tar1", fir.target, uniq_name = "_QFforallpolymorphic3Etar1"} +! CHECK: %[[V_17:[0-9]+]] = fir.declare %[[V_16]](%[[V_8]]) typeparams %c1 {fortran_attrs = #fir.var_attrs<target>, uniq_name = "_QFforallpolymorphic3Etar1"} : (!fir.ref<!fir.array<10x!fir.char<1>>>, !fir.shape<1>, index) -> !fir.ref<!fir.array<10x!fir.char<1>>> +! CHECK: %[[V_24:[0-9]+]] = fir.convert %c1_i32 : (i32) -> index +! CHECK: %[[V_25:[0-9]+]] = fir.convert %c10_i32 : (i32) -> index +! CHECK: fir.do_loop %arg0 = %[[V_24]] to %[[V_25]] step %c1 +! CHECK: { +! CHECK: %[[V_26:[0-9]+]] = fir.convert %arg0 : (index) -> i32 +! CHECK: %[[V_27:[0-9]+]] = fir.convert %[[V_26]] : (i32) -> i64 +! CHECK: %[[V_28:[0-9]+]] = fir.array_coor %[[V_9]](%[[V_8]]) %[[V_27]] : (!fir.ref<!fir.array<10x!fir.type<_QFforallpolymorphic3Tdt{ptr:!fir.class<!fir.ptr<none>>}>>>, !fir.shape<1>, i64) -> !fir.ref<!fir.type<_QFforallpolymorphic3Tdt{ptr:!fir.class<!fir.ptr<none>>}>> +! CHECK: %[[V_29:[0-9]+]] = fir.field_index ptr, !fir.type<_QFforallpolymorphic3Tdt{ptr:!fir.class<!fir.ptr<none>>}> +! CHECK: %[[V_30:[0-9]+]] = fir.coordinate_of %[[V_28]], ptr : (!fir.ref<!fir.type<_QFforallpolymorphic3Tdt{ptr:!fir.class<!fir.ptr<none>>}>>) -> !fir.ref<!fir.class<!fir.ptr<none>>> +! CHECK: %[[V_31:[0-9]+]] = fir.convert %[[V_26]] : (i32) -> i64 +! CHECK: %[[V_32:[0-9]+]] = fir.array_coor %[[V_17]](%[[V_8]]) %31 : (!fir.ref<!fir.array<10x!fir.char<1>>>, !fir.shape<1>, i64) -> !fir.ref<!fir.char<1>> +! CHECK: %[[V_33:[0-9]+]] = fir.embox %[[V_32]] : (!fir.ref<!fir.char<1>>) -> !fir.box<!fir.ptr<!fir.char<1>>> +! CHECK: %[[V_34:[0-9]+]] = fir.rebox %[[V_33]] : (!fir.box<!fir.ptr<!fir.char<1>>>) -> !fir.class<!fir.ptr<none>> +! CHECK: fir.store %[[V_34]] to %[[V_30]] : !fir.ref<!fir.class<!fir.ptr<none>>> +! CHECK: } + +end subroutine forallPolymorphic3 + + +!! Test the LHS of a pointer assignment gets the isPointer flag from the +!! RHS that is a reference to a function that returns a pointer. +! CHECK-LABEL: c.func @_QPforallpointerassignment1 + subroutine forallPointerAssignment1() + type base + real, pointer :: data => null() + end type + + interface + pure function makeData (i) + real, pointer :: makeData + integer*4, intent(in) :: i + end function + end interface + + type(base) :: co1(10) + + forall (i=1:10) + co1(i)%data => makeData (i) + end forall + +! CHECK: %[[V_3:[0-9]+]] = fir.alloca i64 +! CHECK: %[[V_3:[0-9]+]] = fir.alloca i32 {bindc_name = "i"} +! CHECK: %[[V_4:[0-9]+]] = fir.alloca !fir.box<!fir.ptr<f32>> {bindc_name = ".result"} +! CHECK: %[[V_25:[0-9]+]] = fir.convert %c1_i32 : (i32) -> index +! CHECK: %[[V_26:[0-9]+]] = fir.convert %c10_i32 : (i32) -> index +! CHECK: %[[V_27:[0-9]+]] = fir.address_of(@{{_QQcl.*}}) : !fir.ref<!fir.char<1,{{.*}}>> +! CHECK: %[[V_28:[0-9]+]] = fir.convert %[[V_27]] : (!fir.ref<!fir.char<1,{{.*}}>>) -> !fir.ref<i8> +! CHECK: %[[V_29:[0-9]+]] = fir.call @_FortranACreateDescriptorStack(%[[V_28]], %c{{.*}}) : (!fir.ref<i8>, i32) -> !fir.llvm_ptr<i8> +! CHECK: fir.do_loop %arg0 = %[[V_25]] to %[[V_26]] step %c1 +! CHECK: { +! CHECK: %[[V_32:[0-9]+]] = fir.convert %arg0 : (index) -> i32 +! CHECK: fir.store %[[V_32]] to %[[V_3]] : !fir.ref<i32> +! CHECK: %[[V_33:[0-9]+]] = fir.call @_QPmakedata(%[[V_3]]) proc_attrs<pure> fastmath<contract> : (!fir.ref<i32>) -> !fir.box<!fir.ptr<f32>> +! CHECK: fir.save_result %[[V_33]] to %[[V_4]] : !fir.box<!fir.ptr<f32>>, !fir.ref<!fir.box<!fir.ptr<f32>>> +! CHECK: %[[V_34:[0-9]+]] = fir.declare %[[V_4]] {uniq_name = ".tmp.func_result"} : (!fir.ref<!fir.box<!fir.ptr<f32>>>) -> !fir.ref<!fir.box<!fir.ptr<f32>>> +! CHECK: %[[V_35:[0-9]+]] = fir.load %[[V_34]] : !fir.ref<!fir.box<!fir.ptr<f32>>> +! CHECK: %[[V_36:[0-9]+]] = fir.convert %[[V_35]] : (!fir.box<!fir.ptr<f32>>) -> !fir.box<none> +! CHECK: fir.call @_FortranAPushDescriptor(%[[V_29]], %[[V_36]]) : (!fir.llvm_ptr<i8>, !fir.box<none>) -> () +! CHECK: } + + end subroutine forallPointerAssignment1 diff --git a/flang/test/Lower/inline_directive.f90 b/flang/test/Lower/inline_directive.f90 new file mode 100644 index 0000000..347df85 --- /dev/null +++ b/flang/test/Lower/inline_directive.f90 @@ -0,0 +1,61 @@ +! RUN: %flang_fc1 -emit-fir -o - %s | FileCheck %s + +subroutine test_inline() + integer :: x, y +!CHECK: %[[VAL_0:.*]] = fir.alloca i32 {bindc_name = "x", uniq_name = "_QFtest_inlineEx"} +!CHECK: %[[VAL_1:.*]] = fir.declare %[[VAL_0]] {uniq_name = "_QFtest_inlineEx"} : (!fir.ref<i32>) -> !fir.ref<i32> +!CHECK: %[[VAL_2:.*]] = fir.alloca i32 {bindc_name = "y", uniq_name = "_QFtest_inlineEy"} +!CHECK: %[[VAL_3:.*]] = fir.declare %[[VAL_2]] {uniq_name = "_QFtest_inlineEy"} : (!fir.ref<i32>) -> !fir.ref<i32> + + !dir$ forceinline + y = g(x) + !CHECK: %[[VAL_4:.*]] = fir.call @_QFtest_inlinePg(%[[VAL_1]]) fastmath<contract> {inline_attr = #fir.inline_attrs<always_inline>} : (!fir.ref<i32>) -> i32 + !CHECK: fir.store %[[VAL_4]] to %[[VAL_3]] : !fir.ref<i32> + + !dir$ forceinline + call f(x, y) + !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<always_inline>} : (!fir.ref<i32>, !fir.ref<i32>) -> () + + !dir$ noinline + y = g(x) + 7 * (8 + g(y)) + !CHECK: %[[VAL_8:.*]] = fir.call @_QFtest_inlinePg(%[[VAL_1]]) fastmath<contract> {inline_attr = #fir.inline_attrs<no_inline>} : (!fir.ref<i32>) -> i32 + !CHECK: %[[VAL_9:.*]] = fir.call @_QFtest_inlinePg(%[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<no_inline>} : (!fir.ref<i32>) -> i32 + !CHECK: %[[VAL_10:.*]] = arith.addi %[[VAL_9]], %[[C8:.*]] : i32 + !CHECK: %[[VAL_11:.*]] = fir.no_reassoc %[[VAL_10]] : i32 + !CHECK: %[[VAL_12:.*]] = arith.muli %[[VAL_11]], %[[C7:.*]] : i32 + !CHECK: %[[VAL_13:.*]] = arith.addi %[[VAL_8]], %[[VAL_12]] : i32 + !CHECK: fir.store %[[VAL_13]] to %[[VAL_3]] : !fir.ref<i32> + + !dir$ noinline + call f(x, y) + !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<no_inline>} : (!fir.ref<i32>, !fir.ref<i32>) -> () + + !dir$ inline + call f(x, y) + !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<inline_hint>} : (!fir.ref<i32>, !fir.ref<i32>) -> () + + !dir$ forceinline + do i = 1, 100 + !CHECK: fir.do_loop %[[ARG_0:.*]] = %[[FROM:.*]] to %[[TO:.*]] step %[[C1:.*]] iter_args(%[[ARG_1:.*]] = {{.*}}) -> (i32) { + !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<always_inline>} : (!fir.ref<i32>, !fir.ref<i32>) -> () + call f(x, y) + enddo + + !dir$ inline + do i = 1, 100 + !CHECK: fir.do_loop %[[ARG_0:.*]] = %[[FROM:.*]] to %[[TO:.*]] step %[[C1:.*]] iter_args(%[[ARG_1:.*]] = {{.*}}) -> (i32) { + !CHECK: fir.call @_QFtest_inlinePf(%[[VAL_1]], %[[VAL_3]]) fastmath<contract> {inline_attr = #fir.inline_attrs<inline_hint>} : (!fir.ref<i32>, !fir.ref<i32>) -> () + call f(x, y) + enddo +!CHECK: return + contains + subroutine f(x, y) + integer, intent(in) :: x + integer, intent(out) :: y + y = x*2 + end subroutine f + integer function g(x) + integer :: x + g = x*2 + end function g +end subroutine test_inline |
