10 files changed, 173 insertions, 18 deletions
diff --git a/flang/docs/GettingInvolved.md b/flang/docs/GettingInvolved.md
index e2220f3..79af788 100644
--- a/flang/docs/GettingInvolved.md
+++ b/flang/docs/GettingInvolved.md
@@ -46,26 +46,17 @@ Contributions to Flang are done using GitHub Pull Requests and follow the
 
 ## Calls
 
-### Flang Community Biweekly Call
+### Flang Biweekly Call
 
--   General updates on the Flang project.
--   Join [Flang Community Biweekly Call](https://lanl-us.webex.com/lanl-us/j.php?MTID=mdce13c9bd55202e8071d8128fb953614)
-    - If you prefer to join using a meeting number and password, those can be 
-      found in this [Google Doc](https://docs.google.com/document/d/1Z2U5UAtJ-Dag5wlMaLaW1KRmNgENNAYynJqLW2j2AZQ/). Alternative methods of joining, such as call-in numbers, are also available.
--   Time: Wednesdays, 8:30 a.m. Pacific Time, on the weeks alternating with regular Flang Community Technical Biweekly Call.
+-   Technical discussions as well as general updates on the Flang project.
+-   Join the [Flang Biweekly Call](https://lanl-us.webex.com/lanl-us/j.php?MTID=mdce13c9bd55202e8071d8128fb953614)
+    - If you prefer to join using a meeting number and password, those can be
+      found in this [Google Doc](https://docs.google.com/document/d/1Z2U5UAtJ-Dag5wlMaLaW1KRmNgENNAYynJqLW2j2AZQ/). Alternative methods of joining, such as call-in numbers, are also available there.
+-   Time: Every other Wednesday, 8:30 a.m. Pacific Time
 -   Calendar invite: https://drive.google.com/file/d/1rkfWCtIvQFcxN0Uz8YVwQGoX_BbzT8oc/view?usp=drive_link
 -   Meeting minutes are available in this [Google Doc](https://docs.google.com/document/d/1Z2U5UAtJ-Dag5wlMaLaW1KRmNgENNAYynJqLW2j2AZQ/edit).
 -   Minutes from older meetings were posted on the [Flang forum](https://discourse.llvm.org/c/subprojects/flang). Search for `Flang Biweekly Sync - Notes`.
 
-### Flang Community Technical Biweekly Call
-
--   Technical topics call.
--   Join [Flang Community Technical Biweekly Call](https://teams.microsoft.com/l/meetup-join/19%3ameeting_YWU1NzU4ZjQtOTljOS00NWU1LTg5NjktYTUzOTU3MGEwMzAx%40thread.v2/0?context=%7b%22Tid%22%3a%22f34e5979-57d9-4aaa-ad4d-b122a662184d%22%2c%22Oid%22%3a%223641875c-ef5b-4767-8105-0787a195852f%22%7d)
-    - If you prefer to join using a meeting ID and passcode, those can be 
-      found in this [Google Doc](https://docs.google.com/document/d/1Z2U5UAtJ-Dag5wlMaLaW1KRmNgENNAYynJqLW2j2AZQ/). Alternative methods of joining, such as call-in numbers, are also available.
--   Time: Mondays, 8:30 a.m. Pacific Time, on the weeks alternating with regular Flang Community Biweekly Call.
--   The agenda is in this [Google Doc](https://docs.google.com/document/d/1Z2U5UAtJ-Dag5wlMaLaW1KRmNgENNAYynJqLW2j2AZQ/).
-
 ### LLVM Alias Analysis Technical Call
 
 -   For people working on improvements to LLVM alias analysis.
diff --git a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
index 2adfd6f2..c3cd119b 100644
--- a/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
+++ b/flang/include/flang/Optimizer/Builder/IntrinsicCall.h
@@ -459,6 +459,7 @@ struct IntrinsicLibrary {
   mlir::Value genTime(mlir::Type, llvm::ArrayRef<mlir::Value>);
   void genTMABulkCommitGroup(llvm::ArrayRef<fir::ExtendedValue>);
   void genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue>);
+  void genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue>);
   void genTMABulkWaitGroup(llvm::ArrayRef<fir::ExtendedValue>);
   mlir::Value genTrailz(mlir::Type, llvm::ArrayRef<mlir::Value>);
   fir::ExtendedValue genTransfer(mlir::Type,
diff --git a/flang/lib/Lower/IO.cpp b/flang/lib/Lower/IO.cpp
index 98dc78f..604b137 100644
--- a/flang/lib/Lower/IO.cpp
+++ b/flang/lib/Lower/IO.cpp
@@ -524,12 +524,18 @@ getNamelistGroup(Fortran::lower::AbstractConverter &converter,
         descAddr =
             builder.createConvert(loc, builder.getRefType(symType), varAddr);
       } else {
+        fir::BaseBoxType boxType;
         const auto expr = Fortran::evaluate::AsGenericExpr(s);
         fir::ExtendedValue exv = converter.genExprAddr(*expr, stmtCtx);
         mlir::Type type = fir::getBase(exv).getType();
+        bool isClassType = mlir::isa<fir::ClassType>(type);
         if (mlir::Type baseTy = fir::dyn_cast_ptrOrBoxEleTy(type))
           type = baseTy;
-        fir::BoxType boxType = fir::BoxType::get(fir::PointerType::get(type));
+
+        if (isClassType)
+          boxType = fir::ClassType::get(fir::PointerType::get(type));
+        else
+          boxType = fir::BoxType::get(fir::PointerType::get(type));
         descAddr = builder.createTemporary(loc, boxType);
         fir::MutableBoxValue box = fir::MutableBoxValue(descAddr, {}, {});
         fir::factory::associateMutableBox(builder, loc, box, exv,
diff --git a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
index 5fe2a76..e07baaf 100644
--- a/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
+++ b/flang/lib/Optimizer/Builder/IntrinsicCall.cpp
@@ -1027,6 +1027,10 @@ static constexpr IntrinsicHandler handlers[]{
        {"dst", asAddr},
        {"nbytes", asValue}}},
      /*isElemental=*/false},
+    {"tma_bulk_s2g",
+     &I::genTMABulkS2G,
+     {{{"src", asAddr}, {"dst", asAddr}, {"nbytes", asValue}}},
+     /*isElemental=*/false},
     {"tma_bulk_wait_group",
      &I::genTMABulkWaitGroup,
      {{}},
@@ -9227,6 +9231,17 @@ void IntrinsicLibrary::genTMABulkG2S(llvm::ArrayRef<fir::ExtendedValue> args) {
       builder, loc, dst, src, barrier, fir::getBase(args[3]), {}, {});
 }
 
+// TMA_BULK_S2G (CUDA)
+void IntrinsicLibrary::genTMABulkS2G(llvm::ArrayRef<fir::ExtendedValue> args) {
+  assert(args.size() == 3);
+  mlir::Value src = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[0]),
+                                          mlir::NVVM::NVVMMemorySpace::Shared);
+  mlir::Value dst = convertPtrToNVVMSpace(builder, loc, fir::getBase(args[1]),
+                                          mlir::NVVM::NVVMMemorySpace::Global);
+  mlir::NVVM::CpAsyncBulkSharedCTAToGlobalOp::create(
+      builder, loc, dst, src, fir::getBase(args[2]), {}, {});
+}
+
 // TMA_BULK_WAIT_GROUP (CUDA)
 void IntrinsicLibrary::genTMABulkWaitGroup(
     llvm::ArrayRef<fir::ExtendedValue> args) {
diff --git a/flang/lib/Optimizer/CodeGen/CodeGen.cpp b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
index 4a05cd9..0afb295 100644
--- a/flang/lib/Optimizer/CodeGen/CodeGen.cpp
+++ b/flang/lib/Optimizer/CodeGen/CodeGen.cpp
@@ -3229,6 +3229,10 @@ struct GlobalOpConversion : public fir::FIROpConversion<fir::GlobalOp> {
       g.setAddrSpace(
           static_cast<unsigned>(mlir::NVVM::NVVMMemorySpace::Shared));
 
+    if (global.getDataAttr() &&
+        *global.getDataAttr() == cuf::DataAttribute::Constant)
+      TODO(global.getLoc(), "CUDA Fortran CONSTANT variable code generation");
+
     rewriter.eraseOp(global);
     return mlir::success();
   }
diff --git a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
index 609a1fc..759e3a65d 100644
--- a/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
+++ b/flang/lib/Optimizer/Transforms/CUFOpConversion.cpp
@@ -558,6 +558,7 @@ static mlir::Value emboxSrc(mlir::PatternRewriter &rewriter,
     if (srcTy.isInteger(1)) {
       // i1 is not a supported type in the descriptor and it is actually coming
       // from a LOGICAL constant. Use the destination type to avoid mismatch.
+      assert(dstEleTy && "expect dst element type to be set");
       srcTy = dstEleTy;
       src = createConvertOp(rewriter, loc, srcTy, src);
       addr = builder.createTemporary(loc, srcTy);
@@ -652,7 +653,8 @@ struct CUFDataTransferOpConversion
         // Initialization of an array from a scalar value should be implemented
         // via a kernel launch. Use the flang runtime via the Assign function
         // until we have more infrastructure.
-        mlir::Value src = emboxSrc(rewriter, op, symtab);
+        mlir::Type dstEleTy = fir::unwrapInnerType(fir::unwrapRefType(dstTy));
+        mlir::Value src = emboxSrc(rewriter, op, symtab, dstEleTy);
         mlir::Value dst = emboxDst(rewriter, op, symtab);
         mlir::func::FuncOp func =
             fir::runtime::getRuntimeFunc<mkRTKey(CUFDataTransferCstDesc)>(
@@ -739,6 +741,9 @@ struct CUFDataTransferOpConversion
         fir::StoreOp::create(builder, loc, val, box);
         return box;
       }
+      if (mlir::isa<fir::BaseBoxType>(val.getType()))
+        if (auto loadOp = mlir::dyn_cast<fir::LoadOp>(val.getDefiningOp()))
+          return loadOp.getMemref();
       return val;
     };
 
diff --git a/flang/module/cudadevice.f90 b/flang/module/cudadevice.f90
index a8b9aa8..22df9cd 100644
--- a/flang/module/cudadevice.f90
+++ b/flang/module/cudadevice.f90
@@ -2034,6 +2034,15 @@ implicit none
     end subroutine
   end interface
 
+  interface
+    attributes(device) subroutine tma_bulk_s2g(src, dst, nbytes)
+      !dir$ ignore_tkr src, dst
+      integer(4), shared  :: src(*)
+      integer(4), device  :: dst(*)
+      integer(4), value   :: nbytes
+    end subroutine
+  end interface
+
 contains
 
   attributes(device) subroutine syncthreads()
diff --git a/flang/test/Fir/CUDA/cuda-data-transfer.fir b/flang/test/Fir/CUDA/cuda-data-transfer.fir
index 669300c..b247fce 100644
--- a/flang/test/Fir/CUDA/cuda-data-transfer.fir
+++ b/flang/test/Fir/CUDA/cuda-data-transfer.fir
@@ -651,5 +651,79 @@ func.func @_QPsub28() {
 // CHECK: %[[BOX_NONE:.*]] = fir.convert %[[DESC]] : (!fir.ref<!fir.box<!fir.logical<8>>>) -> !fir.ref<!fir.box<none>>
 // CHECK: fir.call @_FortranACUFDataTransferCstDesc(%{{.*}}, %[[BOX_NONE]], %{{.*}}, %{{.*}}, %{{.*}}) : (!fir.ref<!fir.box<none>>, !fir.ref<!fir.box<none>>, i32, !fir.ref<i8>, i32) -> ()
 
+func.func @_QPtesti4(%arg0: !fir.ref<i32> {fir.bindc_name = "n1"}, %arg1: !fir.ref<i32> {fir.bindc_name = "n2"}, %arg2: !fir.ref<i32> {fir.bindc_name = "n3"}, %arg3: !fir.ref<i32> {fir.bindc_name = "n4"}) {
+  %true = arith.constant true
+  %c0 = arith.constant 0 : index
+  %c2_i32 = arith.constant 2 : i32
+  %0 = fir.dummy_scope : !fir.dscope
+  %1:2 = hlfir.declare %arg0 dummy_scope %0 {uniq_name = "_QFtesti4En1"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %2:2 = hlfir.declare %arg1 dummy_scope %0 {uniq_name = "_QFtesti4En2"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %3:2 = hlfir.declare %arg2 dummy_scope %0 {uniq_name = "_QFtesti4En3"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %4:2 = hlfir.declare %arg3 dummy_scope %0 {uniq_name = "_QFtesti4En4"} : (!fir.ref<i32>, !fir.dscope) -> (!fir.ref<i32>, !fir.ref<i32>)
+  %5 = fir.load %1#0 : !fir.ref<i32>
+  %6 = arith.divsi %5, %c2_i32 : i32
+  %7 = fir.convert %6 : (i32) -> index
+  %8 = arith.cmpi sgt, %7, %c0 : index
+  %9 = arith.select %8, %7, %c0 : index
+  %10 = fir.load %2#0 : !fir.ref<i32>
+  %11 = arith.divsi %10, %c2_i32 : i32
+  %12 = fir.convert %11 : (i32) -> index
+  %13 = arith.cmpi sgt, %12, %c0 : index
+  %14 = arith.select %13, %12, %c0 : index
+  %15 = fir.load %3#0 : !fir.ref<i32>
+  %16 = arith.divsi %15, %c2_i32 : i32
+  %17 = fir.convert %16 : (i32) -> index
+  %18 = arith.cmpi sgt, %17, %c0 : index
+  %19 = arith.select %18, %17, %c0 : index
+  %20 = fir.load %4#0 : !fir.ref<i32>
+  %21 = arith.divsi %20, %c2_i32 : i32
+  %22 = fir.convert %21 : (i32) -> index
+  %23 = arith.cmpi sgt, %22, %c0 : index
+  %24 = arith.select %23, %22, %c0 : index
+  %25 = cuf.alloc !fir.array<?x?x?x?x!fir.logical<4>>, %9, %14, %19, %24 : index, index, index, index {bindc_name = "lma", data_attr = #cuf.cuda<managed>, uniq_name = "_QFtesti4Elma"} -> !fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>>
+  %26 = fir.shape %9, %14, %19, %24 : (index, index, index, index) -> !fir.shape<4>
+  %27:2 = hlfir.declare %25(%26) {data_attr = #cuf.cuda<managed>, uniq_name = "_QFtesti4Elma"} : (!fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>>, !fir.shape<4>) -> (!fir.box<!fir.array<?x?x?x?x!fir.logical<4>>>, !fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>>)
+  cuf.data_transfer %true to %27#1, %26 : !fir.shape<4> {transfer_kind = #cuf.cuda_transfer<host_device>} : i1, !fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>>
+  cuf.free %27#1 : !fir.ref<!fir.array<?x?x?x?x!fir.logical<4>>> {data_attr = #cuf.cuda<managed>}
+  return
+}
+
+// CHECK-LABEL: func.func @_QPtesti4
+// CHECK: fir.call @_FortranACUFDataTransferCstDesc
+
+// -----
+
+func.func @_QQmain() attributes {fir.bindc_name = "T"} {
+  %c2 = arith.constant 2 : index
+  %c1 = arith.constant 1 : index
+  %c80 = arith.constant 80 : index
+  %c0 = arith.constant 0 : index
+  %0 = fir.dummy_scope : !fir.dscope
+  %1 = cuf.alloc !fir.box<!fir.heap<!fir.array<?x?x?xf16>>> {bindc_name = "a", data_attr = #cuf.cuda<device>, uniq_name = "_QFEa"} -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>>
+  %2 = fir.zero_bits !fir.heap<!fir.array<?x?x?xf16>>
+  %3 = fir.shape %c0, %c0, %c0 : (index, index, index) -> !fir.shape<3>
+  %4 = fir.embox %2(%3) {allocator_idx = 2 : i32} : (!fir.heap<!fir.array<?x?x?xf16>>, !fir.shape<3>) -> !fir.box<!fir.heap<!fir.array<?x?x?xf16>>>
+  fir.store %4 to %1 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>>
+  %5 = fir.declare %1 {data_attr = #cuf.cuda<device>, fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFEa"} : (!fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>>) -> !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>>
+  %6 = fir.address_of(@_QFEha) : !fir.ref<!fir.array<80x80x80xf32>>
+  %7 = fir.shape %c80, %c80, %c80 : (index, index, index) -> !fir.shape<3>
+  %8 = fir.declare %6(%7) {uniq_name = "_QFEha"} : (!fir.ref<!fir.array<80x80x80xf32>>, !fir.shape<3>) -> !fir.ref<!fir.array<80x80x80xf32>>
+  %9 = fir.address_of(@_QFECn) : !fir.ref<i32>
+  %10 = fir.declare %9 {fortran_attrs = #fir.var_attrs<parameter>, uniq_name = "_QFECn"} : (!fir.ref<i32>) -> !fir.ref<i32>
+  %11 = fir.load %5 : !fir.ref<!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>>
+  %12:3 = fir.box_dims %11, %c0 : (!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>, index) -> (index, index, index)
+  %13:3 = fir.box_dims %11, %c1 : (!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>, index) -> (index, index, index)
+  %14:3 = fir.box_dims %11, %c2 : (!fir.box<!fir.heap<!fir.array<?x?x?xf16>>>, index) -> (index, index, index)
+  %15 = fir.shape %12#1, %13#1, %14#1 : (index, index, index) -> !fir.shape<3>
+  %16 = fir.allocmem !fir.array<?x?x?xf16>, %12#1, %13#1, %14#1 {bindc_name = ".tmp", uniq_name = ""}
+  %17 = fir.declare %16(%15) {uniq_name = ".tmp"} : (!fir.heap<!fir.array<?x?x?xf16>>, !fir.shape<3>) -> !fir.heap<!fir.array<?x?x?xf16>>
+  %18 = fir.embox %17(%15) : (!fir.heap<!fir.array<?x?x?xf16>>, !fir.shape<3>) -> !fir.box<!fir.array<?x?x?xf16>>
+  cuf.data_transfer %11 to %18 {transfer_kind = #cuf.cuda_transfer<device_host>} : !fir.box<!fir.heap<!fir.array<?x?x?xf16>>>, !fir.box<!fir.array<?x?x?xf16>>
+  return
+}
+
+// CHECK-LABEL: func.func @_QQmain() 
+// CHECK: fir.call @_FortranACUFDataTransferDescDesc
+
 } // end of module
 
diff --git a/flang/test/Lower/CUDA/cuda-device-proc.cuf b/flang/test/Lower/CUDA/cuda-device-proc.cuf
index 83ee011..29c348c 100644
--- a/flang/test/Lower/CUDA/cuda-device-proc.cuf
+++ b/flang/test/Lower/CUDA/cuda-device-proc.cuf
@@ -438,7 +438,7 @@ end subroutine
 ! CHECK: nvvm.cp.async.bulk.commit.group
 ! CHECK: nvvm.cp.async.bulk.wait_group 0
 
-attributes(global) subroutine test_bulk_g2s(c, a, b, n)
+attributes(global) subroutine test_bulk_g2s(a)
   real(8), device :: a(*)
   real(8), shared :: tmpa(1024)
   integer(8), shared :: barrier1
@@ -448,3 +448,13 @@ end subroutine
 
 ! CHECK-LABEL: func.func @_QPtest_bulk_g2s
 ! CHECK: nvvm.cp.async.bulk.shared.cluster.global %{{.*}}, %{{.*}}, %{{.*}}, %{{.*}} : <7>, <1>
+
+attributes(global) subroutine test_bulk_s2g(a)
+  real(8), device :: a(*)
+  real(8), shared :: tmpa(1024)
+  integer(4) :: tx_count
+  call tma_bulk_s2g(tmpa, a(j), tx_count)
+end subroutine
+
+! CHECK-LABEL: func.func @_QPtest_bulk_s2g
+! CHECL: nvvm.cp.async.bulk.global.shared.cta %{{.*}}, %{{.*}}, %{{.*}} : <1>, <3>
diff --git a/flang/test/Lower/namelist.f90 b/flang/test/Lower/namelist.f90
index 770af46..a258da1 100644
--- a/flang/test/Lower/namelist.f90
+++ b/flang/test/Lower/namelist.f90
@@ -123,6 +123,45 @@ subroutine global_pointer
   write(10, nml=mygroup)
 end
 
+module m
+  type base
+    real :: r1
+  end type
+  interface write(formatted)
+    subroutine writeformatted(dtv, unit, iotype, v_list, iostat, iomsg )
+      import base
+        class(base), intent(in) :: dtv
+        integer,  intent(in) :: unit
+        character(*), intent(in) :: iotype
+        integer, intent(in)     :: v_list(:)
+        integer,  intent(out) :: iostat
+        character(*),  intent(inout) :: iomsg
+     end subroutine
+  end interface
+end module
+
+! CHECK-LABEL: c.func @_QPlocal_poly_namelist
+subroutine local_poly_namelist
+  use m
+  class(base), allocatable :: b1
+! CHECK:  %[[V_0:[0-9]+]]  = fir.alloca !fir.class<!fir.ptr<!fir.type<_QMmTbase{r1:f32}>>>
+! CHECK:  %[[V_2:[0-9]+]]  = fir.alloca !fir.class<!fir.heap<!fir.type<_QMmTbase{r1:f32}>>> {bindc_name = "b1", uniq_name = "_QFlocal_poly_namelistEb1"}
+! CHECK:  %[[V_5:[0-9]+]]  = fir.declare %[[V_2]] {fortran_attrs = #fir.var_attrs<allocatable>, uniq_name = "_QFlocal_poly_namelistEb1"} : (!fir.ref<!fir.class<!fir.heap<!fir.type<_QMmTbase{r1:f32}>>>>) -> !fir.ref<!fir.class<!fir.heap<!fir.type<_QMmTbase{r1:f32}>>>>
+! CHECK:  %[[V_9:[0-9]+]]  = fir.alloca !fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>
+! CHECK:  %[[V_10:[0-9]+]] = fir.undefined !fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>
+! CHECK:  %[[V_11:[0-9]+]] = fir.address_of(@_QQclX623100) : !fir.ref<!fir.char<1,3>>
+! CHECK:  %[[V_12:[0-9]+]] = fir.convert %[[V_11]] : (!fir.ref<!fir.char<1,3>>) -> !fir.ref<i8>
+! CHECK:  %[[V_13:[0-9]+]] = fir.insert_value %[[V_10]], %[[V_12]], [0 : index, 0 : index] : (!fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>, !fir.ref<i8>) -> !fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>
+! CHECK:  %[[V_14:[0-9]+]] = fir.load %[[V_5]] : !fir.ref<!fir.class<!fir.heap<!fir.type<_QMmTbase{r1:f32}>>>>
+! CHECK:  %[[V_15:[0-9]+]] = fir.rebox %[[V_14]] : (!fir.class<!fir.heap<!fir.type<_QMmTbase{r1:f32}>>>) -> !fir.class<!fir.ptr<!fir.type<_QMmTbase{r1:f32}>>>
+! CHECK:  fir.store %[[V_15]] to %[[V_0]] : !fir.ref<!fir.class<!fir.ptr<!fir.type<_QMmTbase{r1:f32}>>>>
+! CHECK:  %[[V_16:[0-9]+]] = fir.convert %[[V_0]] : (!fir.ref<!fir.class<!fir.ptr<!fir.type<_QMmTbase{r1:f32}>>>>) -> !fir.ref<!fir.box<none>>
+! CHECK:  %[[V_17:[0-9]+]] = fir.insert_value %[[V_13]], %[[V_16]], [0 : index, 1 : index] : (!fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>, !fir.ref<!fir.box<none>>) -> !fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>
+! CHECK:  fir.store %[[V_17]] to %[[V_9]] : !fir.ref<!fir.array<1xtuple<!fir.ref<i8>, !fir.ref<!fir.box<none>>>>>
+  namelist/mygroup/b1
+  write(10, nml=mygroup)
+end subroutine
+
 module mmm
   real rrr
   namelist /aaa/ rrr
@@ -142,3 +181,4 @@ end
 
 ! CHECK-NOT:   ggg
 ! CHECK:       fir.string_lit "aaa\00"(4) : !fir.char<1,4>
+