; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S %s -passes=scalarize-masked-mem-intrin -mtriple=amdgcn-amd-amdhsa | FileCheck %s

; COM: Test that, unlike on CPU targets, the mask doesn't get bitcast to a scalar,
; COM: since, on GPUs, each i1 takes up at least one register and so they should
; COM: be treated separately.
; COM:
; COM: Expected shape of the output for the two-lane scatter below: for each
; COM: lane N, the mask bit is read with "extractelement <2 x i1> %mask, i64 N"
; COM: and used directly as the branch condition. The conditional block then
; COM: extracts lane N of the value and pointer vectors and performs a scalar
; COM: store whose alignment (8) matches the alignment operand of the intrinsic
; COM: call. The assertions are emitted by update_test_checks.py; regenerate
; COM: them with that script rather than editing them by hand.

define void @scalarize_v2i32(<2 x ptr> %p, <2 x i1> %mask, <2 x i32> %value) {
; CHECK-LABEL: define void @scalarize_v2i32(
; CHECK-SAME: <2 x ptr> [[P:%.*]], <2 x i1> [[MASK:%.*]], <2 x i32> [[VALUE:%.*]]) {
; CHECK-NEXT:    [[MASK0:%.*]] = extractelement <2 x i1> [[MASK]], i64 0
; CHECK-NEXT:    br i1 [[MASK0]], label %[[COND_STORE:.*]], label %[[ELSE:.*]]
; CHECK:       [[COND_STORE]]:
; CHECK-NEXT:    [[ELT0:%.*]] = extractelement <2 x i32> [[VALUE]], i64 0
; CHECK-NEXT:    [[PTR0:%.*]] = extractelement <2 x ptr> [[P]], i64 0
; CHECK-NEXT:    store i32 [[ELT0]], ptr [[PTR0]], align 8
; CHECK-NEXT:    br label %[[ELSE]]
; CHECK:       [[ELSE]]:
; CHECK-NEXT:    [[MASK1:%.*]] = extractelement <2 x i1> [[MASK]], i64 1
; CHECK-NEXT:    br i1 [[MASK1]], label %[[COND_STORE1:.*]], label %[[ELSE2:.*]]
; CHECK:       [[COND_STORE1]]:
; CHECK-NEXT:    [[ELT1:%.*]] = extractelement <2 x i32> [[VALUE]], i64 1
; CHECK-NEXT:    [[PTR1:%.*]] = extractelement <2 x ptr> [[P]], i64 1
; CHECK-NEXT:    store i32 [[ELT1]], ptr [[PTR1]], align 8
; CHECK-NEXT:    br label %[[ELSE2]]
; CHECK:       [[ELSE2]]:
; CHECK-NEXT:    ret void
;
  call void @llvm.masked.scatter.v2i32.v2p0(<2 x i32> %value, <2 x ptr> %p, i32 8, <2 x i1> %mask)
  ret void
}

; COM: Operand order as used by the call above: value vector, pointer vector,
; COM: i32 alignment, mask vector.
declare void @llvm.masked.scatter.v2i32.v2p0(<2 x i32>, <2 x ptr>, i32, <2 x i1>)