; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -S -passes='dxil-data-scalarization' -mtriple=dxil-pc-shadermodel6.3-library %s | FileCheck %s

; Ensure that insertelement instructions have no side effects on each other
; even in the presence of control flow.
;
; Both branches insert into the same source vector %v0 at the dynamic index %i,
; and the expected output shows them sharing a single backing alloca. In each
; branch the element at the dynamic index is loaded before it is overwritten
; and is stored back once the result vector has been rebuilt, so the alloca
; again holds the contents of %v0 when the branch ends. The phi result %v gets
; its own alloca for the final dynamic insert at index %j.
define void @test_multiple_insert(i32 %c, i32 %i, i32 %j) {
; CHECK-LABEL: define void @test_multiple_insert(
; CHECK-SAME: i32 [[C:%.*]], i32 [[I:%.*]], i32 [[J:%.*]]) {
; CHECK-NEXT:    [[V0_ALLOCA:%.*]] = alloca [2 x i32], align 4
; CHECK-NEXT:    [[V_ALLOCA:%.*]] = alloca [2 x i32], align 4
; CHECK-NEXT:    [[V0_0:%.*]] = insertelement <2 x i32> poison, i32 0, i32 0
; CHECK-NEXT:    [[V0:%.*]] = insertelement <2 x i32> [[V0_0]], i32 0, i32 1
; CHECK-NEXT:    [[V0_EXTRACT0:%.*]] = extractelement <2 x i32> [[V0]], i64 0
; CHECK-NEXT:    [[V0_INDEX0:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 0
; CHECK-NEXT:    store i32 [[V0_EXTRACT0]], ptr [[V0_INDEX0]], align 4
; CHECK-NEXT:    [[V0_EXTRACT1:%.*]] = extractelement <2 x i32> [[V0]], i64 1
; CHECK-NEXT:    [[V0_INDEX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 1
; CHECK-NEXT:    store i32 [[V0_EXTRACT1]], ptr [[V0_INDEX1]], align 4
; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[C]], 1
; CHECK-NEXT:    br i1 [[COND]], label %[[IF:.*]], label %[[ELSE:.*]]
; CHECK:       [[IF]]:
; CHECK-NEXT:    [[V1_INDEX:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 [[I]]
; CHECK-NEXT:    [[V1_LOAD:%.*]] = load i32, ptr [[V1_INDEX]], align 4
; CHECK-NEXT:    store i32 1, ptr [[V1_INDEX]], align 4
; CHECK-NEXT:    [[V1_LOAD0:%.*]] = load i32, ptr [[V0_INDEX0]], align 4
; CHECK-NEXT:    [[V1_INSERT0:%.*]] = insertelement <2 x i32> poison, i32 [[V1_LOAD0]], i32 0
; CHECK-NEXT:    [[V1_LOAD1:%.*]] = load i32, ptr [[V0_INDEX1]], align 4
; CHECK-NEXT:    [[V1_INSERT1:%.*]] = insertelement <2 x i32> [[V1_INSERT0]], i32 [[V1_LOAD1]], i32 1
; CHECK-NEXT:    store i32 [[V1_LOAD]], ptr [[V1_INDEX]], align 4
; CHECK-NEXT:    br label %[[EXIT:.*]]
; CHECK:       [[ELSE]]:
; CHECK-NEXT:    [[V2_INDEX:%.*]] = getelementptr inbounds [2 x i32], ptr [[V0_ALLOCA]], i32 0, i32 [[I]]
; CHECK-NEXT:    [[V2_LOAD:%.*]] = load i32, ptr [[V2_INDEX]], align 4
; CHECK-NEXT:    store i32 2, ptr [[V2_INDEX]], align 4
; CHECK-NEXT:    [[V2_LOAD0:%.*]] = load i32, ptr [[V0_INDEX0]], align 4
; CHECK-NEXT:    [[V2_INSERT0:%.*]] = insertelement <2 x i32> poison, i32 [[V2_LOAD0]], i32 0
; CHECK-NEXT:    [[V2_LOAD1:%.*]] = load i32, ptr [[V0_INDEX1]], align 4
; CHECK-NEXT:    [[V2_INSERT1:%.*]] = insertelement <2 x i32> [[V2_INSERT0]], i32 [[V2_LOAD1]], i32 1
; CHECK-NEXT:    store i32 [[V2_LOAD]], ptr [[V2_INDEX]], align 4
; CHECK-NEXT:    br label %[[EXIT]]
; CHECK:       [[EXIT]]:
; CHECK-NEXT:    [[V:%.*]] = phi <2 x i32> [ [[V1_INSERT1]], %[[IF]] ], [ [[V2_INSERT1]], %[[ELSE]] ]
; CHECK-NEXT:    [[V_EXTRACT:%.*]] = extractelement <2 x i32> [[V]], i64 0
; CHECK-NEXT:    [[V_INDEX:%.*]] = getelementptr inbounds [2 x i32], ptr [[V_ALLOCA]], i32 0, i32 0
; CHECK-NEXT:    store i32 [[V_EXTRACT]], ptr [[V_INDEX]], align 4
; CHECK-NEXT:    [[V_EXTRACT10:%.*]] = extractelement <2 x i32> [[V]], i64 1
; CHECK-NEXT:    [[V_INDEX1:%.*]] = getelementptr inbounds [2 x i32], ptr [[V_ALLOCA]], i32 0, i32 1
; CHECK-NEXT:    store i32 [[V_EXTRACT10]], ptr [[V_INDEX1]], align 4
; CHECK-NEXT:    [[V3_INDEXJ:%.*]] = getelementptr inbounds [2 x i32], ptr [[V_ALLOCA]], i32 0, i32 [[J]]
; CHECK-NEXT:    [[V3_LOAD:%.*]] = load i32, ptr [[V3_INDEXJ]], align 4
; CHECK-NEXT:    store i32 3, ptr [[V3_INDEXJ]], align 4
; CHECK-NEXT:    [[V3_LOAD0:%.*]] = load i32, ptr [[V_INDEX]], align 4
; CHECK-NEXT:    [[V3_INSERT0:%.*]] = insertelement <2 x i32> poison, i32 [[V3_LOAD0]], i32 0
; CHECK-NEXT:    [[V3_LOAD1:%.*]] = load i32, ptr [[V_INDEX1]], align 4
; CHECK-NEXT:    [[V3_INSERT1:%.*]] = insertelement <2 x i32> [[V3_INSERT0]], i32 [[V3_LOAD1]], i32 1
; CHECK-NEXT:    store i32 [[V3_LOAD]], ptr [[V3_INDEXJ]], align 4
; CHECK-NEXT:    ret void
;
  %v0_0 = insertelement <2 x i32> poison, i32 0, i32 0
  %v0 = insertelement <2 x i32> %v0_0, i32 0, i32 1
  %cond = icmp eq i32 %c, 1
  br i1 %cond, label %if, label %else

if:
  %v1 = insertelement <2 x i32> %v0, i32 1, i32 %i
  br label %exit

else:
  %v2 = insertelement <2 x i32> %v0, i32 2, i32 %i
  br label %exit

exit:
  %v = phi <2 x i32> [ %v1, %if ], [ %v2, %else ]
  %v3 = insertelement <2 x i32> %v, i32 3, i32 %j
  ret void
}

; Allocas can be reused across insert/extractelement instructions on the same vector.
;
; Both the dynamic extractelement and the dynamic insertelement operate on %v;
; the expected output contains exactly one alloca, and the insert addresses the
; element pointers that were created for the extract.
define void @test_alloca_reuse(<3 x i32> %v, i32 %a, i32 %i) {
; CHECK-LABEL: define void @test_alloca_reuse(
; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
; CHECK-NEXT:    [[EE1_ALLOCA:%.*]] = alloca [3 x i32], align 4
; CHECK-NEXT:    [[EE1_EXTRACT:%.*]] = extractelement <3 x i32> [[V]], i64 0
; CHECK-NEXT:    [[EE1_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 0
; CHECK-NEXT:    store i32 [[EE1_EXTRACT]], ptr [[EE1_INDEX]], align 4
; CHECK-NEXT:    [[EE1_EXTRACT1:%.*]] = extractelement <3 x i32> [[V]], i64 1
; CHECK-NEXT:    [[EE1_INDEX2:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 1
; CHECK-NEXT:    store i32 [[EE1_EXTRACT1]], ptr [[EE1_INDEX2]], align 4
; CHECK-NEXT:    [[EE1_EXTRACT3:%.*]] = extractelement <3 x i32> [[V]], i64 2
; CHECK-NEXT:    [[EE1_INDEX4:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 2
; CHECK-NEXT:    store i32 [[EE1_EXTRACT3]], ptr [[EE1_INDEX4]], align 4
; CHECK-NEXT:    [[EE1_INDEX5:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
; CHECK-NEXT:    [[EE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX5]], align 4
; CHECK-NEXT:    [[IE1_DYNINDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[EE1_ALLOCA]], i32 0, i32 [[I]]
; CHECK-NEXT:    [[IE1_LOAD1:%.*]] = load i32, ptr [[IE1_DYNINDEX]], align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[IE1_DYNINDEX]], align 4
; CHECK-NEXT:    [[IE1_LOAD:%.*]] = load i32, ptr [[EE1_INDEX]], align 4
; CHECK-NEXT:    [[IE1_INSERT:%.*]] = insertelement <3 x i32> poison, i32 [[IE1_LOAD]], i32 0
; CHECK-NEXT:    [[IE1_LOAD6:%.*]] = load i32, ptr [[EE1_INDEX2]], align 4
; CHECK-NEXT:    [[IE1_INSERT7:%.*]] = insertelement <3 x i32> [[IE1_INSERT]], i32 [[IE1_LOAD6]], i32 1
; CHECK-NEXT:    [[IE1_LOAD8:%.*]] = load i32, ptr [[EE1_INDEX4]], align 4
; CHECK-NEXT:    [[IE1_INSERT9:%.*]] = insertelement <3 x i32> [[IE1_INSERT7]], i32 [[IE1_LOAD8]], i32 2
; CHECK-NEXT:    store i32 [[IE1_LOAD1]], ptr [[IE1_DYNINDEX]], align 4
; CHECK-NEXT:    ret void
;
  %ee1 = extractelement <3 x i32> %v, i32 %i
  %ie1 = insertelement <3 x i32> %v, i32 %a, i32 %i
  ret void
}

; An extractelement with a dynamic index should be converted to array form:
; the expected output stores every element of the vector into an alloca'd
; array and then loads the requested element through a getelementptr that
; uses the dynamic index.
define float @extract_float_vec_dynamic(<4 x float> %v, i32 %i) {
; CHECK-LABEL: define float @extract_float_vec_dynamic(
; CHECK-SAME: <4 x float> [[V:%.*]], i32 [[I:%.*]]) {
; CHECK-NEXT:    [[EE_ALLOCA:%.*]] = alloca [4 x float], align 4
; CHECK-NEXT:    [[EE_EXTRACT:%.*]] = extractelement <4 x float> [[V]], i64 0
; CHECK-NEXT:    [[EE_INDEX:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 0
; CHECK-NEXT:    store float [[EE_EXTRACT]], ptr [[EE_INDEX]], align 4
; CHECK-NEXT:    [[EE_EXTRACT1:%.*]] = extractelement <4 x float> [[V]], i64 1
; CHECK-NEXT:    [[EE_INDEX2:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 1
; CHECK-NEXT:    store float [[EE_EXTRACT1]], ptr [[EE_INDEX2]], align 4
; CHECK-NEXT:    [[EE_EXTRACT3:%.*]] = extractelement <4 x float> [[V]], i64 2
; CHECK-NEXT:    [[EE_INDEX4:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 2
; CHECK-NEXT:    store float [[EE_EXTRACT3]], ptr [[EE_INDEX4]], align 4
; CHECK-NEXT:    [[EE_EXTRACT5:%.*]] = extractelement <4 x float> [[V]], i64 3
; CHECK-NEXT:    [[EE_INDEX6:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 3
; CHECK-NEXT:    store float [[EE_EXTRACT5]], ptr [[EE_INDEX6]], align 4
; CHECK-NEXT:    [[EE_INDEX7:%.*]] = getelementptr inbounds [4 x float], ptr [[EE_ALLOCA]], i32 0, i32 [[I]]
; CHECK-NEXT:    [[EE_LOAD:%.*]] = load float, ptr [[EE_INDEX7]], align 4
; CHECK-NEXT:    ret float [[EE_LOAD]]
;
  %ee = extractelement <4 x float> %v, i32 %i
  ret float %ee
}

; An insertelement with a dynamic index should be converted to array form:
; the expected output stores every element of the vector into an alloca'd
; array, writes the new value through a getelementptr that uses the dynamic
; index, rebuilds the result vector from the array with constant-index
; inserts, and finally stores the previously saved element back into the array.
define <3 x i32> @insert_i32_vec_dynamic(<3 x i32> %v, i32 %a, i32 %i) {
; CHECK-LABEL: define <3 x i32> @insert_i32_vec_dynamic(
; CHECK-SAME: <3 x i32> [[V:%.*]], i32 [[A:%.*]], i32 [[I:%.*]]) {
; CHECK-NEXT:    [[IE_ALLOCA:%.*]] = alloca [3 x i32], align 4
; CHECK-NEXT:    [[IE_EXTRACT:%.*]] = extractelement <3 x i32> [[V]], i64 0
; CHECK-NEXT:    [[IE_INDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 0
; CHECK-NEXT:    store i32 [[IE_EXTRACT]], ptr [[IE_INDEX]], align 4
; CHECK-NEXT:    [[IE_EXTRACT1:%.*]] = extractelement <3 x i32> [[V]], i64 1
; CHECK-NEXT:    [[IE_INDEX2:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 1
; CHECK-NEXT:    store i32 [[IE_EXTRACT1]], ptr [[IE_INDEX2]], align 4
; CHECK-NEXT:    [[IE_EXTRACT3:%.*]] = extractelement <3 x i32> [[V]], i64 2
; CHECK-NEXT:    [[IE_INDEX4:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 2
; CHECK-NEXT:    store i32 [[IE_EXTRACT3]], ptr [[IE_INDEX4]], align 4
; CHECK-NEXT:    [[IE_DYNINDEX:%.*]] = getelementptr inbounds [3 x i32], ptr [[IE_ALLOCA]], i32 0, i32 [[I]]
; CHECK-NEXT:    [[IE_LOAD1:%.*]] = load i32, ptr [[IE_DYNINDEX]], align 4
; CHECK-NEXT:    store i32 [[A]], ptr [[IE_DYNINDEX]], align 4
; CHECK-NEXT:    [[IE_LOAD:%.*]] = load i32, ptr [[IE_INDEX]], align 4
; CHECK-NEXT:    [[IE_INSERT:%.*]] = insertelement <3 x i32> poison, i32 [[IE_LOAD]], i32 0
; CHECK-NEXT:    [[IE_LOAD5:%.*]] = load i32, ptr [[IE_INDEX2]], align 4
; CHECK-NEXT:    [[IE_INSERT6:%.*]] = insertelement <3 x i32> [[IE_INSERT]], i32 [[IE_LOAD5]], i32 1
; CHECK-NEXT:    [[IE_LOAD7:%.*]] = load i32, ptr [[IE_INDEX4]], align 4
; CHECK-NEXT:    [[IE_INSERT8:%.*]] = insertelement <3 x i32> [[IE_INSERT6]], i32 [[IE_LOAD7]], i32 2
; CHECK-NEXT:    store i32 [[IE_LOAD1]], ptr [[IE_DYNINDEX]], align 4
; CHECK-NEXT:    ret <3 x i32> [[IE_INSERT8]]
;
  %ie = insertelement <3 x i32> %v, i32 %a, i32 %i
  ret <3 x i32> %ie
}

; An extractelement with a constant index should not be converted to array form:
; the expected output is the unchanged extractelement, with no alloca.
define i16 @extract_i16_vec_constant(<4 x i16> %v) {
; CHECK-LABEL: define i16 @extract_i16_vec_constant(
; CHECK-SAME: <4 x i16> [[V:%.*]]) {
; CHECK-NEXT:    [[EE:%.*]] = extractelement <4 x i16> [[V]], i32 1
; CHECK-NEXT:    ret i16 [[EE]]
;
  %ee = extractelement <4 x i16> %v, i32 1
  ret i16 %ee
}

; An insertelement with a constant index should not be converted to array form:
; the expected output is the unchanged insertelement, with no alloca.
define <2 x half> @insert_half_vec_constant(<2 x half> %v, half %a) {
; CHECK-LABEL: define <2 x half> @insert_half_vec_constant(
; CHECK-SAME: <2 x half> [[V:%.*]], half [[A:%.*]]) {
; CHECK-NEXT:    [[IE:%.*]] = insertelement <2 x half> [[V]], half [[A]], i32 1
; CHECK-NEXT:    ret <2 x half> [[IE]]
;
  %ie = insertelement <2 x half> %v, half %a, i32 1
  ret <2 x half> %ie
}