; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn -passes="expand-fp" %s -S -o - | FileCheck %s

; Exercises the expand-fp expansion of frem for amdgcn on half, float, double
; and vector-of-half operands, both without fast-math flags and with the
; 'fast' / 'afn' flags. The assertions below are generated; regenerate them
; with the script named in the NOTE line instead of editing them by hand.

; frem on half with no fast-math flags: the operands are extended to float,
; reduced with the frexp/ldexp/rint/fma loop (11 bits per step) and the result
; is truncated back to half.
define amdgpu_kernel void @frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @frem_f16(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) {
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr half, ptr addrspace(1) [[IN2]], i32 4
; CHECK-NEXT:    [[R0:%.*]] = load half, ptr addrspace(1) [[IN1]], align 4
; CHECK-NEXT:    [[R1:%.*]] = load half, ptr addrspace(1) [[GEP2]], align 4
; CHECK-NEXT:    [[AX:%.*]] = call half @llvm.fabs.f16(half [[R0]])
; CHECK-NEXT:    [[AY:%.*]] = call half @llvm.fabs.f16(half [[R1]])
; CHECK-NEXT:    [[AX1:%.*]] = fpext half [[AX]] to float
; CHECK-NEXT:    [[AY2:%.*]] = fpext half [[AY]] to float
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt float [[AX1]], [[AY2]]
; CHECK-NEXT:    br i1 [[TMP1]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]]
; CHECK:       [[BB2:.*]]:
; CHECK-NEXT:    [[RET:%.*]] = phi half [ [[TMP25:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP16:%.*]], %[[FREM_ELSE]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ueq half [[R1]], 0xH0000
; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], half 0xH7E00, half [[RET]]
; CHECK-NEXT:    [[TMP5:%.*]] = call half @llvm.fabs.f16(half [[R0]])
; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ult half [[TMP5]], 0xH7C00
; CHECK-NEXT:    [[R2:%.*]] = select i1 [[TMP6]], half [[TMP4]], half 0xH7E00
; CHECK-NEXT:    store half [[R2]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
; CHECK:       [[FREM_COMPUTE]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX1]])
; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
; CHECK-NEXT:    [[EX:%.*]] = sub i32 [[TMP9]], 1
; CHECK-NEXT:    [[AX3:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 11)
; CHECK-NEXT:    [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY2]])
; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1
; CHECK-NEXT:    [[EY:%.*]] = sub i32 [[TMP12]], 1
; CHECK-NEXT:    [[AY4:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP11]], i32 1)
; CHECK-NEXT:    [[NB:%.*]] = sub i32 [[EX]], [[EY]]
; CHECK-NEXT:    [[AYINV:%.*]] = fdiv float 1.000000e+00, [[AY4]]
; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[NB]], 11
; CHECK-NEXT:    br i1 [[TMP13]], label %[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]]
; CHECK:       [[FREM_ELSE]]:
; CHECK-NEXT:    [[TMP14:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[R0]])
; CHECK-NEXT:    [[TMP15:%.*]] = fcmp oeq float [[AX1]], [[AY2]]
; CHECK-NEXT:    [[TMP16]] = select i1 [[TMP15]], half [[TMP14]], half [[R0]]
; CHECK-NEXT:    br label %[[BB2]]
; CHECK:       [[FREM_LOOP_BODY]]:
; CHECK-NEXT:    [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ]
; CHECK-NEXT:    [[AX_LOOP_PHI:%.*]] = phi float [ [[AX3]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ]
; CHECK-NEXT:    [[TMP17:%.*]] = fmul float [[AX_LOOP_PHI]], [[AYINV]]
; CHECK-NEXT:    [[Q:%.*]] = call float @llvm.rint.f32(float [[TMP17]])
; CHECK-NEXT:    [[TMP18:%.*]] = fneg float [[Q]]
; CHECK-NEXT:    [[AX5:%.*]] = call float @llvm.fma.f32(float [[TMP18]], float [[AY4]], float [[AX_LOOP_PHI]])
; CHECK-NEXT:    [[CLT:%.*]] = fcmp olt float [[AX5]], 0.000000e+00
; CHECK-NEXT:    [[AXP:%.*]] = fadd float [[AX5]], [[AY4]]
; CHECK-NEXT:    [[AX6:%.*]] = select i1 [[CLT]], float [[AXP]], float [[AX5]]
; CHECK-NEXT:    [[AX_UPDATE]] = call float @llvm.ldexp.f32.i32(float [[AX6]], i32 11)
; CHECK-NEXT:    [[NB_UPDATE]] = sub i32 [[NB_IV]], 11
; CHECK-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[NB_IV]], 11
; CHECK-NEXT:    br i1 [[TMP19]], label %[[FREM_LOOP_BODY]], label %[[FREM_LOOP_EXIT]]
; CHECK:       [[FREM_LOOP_EXIT]]:
; CHECK-NEXT:    [[AX_EXIT_PHI:%.*]] = phi float [ [[AX3]], %[[FREM_COMPUTE]] ], [ [[AX_LOOP_PHI]], %[[FREM_LOOP_BODY]] ]
; CHECK-NEXT:    [[NB_EXIT_PHI:%.*]] = phi i32 [ [[NB_IV]], %[[FREM_LOOP_BODY]] ], [ [[NB]], %[[FREM_COMPUTE]] ]
; CHECK-NEXT:    [[TMP20:%.*]] = sub i32 [[NB_EXIT_PHI]], 11
; CHECK-NEXT:    [[TMP21:%.*]] = add i32 [[TMP20]], 1
; CHECK-NEXT:    [[AX7:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI]], i32 [[TMP21]])
; CHECK-NEXT:    [[TMP22:%.*]] = fmul float [[AX7]], [[AYINV]]
; CHECK-NEXT:    [[Q8:%.*]] = call float @llvm.rint.f32(float [[TMP22]])
; CHECK-NEXT:    [[TMP23:%.*]] = fneg float [[Q8]]
; CHECK-NEXT:    [[AX9:%.*]] = call float @llvm.fma.f32(float [[TMP23]], float [[AY4]], float [[AX7]])
; CHECK-NEXT:    [[CLT10:%.*]] = fcmp olt float [[AX9]], 0.000000e+00
; CHECK-NEXT:    [[AXP11:%.*]] = fadd float [[AX9]], [[AY4]]
; CHECK-NEXT:    [[AX12:%.*]] = select i1 [[CLT10]], float [[AXP11]], float [[AX9]]
; CHECK-NEXT:    [[AX13:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX12]], i32 [[EY]])
; CHECK-NEXT:    [[TMP24:%.*]] = fptrunc float [[AX13]] to half
; CHECK-NEXT:    [[TMP25]] = call half @llvm.copysign.f16(half [[TMP24]], half [[R0]])
; CHECK-NEXT:    br label %[[BB2]]
;
    ptr addrspace(1) %in2) {
  %gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4
  %r0 = load half, ptr addrspace(1) %in1, align 4
  %r1 = load half, ptr addrspace(1) %gep2, align 4
  %r2 = frem half %r0, %r1
  store half %r2, ptr addrspace(1) %out, align 4
  ret void
}

; frem on half with the 'fast' flags: the expected output is the short
; fdiv / trunc / fneg / fma sequence with no loop.
define amdgpu_kernel void @fast_frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @fast_frem_f16(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) {
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr half, ptr addrspace(1) [[IN2]], i32 4
; CHECK-NEXT:    [[R0:%.*]] = load half, ptr addrspace(1) [[IN1]], align 4
; CHECK-NEXT:    [[R1:%.*]] = load half, ptr addrspace(1) [[GEP2]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv half [[R0]], [[R1]]
; CHECK-NEXT:    [[TMP2:%.*]] = call half @llvm.trunc.f16(half [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = fneg half [[TMP2]]
; CHECK-NEXT:    [[R2:%.*]] = call half @llvm.fma.f16(half [[TMP3]], half [[R1]], half [[R0]])
; CHECK-NEXT:    store half [[R2]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
    ptr addrspace(1) %in2) {
  %gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4
  %r0 = load half, ptr addrspace(1) %in1, align 4
  %r1 = load half, ptr addrspace(1) %gep2, align 4
  %r2 = frem fast half %r0, %r1
  store half %r2, ptr addrspace(1) %out, align 4
  ret void
}

; frem on half with only the 'afn' flag: the expected output is the same short
; fdiv / trunc / fneg / fma sequence as for 'fast'.
define amdgpu_kernel void @unsafe_frem_f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @unsafe_frem_f16(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) {
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr half, ptr addrspace(1) [[IN2]], i32 4
; CHECK-NEXT:    [[R0:%.*]] = load half, ptr addrspace(1) [[IN1]], align 4
; CHECK-NEXT:    [[R1:%.*]] = load half, ptr addrspace(1) [[GEP2]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv half [[R0]], [[R1]]
; CHECK-NEXT:    [[TMP2:%.*]] = call half @llvm.trunc.f16(half [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = fneg half [[TMP2]]
; CHECK-NEXT:    [[R2:%.*]] = call half @llvm.fma.f16(half [[TMP3]], half [[R1]], half [[R0]])
; CHECK-NEXT:    store half [[R2]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
    ptr addrspace(1) %in2) {
  %gep2 = getelementptr half, ptr addrspace(1) %in2, i32 4
  %r0 = load half, ptr addrspace(1) %in1, align 4
  %r1 = load half, ptr addrspace(1) %gep2, align 4
  %r2 = frem afn half %r0, %r1
  store half %r2, ptr addrspace(1) %out, align 4
  ret void
}

; frem on float with no fast-math flags: expanded directly in float with the
; frexp/ldexp/rint/fma loop, 12 bits per step.
define amdgpu_kernel void @frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @frem_f32(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) {
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr float, ptr addrspace(1) [[IN2]], i32 4
; CHECK-NEXT:    [[R0:%.*]] = load float, ptr addrspace(1) [[IN1]], align 4
; CHECK-NEXT:    [[R1:%.*]] = load float, ptr addrspace(1) [[GEP2]], align 4
; CHECK-NEXT:    [[AX:%.*]] = call float @llvm.fabs.f32(float [[R0]])
; CHECK-NEXT:    [[AY:%.*]] = call float @llvm.fabs.f32(float [[R1]])
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt float [[AX]], [[AY]]
; CHECK-NEXT:    br i1 [[TMP1]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]]
; CHECK:       [[BB2:.*]]:
; CHECK-NEXT:    [[RET:%.*]] = phi float [ [[TMP24:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP16:%.*]], %[[FREM_ELSE]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ueq float [[R1]], 0.000000e+00
; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], float 0x7FF8000000000000, float [[RET]]
; CHECK-NEXT:    [[TMP5:%.*]] = call float @llvm.fabs.f32(float [[R0]])
; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ult float [[TMP5]], 0x7FF0000000000000
; CHECK-NEXT:    [[R2:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0x7FF8000000000000
; CHECK-NEXT:    store float [[R2]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
; CHECK:       [[FREM_COMPUTE]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX]])
; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { float, i32 } [[TMP7]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { float, i32 } [[TMP7]], 1
; CHECK-NEXT:    [[EX:%.*]] = sub i32 [[TMP9]], 1
; CHECK-NEXT:    [[AX1:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP8]], i32 12)
; CHECK-NEXT:    [[TMP10:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY]])
; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { float, i32 } [[TMP10]], 0
; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { float, i32 } [[TMP10]], 1
; CHECK-NEXT:    [[EY:%.*]] = sub i32 [[TMP12]], 1
; CHECK-NEXT:    [[AY2:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP11]], i32 1)
; CHECK-NEXT:    [[NB:%.*]] = sub i32 [[EX]], [[EY]]
; CHECK-NEXT:    [[AYINV:%.*]] = fdiv float 1.000000e+00, [[AY2]]
; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[NB]], 12
; CHECK-NEXT:    br i1 [[TMP13]], label %[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]]
; CHECK:       [[FREM_ELSE]]:
; CHECK-NEXT:    [[TMP14:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[R0]])
; CHECK-NEXT:    [[TMP15:%.*]] = fcmp oeq float [[AX]], [[AY]]
; CHECK-NEXT:    [[TMP16]] = select i1 [[TMP15]], float [[TMP14]], float [[R0]]
; CHECK-NEXT:    br label %[[BB2]]
; CHECK:       [[FREM_LOOP_BODY]]:
; CHECK-NEXT:    [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ]
; CHECK-NEXT:    [[AX_LOOP_PHI:%.*]] = phi float [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ]
; CHECK-NEXT:    [[TMP17:%.*]] = fmul float [[AX_LOOP_PHI]], [[AYINV]]
; CHECK-NEXT:    [[Q:%.*]] = call float @llvm.rint.f32(float [[TMP17]])
; CHECK-NEXT:    [[TMP18:%.*]] = fneg float [[Q]]
; CHECK-NEXT:    [[AX3:%.*]] = call float @llvm.fma.f32(float [[TMP18]], float [[AY2]], float [[AX_LOOP_PHI]])
; CHECK-NEXT:    [[CLT:%.*]] = fcmp olt float [[AX3]], 0.000000e+00
; CHECK-NEXT:    [[AXP:%.*]] = fadd float [[AX3]], [[AY2]]
; CHECK-NEXT:    [[AX4:%.*]] = select i1 [[CLT]], float [[AXP]], float [[AX3]]
; CHECK-NEXT:    [[AX_UPDATE]] = call float @llvm.ldexp.f32.i32(float [[AX4]], i32 12)
; CHECK-NEXT:    [[NB_UPDATE]] = sub i32 [[NB_IV]], 12
; CHECK-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[NB_IV]], 12
; CHECK-NEXT:    br i1 [[TMP19]], label %[[FREM_LOOP_BODY]], label %[[FREM_LOOP_EXIT]]
; CHECK:       [[FREM_LOOP_EXIT]]:
; CHECK-NEXT:    [[AX_EXIT_PHI:%.*]] = phi float [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_LOOP_PHI]], %[[FREM_LOOP_BODY]] ]
; CHECK-NEXT:    [[NB_EXIT_PHI:%.*]] = phi i32 [ [[NB_IV]], %[[FREM_LOOP_BODY]] ], [ [[NB]], %[[FREM_COMPUTE]] ]
; CHECK-NEXT:    [[TMP20:%.*]] = sub i32 [[NB_EXIT_PHI]], 12
; CHECK-NEXT:    [[TMP21:%.*]] = add i32 [[TMP20]], 1
; CHECK-NEXT:    [[AX5:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI]], i32 [[TMP21]])
; CHECK-NEXT:    [[TMP22:%.*]] = fmul float [[AX5]], [[AYINV]]
; CHECK-NEXT:    [[Q6:%.*]] = call float @llvm.rint.f32(float [[TMP22]])
; CHECK-NEXT:    [[TMP23:%.*]] = fneg float [[Q6]]
; CHECK-NEXT:    [[AX7:%.*]] = call float @llvm.fma.f32(float [[TMP23]], float [[AY2]], float [[AX5]])
; CHECK-NEXT:    [[CLT8:%.*]] = fcmp olt float [[AX7]], 0.000000e+00
; CHECK-NEXT:    [[AXP9:%.*]] = fadd float [[AX7]], [[AY2]]
; CHECK-NEXT:    [[AX10:%.*]] = select i1 [[CLT8]], float [[AXP9]], float [[AX7]]
; CHECK-NEXT:    [[AX11:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX10]], i32 [[EY]])
; CHECK-NEXT:    [[TMP24]] = call float @llvm.copysign.f32(float [[AX11]], float [[R0]])
; CHECK-NEXT:    br label %[[BB2]]
;
    ptr addrspace(1) %in2) {
  %gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4
  %r0 = load float, ptr addrspace(1) %in1, align 4
  %r1 = load float, ptr addrspace(1) %gep2, align 4
  %r2 = frem float %r0, %r1
  store float %r2, ptr addrspace(1) %out, align 4
  ret void
}

; frem on float with the 'fast' flags: short fdiv / trunc / fneg / fma
; sequence, no loop.
define amdgpu_kernel void @fast_frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @fast_frem_f32(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) {
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr float, ptr addrspace(1) [[IN2]], i32 4
; CHECK-NEXT:    [[R0:%.*]] = load float, ptr addrspace(1) [[IN1]], align 4
; CHECK-NEXT:    [[R1:%.*]] = load float, ptr addrspace(1) [[GEP2]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float [[R0]], [[R1]]
; CHECK-NEXT:    [[TMP2:%.*]] = call float @llvm.trunc.f32(float [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = fneg float [[TMP2]]
; CHECK-NEXT:    [[R2:%.*]] = call float @llvm.fma.f32(float [[TMP3]], float [[R1]], float [[R0]])
; CHECK-NEXT:    store float [[R2]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
    ptr addrspace(1) %in2) {
  %gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4
  %r0 = load float, ptr addrspace(1) %in1, align 4
  %r1 = load float, ptr addrspace(1) %gep2, align 4
  %r2 = frem fast float %r0, %r1
  store float %r2, ptr addrspace(1) %out, align 4
  ret void
}

; frem on float with only the 'afn' flag: same short sequence as for 'fast'.
define amdgpu_kernel void @unsafe_frem_f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @unsafe_frem_f32(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) {
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr float, ptr addrspace(1) [[IN2]], i32 4
; CHECK-NEXT:    [[R0:%.*]] = load float, ptr addrspace(1) [[IN1]], align 4
; CHECK-NEXT:    [[R1:%.*]] = load float, ptr addrspace(1) [[GEP2]], align 4
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float [[R0]], [[R1]]
; CHECK-NEXT:    [[TMP2:%.*]] = call float @llvm.trunc.f32(float [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = fneg float [[TMP2]]
; CHECK-NEXT:    [[R2:%.*]] = call float @llvm.fma.f32(float [[TMP3]], float [[R1]], float [[R0]])
; CHECK-NEXT:    store float [[R2]], ptr addrspace(1) [[OUT]], align 4
; CHECK-NEXT:    ret void
;
    ptr addrspace(1) %in2) {
  %gep2 = getelementptr float, ptr addrspace(1) %in2, i32 4
  %r0 = load float, ptr addrspace(1) %in1, align 4
  %r1 = load float, ptr addrspace(1) %gep2, align 4
  %r2 = frem afn float %r0, %r1
  store float %r2, ptr addrspace(1) %out, align 4
  ret void
}

; frem on double with no fast-math flags: expanded with the
; frexp/ldexp/rint/fma loop in double, 26 bits per step.
define amdgpu_kernel void @frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @frem_f64(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) {
; CHECK-NEXT:    [[R0:%.*]] = load double, ptr addrspace(1) [[IN1]], align 8
; CHECK-NEXT:    [[R1:%.*]] = load double, ptr addrspace(1) [[IN2]], align 8
; CHECK-NEXT:    [[AX:%.*]] = call double @llvm.fabs.f64(double [[R0]])
; CHECK-NEXT:    [[AY:%.*]] = call double @llvm.fabs.f64(double [[R1]])
; CHECK-NEXT:    [[TMP1:%.*]] = fcmp ogt double [[AX]], [[AY]]
; CHECK-NEXT:    br i1 [[TMP1]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]]
; CHECK:       [[BB2:.*]]:
; CHECK-NEXT:    [[RET:%.*]] = phi double [ [[TMP24:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP16:%.*]], %[[FREM_ELSE]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ueq double [[R1]], 0.000000e+00
; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], double 0x7FF8000000000000, double [[RET]]
; CHECK-NEXT:    [[TMP5:%.*]] = call double @llvm.fabs.f64(double [[R0]])
; CHECK-NEXT:    [[TMP6:%.*]] = fcmp ult double [[TMP5]], 0x7FF0000000000000
; CHECK-NEXT:    [[R2:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0x7FF8000000000000
; CHECK-NEXT:    store double [[R2]], ptr addrspace(1) [[OUT]], align 8
; CHECK-NEXT:    ret void
; CHECK:       [[FREM_COMPUTE]]:
; CHECK-NEXT:    [[TMP7:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AX]])
; CHECK-NEXT:    [[TMP8:%.*]] = extractvalue { double, i32 } [[TMP7]], 0
; CHECK-NEXT:    [[TMP9:%.*]] = extractvalue { double, i32 } [[TMP7]], 1
; CHECK-NEXT:    [[EX:%.*]] = sub i32 [[TMP9]], 1
; CHECK-NEXT:    [[AX1:%.*]] = call double @llvm.ldexp.f64.i32(double [[TMP8]], i32 26)
; CHECK-NEXT:    [[TMP10:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AY]])
; CHECK-NEXT:    [[TMP11:%.*]] = extractvalue { double, i32 } [[TMP10]], 0
; CHECK-NEXT:    [[TMP12:%.*]] = extractvalue { double, i32 } [[TMP10]], 1
; CHECK-NEXT:    [[EY:%.*]] = sub i32 [[TMP12]], 1
; CHECK-NEXT:    [[AY2:%.*]] = call double @llvm.ldexp.f64.i32(double [[TMP11]], i32 1)
; CHECK-NEXT:    [[NB:%.*]] = sub i32 [[EX]], [[EY]]
; CHECK-NEXT:    [[AYINV:%.*]] = fdiv double 1.000000e+00, [[AY2]]
; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i32 [[NB]], 26
; CHECK-NEXT:    br i1 [[TMP13]], label %[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]]
; CHECK:       [[FREM_ELSE]]:
; CHECK-NEXT:    [[TMP14:%.*]] = call double @llvm.copysign.f64(double 0.000000e+00, double [[R0]])
; CHECK-NEXT:    [[TMP15:%.*]] = fcmp oeq double [[AX]], [[AY]]
; CHECK-NEXT:    [[TMP16]] = select i1 [[TMP15]], double [[TMP14]], double [[R0]]
; CHECK-NEXT:    br label %[[BB2]]
; CHECK:       [[FREM_LOOP_BODY]]:
; CHECK-NEXT:    [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ]
; CHECK-NEXT:    [[AX_LOOP_PHI:%.*]] = phi double [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ]
; CHECK-NEXT:    [[TMP17:%.*]] = fmul double [[AX_LOOP_PHI]], [[AYINV]]
; CHECK-NEXT:    [[Q:%.*]] = call double @llvm.rint.f64(double [[TMP17]])
; CHECK-NEXT:    [[TMP18:%.*]] = fneg double [[Q]]
; CHECK-NEXT:    [[AX3:%.*]] = call double @llvm.fma.f64(double [[TMP18]], double [[AY2]], double [[AX_LOOP_PHI]])
; CHECK-NEXT:    [[CLT:%.*]] = fcmp olt double [[AX3]], 0.000000e+00
; CHECK-NEXT:    [[AXP:%.*]] = fadd double [[AX3]], [[AY2]]
; CHECK-NEXT:    [[AX4:%.*]] = select i1 [[CLT]], double [[AXP]], double [[AX3]]
; CHECK-NEXT:    [[AX_UPDATE]] = call double @llvm.ldexp.f64.i32(double [[AX4]], i32 26)
; CHECK-NEXT:    [[NB_UPDATE]] = sub i32 [[NB_IV]], 26
; CHECK-NEXT:    [[TMP19:%.*]] = icmp sgt i32 [[NB_IV]], 26
; CHECK-NEXT:    br i1 [[TMP19]], label %[[FREM_LOOP_BODY]], label %[[FREM_LOOP_EXIT]]
; CHECK:       [[FREM_LOOP_EXIT]]:
; CHECK-NEXT:    [[AX_EXIT_PHI:%.*]] = phi double [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_LOOP_PHI]], %[[FREM_LOOP_BODY]] ]
; CHECK-NEXT:    [[NB_EXIT_PHI:%.*]] = phi i32 [ [[NB_IV]], %[[FREM_LOOP_BODY]] ], [ [[NB]], %[[FREM_COMPUTE]] ]
; CHECK-NEXT:    [[TMP20:%.*]] = sub i32 [[NB_EXIT_PHI]], 26
; CHECK-NEXT:    [[TMP21:%.*]] = add i32 [[TMP20]], 1
; CHECK-NEXT:    [[AX5:%.*]] = call double @llvm.ldexp.f64.i32(double [[AX_EXIT_PHI]], i32 [[TMP21]])
; CHECK-NEXT:    [[TMP22:%.*]] = fmul double [[AX5]], [[AYINV]]
; CHECK-NEXT:    [[Q6:%.*]] = call double @llvm.rint.f64(double [[TMP22]])
; CHECK-NEXT:    [[TMP23:%.*]] = fneg double [[Q6]]
; CHECK-NEXT:    [[AX7:%.*]] = call double @llvm.fma.f64(double [[TMP23]], double [[AY2]], double [[AX5]])
; CHECK-NEXT:    [[CLT8:%.*]] = fcmp olt double [[AX7]], 0.000000e+00
; CHECK-NEXT:    [[AXP9:%.*]] = fadd double [[AX7]], [[AY2]]
; CHECK-NEXT:    [[AX10:%.*]] = select i1 [[CLT8]], double [[AXP9]], double [[AX7]]
; CHECK-NEXT:    [[AX11:%.*]] = call double @llvm.ldexp.f64.i32(double [[AX10]], i32 [[EY]])
; CHECK-NEXT:    [[TMP24]] = call double @llvm.copysign.f64(double [[AX11]], double [[R0]])
; CHECK-NEXT:    br label %[[BB2]]
;
    ptr addrspace(1) %in2) {
  %r0 = load double, ptr addrspace(1) %in1, align 8
  %r1 = load double, ptr addrspace(1) %in2, align 8
  %r2 = frem double %r0, %r1
  store double %r2, ptr addrspace(1) %out, align 8
  ret void
}

; frem on double with the 'fast' flags: short fdiv / trunc / fneg / fma
; sequence, no loop.
define amdgpu_kernel void @fast_frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @fast_frem_f64(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) {
; CHECK-NEXT:    [[R0:%.*]] = load double, ptr addrspace(1) [[IN1]], align 8
; CHECK-NEXT:    [[R1:%.*]] = load double, ptr addrspace(1) [[IN2]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv double [[R0]], [[R1]]
; CHECK-NEXT:    [[TMP2:%.*]] = call double @llvm.trunc.f64(double [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = fneg double [[TMP2]]
; CHECK-NEXT:    [[R2:%.*]] = call double @llvm.fma.f64(double [[TMP3]], double [[R1]], double [[R0]])
; CHECK-NEXT:    store double [[R2]], ptr addrspace(1) [[OUT]], align 8
; CHECK-NEXT:    ret void
;
    ptr addrspace(1) %in2) {
  %r0 = load double, ptr addrspace(1) %in1, align 8
  %r1 = load double, ptr addrspace(1) %in2, align 8
  %r2 = frem fast double %r0, %r1
  store double %r2, ptr addrspace(1) %out, align 8
  ret void
}

; frem on double with only the 'afn' flag: same short sequence as for 'fast'.
define amdgpu_kernel void @unsafe_frem_f64(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @unsafe_frem_f64(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) {
; CHECK-NEXT:    [[R0:%.*]] = load double, ptr addrspace(1) [[IN1]], align 8
; CHECK-NEXT:    [[R1:%.*]] = load double, ptr addrspace(1) [[IN2]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = fdiv double [[R0]], [[R1]]
; CHECK-NEXT:    [[TMP2:%.*]] = call double @llvm.trunc.f64(double [[TMP1]])
; CHECK-NEXT:    [[TMP3:%.*]] = fneg double [[TMP2]]
; CHECK-NEXT:    [[R2:%.*]] = call double @llvm.fma.f64(double [[TMP3]], double [[R1]], double [[R0]])
; CHECK-NEXT:    store double [[R2]], ptr addrspace(1) [[OUT]], align 8
; CHECK-NEXT:    ret void
;
    ptr addrspace(1) %in2) {
  %r0 = load double, ptr addrspace(1) %in1, align 8
  %r1 = load double, ptr addrspace(1) %in2, align 8
  %r2 = frem afn double %r0, %r1
  store double %r2, ptr addrspace(1) %out, align 8
  ret void
}

; frem on <2 x half> with no fast-math flags: each lane is extracted, expanded
; like the scalar half case (via float, 11 bits per loop step) and inserted
; into the result vector.
define amdgpu_kernel void @frem_v2f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @frem_v2f16(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) {
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <2 x half>, ptr addrspace(1) [[IN2]], i32 4
; CHECK-NEXT:    [[R0:%.*]] = load <2 x half>, ptr addrspace(1) [[IN1]], align 8
; CHECK-NEXT:    [[R1:%.*]] = load <2 x half>, ptr addrspace(1) [[GEP2]], align 8
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x half> [[R0]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x half> [[R1]], i64 0
; CHECK-NEXT:    [[AX:%.*]] = call half @llvm.fabs.f16(half [[TMP1]])
; CHECK-NEXT:    [[AY:%.*]] = call half @llvm.fabs.f16(half [[TMP2]])
; CHECK-NEXT:    [[AX1:%.*]] = fpext half [[AX]] to float
; CHECK-NEXT:    [[AY2:%.*]] = fpext half [[AY]] to float
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt float [[AX1]], [[AY2]]
; CHECK-NEXT:    br i1 [[TMP3]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]]
; CHECK:       [[BB4:.*]]:
; CHECK-NEXT:    [[RET:%.*]] = phi half [ [[TMP38:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP29:%.*]], %[[FREM_ELSE]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ueq half [[TMP2]], 0xH0000
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], half 0xH7E00, half [[RET]]
; CHECK-NEXT:    [[TMP7:%.*]] = call half @llvm.fabs.f16(half [[TMP1]])
; CHECK-NEXT:    [[TMP8:%.*]] = fcmp ult half [[TMP7]], 0xH7C00
; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], half [[TMP6]], half 0xH7E00
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x half> poison, half [[TMP9]], i64 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x half> [[R0]], i64 1
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <2 x half> [[R1]], i64 1
; CHECK-NEXT:    [[AX14:%.*]] = call half @llvm.fabs.f16(half [[TMP11]])
; CHECK-NEXT:    [[AY15:%.*]] = call half @llvm.fabs.f16(half [[TMP12]])
; CHECK-NEXT:    [[AX16:%.*]] = fpext half [[AX14]] to float
; CHECK-NEXT:    [[AY17:%.*]] = fpext half [[AY15]] to float
; CHECK-NEXT:    [[TMP13:%.*]] = fcmp ogt float [[AX16]], [[AY17]]
; CHECK-NEXT:    br i1 [[TMP13]], label %[[FREM_COMPUTE19:.*]], label %[[FREM_ELSE20:.*]]
; CHECK:       [[BB14:.*]]:
; CHECK-NEXT:    [[RET18:%.*]] = phi half [ [[TMP57:%.*]], %[[FREM_LOOP_EXIT28:.*]] ], [ [[TMP48:%.*]], %[[FREM_ELSE20]] ]
; CHECK-NEXT:    [[TMP15:%.*]] = fcmp ueq half [[TMP12]], 0xH0000
; CHECK-NEXT:    [[TMP16:%.*]] = select i1 [[TMP15]], half 0xH7E00, half [[RET18]]
; CHECK-NEXT:    [[TMP17:%.*]] = call half @llvm.fabs.f16(half [[TMP11]])
; CHECK-NEXT:    [[TMP18:%.*]] = fcmp ult half [[TMP17]], 0xH7C00
; CHECK-NEXT:    [[TMP19:%.*]] = select i1 [[TMP18]], half [[TMP16]], half 0xH7E00
; CHECK-NEXT:    [[R2:%.*]] = insertelement <2 x half> [[TMP10]], half [[TMP19]], i64 1
; CHECK-NEXT:    store <2 x half> [[R2]], ptr addrspace(1) [[OUT]], align 8
; CHECK-NEXT:    ret void
; CHECK:       [[FREM_COMPUTE]]:
; CHECK-NEXT:    [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX1]])
; CHECK-NEXT:    [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0
; CHECK-NEXT:    [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP20]], 1
; CHECK-NEXT:    [[EX:%.*]] = sub i32 [[TMP22]], 1
; CHECK-NEXT:    [[AX3:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP21]], i32 11)
; CHECK-NEXT:    [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY2]])
; CHECK-NEXT:    [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0
; CHECK-NEXT:    [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1
; CHECK-NEXT:    [[EY:%.*]] = sub i32 [[TMP25]], 1
; CHECK-NEXT:    [[AY4:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP24]], i32 1)
; CHECK-NEXT:    [[NB:%.*]] = sub i32 [[EX]], [[EY]]
; CHECK-NEXT:    [[AYINV:%.*]] = fdiv float 1.000000e+00, [[AY4]]
; CHECK-NEXT:    [[TMP26:%.*]] = icmp sgt i32 [[NB]], 11
; CHECK-NEXT:    br i1 [[TMP26]], label %[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]]
; CHECK:       [[FREM_ELSE]]:
; CHECK-NEXT:    [[TMP27:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP1]])
; CHECK-NEXT:    [[TMP28:%.*]] = fcmp oeq float [[AX1]], [[AY2]]
; CHECK-NEXT:    [[TMP29]] = select i1 [[TMP28]], half [[TMP27]], half [[TMP1]]
; CHECK-NEXT:    br label %[[BB4]]
; CHECK:       [[FREM_LOOP_BODY]]:
; CHECK-NEXT:    [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ]
; CHECK-NEXT:    [[AX_LOOP_PHI:%.*]] = phi float [ [[AX3]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ]
; CHECK-NEXT:    [[TMP30:%.*]] = fmul float [[AX_LOOP_PHI]], [[AYINV]]
; CHECK-NEXT:    [[Q:%.*]] = call float @llvm.rint.f32(float [[TMP30]])
; CHECK-NEXT:    [[TMP31:%.*]] = fneg float [[Q]]
; CHECK-NEXT:    [[AX5:%.*]] = call float @llvm.fma.f32(float [[TMP31]], float [[AY4]], float [[AX_LOOP_PHI]])
; CHECK-NEXT:    [[CLT:%.*]] = fcmp olt float [[AX5]], 0.000000e+00
; CHECK-NEXT:    [[AXP:%.*]] = fadd float [[AX5]], [[AY4]]
; CHECK-NEXT:    [[AX6:%.*]] = select i1 [[CLT]], float [[AXP]], float [[AX5]]
; CHECK-NEXT:    [[AX_UPDATE]] = call float @llvm.ldexp.f32.i32(float [[AX6]], i32 11)
; CHECK-NEXT:    [[NB_UPDATE]] = sub i32 [[NB_IV]], 11
; CHECK-NEXT:    [[TMP32:%.*]] = icmp sgt i32 [[NB_IV]], 11
; CHECK-NEXT:    br i1 [[TMP32]], label %[[FREM_LOOP_BODY]], label %[[FREM_LOOP_EXIT]]
; CHECK:       [[FREM_LOOP_EXIT]]:
; CHECK-NEXT:    [[AX_EXIT_PHI:%.*]] = phi float [ [[AX3]], %[[FREM_COMPUTE]] ], [ [[AX_LOOP_PHI]], %[[FREM_LOOP_BODY]] ]
; CHECK-NEXT:    [[NB_EXIT_PHI:%.*]] = phi i32 [ [[NB_IV]], %[[FREM_LOOP_BODY]] ], [ [[NB]], %[[FREM_COMPUTE]] ]
; CHECK-NEXT:    [[TMP33:%.*]] = sub i32 [[NB_EXIT_PHI]], 11
; CHECK-NEXT:    [[TMP34:%.*]] = add i32 [[TMP33]], 1
; CHECK-NEXT:    [[AX7:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI]], i32 [[TMP34]])
; CHECK-NEXT:    [[TMP35:%.*]] = fmul float [[AX7]], [[AYINV]]
; CHECK-NEXT:    [[Q8:%.*]] = call float @llvm.rint.f32(float [[TMP35]])
; CHECK-NEXT:    [[TMP36:%.*]] = fneg float [[Q8]]
; CHECK-NEXT:    [[AX9:%.*]] = call float @llvm.fma.f32(float [[TMP36]], float [[AY4]], float [[AX7]])
; CHECK-NEXT:    [[CLT10:%.*]] = fcmp olt float [[AX9]], 0.000000e+00
; CHECK-NEXT:    [[AXP11:%.*]] = fadd float [[AX9]], [[AY4]]
; CHECK-NEXT:    [[AX12:%.*]] = select i1 [[CLT10]], float [[AXP11]], float [[AX9]]
; CHECK-NEXT:    [[AX13:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX12]], i32 [[EY]])
; CHECK-NEXT:    [[TMP37:%.*]] = fptrunc float [[AX13]] to half
; CHECK-NEXT:    [[TMP38]] = call half @llvm.copysign.f16(half [[TMP37]], half [[TMP1]])
; CHECK-NEXT:    br label %[[BB4]]
; CHECK:       [[FREM_COMPUTE19]]:
; CHECK-NEXT:    [[TMP39:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX16]])
; CHECK-NEXT:    [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP39]], 0
; CHECK-NEXT:    [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP39]], 1
; CHECK-NEXT:    [[EX21:%.*]] = sub i32 [[TMP41]], 1
; CHECK-NEXT:    [[AX22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP40]], i32 11)
; CHECK-NEXT:    [[TMP42:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY17]])
; CHECK-NEXT:    [[TMP43:%.*]] = extractvalue { float, i32 } [[TMP42]], 0
; CHECK-NEXT:    [[TMP44:%.*]] = extractvalue { float, i32 } [[TMP42]], 1
; CHECK-NEXT:    [[EY23:%.*]] = sub i32 [[TMP44]], 1
; CHECK-NEXT:    [[AY24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP43]], i32 1)
; CHECK-NEXT:    [[NB25:%.*]] = sub i32 [[EX21]], [[EY23]]
; CHECK-NEXT:    [[AYINV26:%.*]] = fdiv float 1.000000e+00, [[AY24]]
; CHECK-NEXT:    [[TMP45:%.*]] = icmp sgt i32 [[NB25]], 11
; CHECK-NEXT:    br i1 [[TMP45]], label %[[FREM_LOOP_BODY27:.*]], label %[[FREM_LOOP_EXIT28]]
; CHECK:       [[FREM_ELSE20]]:
; CHECK-NEXT:    [[TMP46:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP11]])
; CHECK-NEXT:    [[TMP47:%.*]] = fcmp oeq float [[AX16]], [[AY17]]
; CHECK-NEXT:    [[TMP48]] = select i1 [[TMP47]], half [[TMP46]], half [[TMP11]]
; CHECK-NEXT:    br label %[[BB14]]
; CHECK:       [[FREM_LOOP_BODY27]]:
; CHECK-NEXT:    [[NB_IV29:%.*]] = phi i32 [ [[NB25]], %[[FREM_COMPUTE19]] ], [ [[NB_UPDATE37:%.*]], %[[FREM_LOOP_BODY27]] ]
; CHECK-NEXT:    [[AX_LOOP_PHI30:%.*]] = phi float [ [[AX22]], %[[FREM_COMPUTE19]] ], [ [[AX_UPDATE36:%.*]], %[[FREM_LOOP_BODY27]] ]
; CHECK-NEXT:    [[TMP49:%.*]] = fmul float [[AX_LOOP_PHI30]], [[AYINV26]]
; CHECK-NEXT:    [[Q31:%.*]] = call float @llvm.rint.f32(float [[TMP49]])
; CHECK-NEXT:    [[TMP50:%.*]] = fneg float [[Q31]]
; CHECK-NEXT:    [[AX32:%.*]] = call float @llvm.fma.f32(float [[TMP50]], float [[AY24]], float [[AX_LOOP_PHI30]])
; CHECK-NEXT:    [[CLT33:%.*]] = fcmp olt float [[AX32]], 0.000000e+00
; CHECK-NEXT:    [[AXP34:%.*]] = fadd float [[AX32]], [[AY24]]
; CHECK-NEXT:    [[AX35:%.*]] = select i1 [[CLT33]], float [[AXP34]], float [[AX32]]
; CHECK-NEXT:    [[AX_UPDATE36]] = call float @llvm.ldexp.f32.i32(float [[AX35]], i32 11)
; CHECK-NEXT:    [[NB_UPDATE37]] = sub i32 [[NB_IV29]], 11
; CHECK-NEXT:    [[TMP51:%.*]] = icmp sgt i32 [[NB_IV29]], 11
; CHECK-NEXT:    br i1 [[TMP51]], label %[[FREM_LOOP_BODY27]], label %[[FREM_LOOP_EXIT28]]
; CHECK:       [[FREM_LOOP_EXIT28]]:
; CHECK-NEXT:    [[AX_EXIT_PHI38:%.*]] = phi float [ [[AX22]], %[[FREM_COMPUTE19]] ], [ [[AX_LOOP_PHI30]], %[[FREM_LOOP_BODY27]] ]
; CHECK-NEXT:    [[NB_EXIT_PHI39:%.*]] = phi i32 [ [[NB_IV29]], %[[FREM_LOOP_BODY27]] ], [ [[NB25]], %[[FREM_COMPUTE19]] ]
; CHECK-NEXT:    [[TMP52:%.*]] = sub i32 [[NB_EXIT_PHI39]], 11
; CHECK-NEXT:    [[TMP53:%.*]] = add i32 [[TMP52]], 1
; CHECK-NEXT:    [[AX40:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI38]], i32 [[TMP53]])
; CHECK-NEXT:    [[TMP54:%.*]] = fmul float [[AX40]], [[AYINV26]]
; CHECK-NEXT:    [[Q41:%.*]] = call float @llvm.rint.f32(float [[TMP54]])
; CHECK-NEXT:    [[TMP55:%.*]] = fneg float [[Q41]]
; CHECK-NEXT:    [[AX42:%.*]] = call float @llvm.fma.f32(float [[TMP55]], float [[AY24]], float [[AX40]])
; CHECK-NEXT:    [[CLT43:%.*]] = fcmp olt float [[AX42]], 0.000000e+00
; CHECK-NEXT:    [[AXP44:%.*]] = fadd float [[AX42]], [[AY24]]
; CHECK-NEXT:    [[AX45:%.*]] = select i1 [[CLT43]], float [[AXP44]], float [[AX42]]
; CHECK-NEXT:    [[AX46:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX45]], i32 [[EY23]])
; CHECK-NEXT:    [[TMP56:%.*]] = fptrunc float [[AX46]] to half
; CHECK-NEXT:    [[TMP57]] = call half @llvm.copysign.f16(half [[TMP56]], half [[TMP11]])
; CHECK-NEXT:    br label %[[BB14]]
;
    ptr addrspace(1) %in2) {
  %gep2 = getelementptr <2 x half>, ptr addrspace(1) %in2, i32 4
  %r0 = load <2 x half>, ptr addrspace(1) %in1, align 8
  %r1 = load <2 x half>, ptr addrspace(1) %gep2, align 8
  %r2 = frem <2 x half> %r0, %r1
  store <2 x half> %r2, ptr addrspace(1) %out, align 8
  ret void
}

; frem on <4 x half> with no fast-math flags: lanes are extracted one at a
; time and expanded like the scalar half case.
define amdgpu_kernel void @frem_v4f16(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @frem_v4f16(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) {
; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr <4 x half>, ptr addrspace(1) [[IN2]], i32 4
; CHECK-NEXT:    [[R0:%.*]] = load <4 x half>, ptr addrspace(1) [[IN1]], align 16
; CHECK-NEXT:    [[R1:%.*]] = load <4 x half>, ptr addrspace(1) [[GEP2]], align 16
; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x half> [[R0]], i64 0
; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x half> [[R1]], i64 0
; CHECK-NEXT:    [[AX:%.*]] = call half @llvm.fabs.f16(half [[TMP1]])
; CHECK-NEXT:    [[AY:%.*]] = call half @llvm.fabs.f16(half [[TMP2]])
; CHECK-NEXT:    [[AX1:%.*]] = fpext half [[AX]] to float
; CHECK-NEXT:    [[AY2:%.*]] = fpext half [[AY]] to float
; CHECK-NEXT:    [[TMP3:%.*]] = fcmp ogt float [[AX1]], [[AY2]]
; CHECK-NEXT:    br i1 [[TMP3]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]]
; CHECK:       [[BB4:.*]]:
; CHECK-NEXT:    [[RET:%.*]] = phi half [ [[TMP58:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP49:%.*]], %[[FREM_ELSE]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = fcmp ueq half [[TMP2]], 0xH0000
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 [[TMP5]], half 0xH7E00, half [[RET]]
; CHECK-NEXT:    [[TMP7:%.*]] = call half @llvm.fabs.f16(half [[TMP1]])
; CHECK-NEXT:    [[TMP8:%.*]] = fcmp ult half [[TMP7]], 0xH7C00
; CHECK-NEXT:    [[TMP9:%.*]] = select i1 [[TMP8]], half [[TMP6]], half 0xH7E00
; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x half> poison, half [[TMP9]], i64 0
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x half> [[R0]], i64 1
; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x half> [[R1]], i64 1
; CHECK-NEXT:    [[AX14:%.*]] = call half
@llvm.fabs.f16(half [[TMP11]]) ; CHECK-NEXT: [[AY15:%.*]] = call half @llvm.fabs.f16(half [[TMP12]]) ; CHECK-NEXT: [[AX16:%.*]] = fpext half [[AX14]] to float ; CHECK-NEXT: [[AY17:%.*]] = fpext half [[AY15]] to float ; CHECK-NEXT: [[TMP13:%.*]] = fcmp ogt float [[AX16]], [[AY17]] ; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE19:.*]], label %[[FREM_ELSE20:.*]] ; CHECK: [[BB14:.*]]: ; CHECK-NEXT: [[RET18:%.*]] = phi half [ [[TMP77:%.*]], %[[FREM_LOOP_EXIT28:.*]] ], [ [[TMP68:%.*]], %[[FREM_ELSE20]] ] ; CHECK-NEXT: [[TMP15:%.*]] = fcmp ueq half [[TMP12]], 0xH0000 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], half 0xH7E00, half [[RET18]] ; CHECK-NEXT: [[TMP17:%.*]] = call half @llvm.fabs.f16(half [[TMP11]]) ; CHECK-NEXT: [[TMP18:%.*]] = fcmp ult half [[TMP17]], 0xH7C00 ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], half [[TMP16]], half 0xH7E00 ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x half> [[TMP10]], half [[TMP19]], i64 1 ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x half> [[R0]], i64 2 ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x half> [[R1]], i64 2 ; CHECK-NEXT: [[AX47:%.*]] = call half @llvm.fabs.f16(half [[TMP21]]) ; CHECK-NEXT: [[AY48:%.*]] = call half @llvm.fabs.f16(half [[TMP22]]) ; CHECK-NEXT: [[AX49:%.*]] = fpext half [[AX47]] to float ; CHECK-NEXT: [[AY50:%.*]] = fpext half [[AY48]] to float ; CHECK-NEXT: [[TMP23:%.*]] = fcmp ogt float [[AX49]], [[AY50]] ; CHECK-NEXT: br i1 [[TMP23]], label %[[FREM_COMPUTE52:.*]], label %[[FREM_ELSE53:.*]] ; CHECK: [[BB24:.*]]: ; CHECK-NEXT: [[RET51:%.*]] = phi half [ [[TMP96:%.*]], %[[FREM_LOOP_EXIT61:.*]] ], [ [[TMP87:%.*]], %[[FREM_ELSE53]] ] ; CHECK-NEXT: [[TMP25:%.*]] = fcmp ueq half [[TMP22]], 0xH0000 ; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], half 0xH7E00, half [[RET51]] ; CHECK-NEXT: [[TMP27:%.*]] = call half @llvm.fabs.f16(half [[TMP21]]) ; CHECK-NEXT: [[TMP28:%.*]] = fcmp ult half [[TMP27]], 0xH7C00 ; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], half [[TMP26]], half 
0xH7E00 ; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x half> [[TMP20]], half [[TMP29]], i64 2 ; CHECK-NEXT: [[TMP31:%.*]] = extractelement <4 x half> [[R0]], i64 3 ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x half> [[R1]], i64 3 ; CHECK-NEXT: [[AX80:%.*]] = call half @llvm.fabs.f16(half [[TMP31]]) ; CHECK-NEXT: [[AY81:%.*]] = call half @llvm.fabs.f16(half [[TMP32]]) ; CHECK-NEXT: [[AX82:%.*]] = fpext half [[AX80]] to float ; CHECK-NEXT: [[AY83:%.*]] = fpext half [[AY81]] to float ; CHECK-NEXT: [[TMP33:%.*]] = fcmp ogt float [[AX82]], [[AY83]] ; CHECK-NEXT: br i1 [[TMP33]], label %[[FREM_COMPUTE85:.*]], label %[[FREM_ELSE86:.*]] ; CHECK: [[BB34:.*]]: ; CHECK-NEXT: [[RET84:%.*]] = phi half [ [[TMP115:%.*]], %[[FREM_LOOP_EXIT94:.*]] ], [ [[TMP106:%.*]], %[[FREM_ELSE86]] ] ; CHECK-NEXT: [[TMP35:%.*]] = fcmp ueq half [[TMP32]], 0xH0000 ; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], half 0xH7E00, half [[RET84]] ; CHECK-NEXT: [[TMP37:%.*]] = call half @llvm.fabs.f16(half [[TMP31]]) ; CHECK-NEXT: [[TMP38:%.*]] = fcmp ult half [[TMP37]], 0xH7C00 ; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], half [[TMP36]], half 0xH7E00 ; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x half> [[TMP30]], half [[TMP39]], i64 3 ; CHECK-NEXT: store <4 x half> [[R2]], ptr addrspace(1) [[OUT]], align 16 ; CHECK-NEXT: ret void ; CHECK: [[FREM_COMPUTE]]: ; CHECK-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX1]]) ; CHECK-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP40]], 0 ; CHECK-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP40]], 1 ; CHECK-NEXT: [[EX:%.*]] = sub i32 [[TMP42]], 1 ; CHECK-NEXT: [[AX3:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP41]], i32 11) ; CHECK-NEXT: [[TMP43:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY2]]) ; CHECK-NEXT: [[TMP44:%.*]] = extractvalue { float, i32 } [[TMP43]], 0 ; CHECK-NEXT: [[TMP45:%.*]] = extractvalue { float, i32 } [[TMP43]], 1 ; CHECK-NEXT: [[EY:%.*]] = sub i32 [[TMP45]], 1 ; 
CHECK-NEXT: [[AY4:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP44]], i32 1) ; CHECK-NEXT: [[NB:%.*]] = sub i32 [[EX]], [[EY]] ; CHECK-NEXT: [[AYINV:%.*]] = fdiv float 1.000000e+00, [[AY4]] ; CHECK-NEXT: [[TMP46:%.*]] = icmp sgt i32 [[NB]], 11 ; CHECK-NEXT: br i1 [[TMP46]], label %[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_ELSE]]: ; CHECK-NEXT: [[TMP47:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP1]]) ; CHECK-NEXT: [[TMP48:%.*]] = fcmp oeq float [[AX1]], [[AY2]] ; CHECK-NEXT: [[TMP49]] = select i1 [[TMP48]], half [[TMP47]], half [[TMP1]] ; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_LOOP_BODY]]: ; CHECK-NEXT: [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[AX_LOOP_PHI:%.*]] = phi float [ [[AX3]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[TMP50:%.*]] = fmul float [[AX_LOOP_PHI]], [[AYINV]] ; CHECK-NEXT: [[Q:%.*]] = call float @llvm.rint.f32(float [[TMP50]]) ; CHECK-NEXT: [[TMP51:%.*]] = fneg float [[Q]] ; CHECK-NEXT: [[AX5:%.*]] = call float @llvm.fma.f32(float [[TMP51]], float [[AY4]], float [[AX_LOOP_PHI]]) ; CHECK-NEXT: [[CLT:%.*]] = fcmp olt float [[AX5]], 0.000000e+00 ; CHECK-NEXT: [[AXP:%.*]] = fadd float [[AX5]], [[AY4]] ; CHECK-NEXT: [[AX6:%.*]] = select i1 [[CLT]], float [[AXP]], float [[AX5]] ; CHECK-NEXT: [[AX_UPDATE]] = call float @llvm.ldexp.f32.i32(float [[AX6]], i32 11) ; CHECK-NEXT: [[NB_UPDATE]] = sub i32 [[NB_IV]], 11 ; CHECK-NEXT: [[TMP52:%.*]] = icmp sgt i32 [[NB_IV]], 11 ; CHECK-NEXT: br i1 [[TMP52]], label %[[FREM_LOOP_BODY]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_LOOP_EXIT]]: ; CHECK-NEXT: [[AX_EXIT_PHI:%.*]] = phi float [ [[AX3]], %[[FREM_COMPUTE]] ], [ [[AX_LOOP_PHI]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[NB_EXIT_PHI:%.*]] = phi i32 [ [[NB_IV]], %[[FREM_LOOP_BODY]] ], [ [[NB]], %[[FREM_COMPUTE]] ] ; CHECK-NEXT: [[TMP53:%.*]] = sub i32 [[NB_EXIT_PHI]], 11 ; CHECK-NEXT: [[TMP54:%.*]] = add i32 
[[TMP53]], 1 ; CHECK-NEXT: [[AX7:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI]], i32 [[TMP54]]) ; CHECK-NEXT: [[TMP55:%.*]] = fmul float [[AX7]], [[AYINV]] ; CHECK-NEXT: [[Q8:%.*]] = call float @llvm.rint.f32(float [[TMP55]]) ; CHECK-NEXT: [[TMP56:%.*]] = fneg float [[Q8]] ; CHECK-NEXT: [[AX9:%.*]] = call float @llvm.fma.f32(float [[TMP56]], float [[AY4]], float [[AX7]]) ; CHECK-NEXT: [[CLT10:%.*]] = fcmp olt float [[AX9]], 0.000000e+00 ; CHECK-NEXT: [[AXP11:%.*]] = fadd float [[AX9]], [[AY4]] ; CHECK-NEXT: [[AX12:%.*]] = select i1 [[CLT10]], float [[AXP11]], float [[AX9]] ; CHECK-NEXT: [[AX13:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX12]], i32 [[EY]]) ; CHECK-NEXT: [[TMP57:%.*]] = fptrunc float [[AX13]] to half ; CHECK-NEXT: [[TMP58]] = call half @llvm.copysign.f16(half [[TMP57]], half [[TMP1]]) ; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_COMPUTE19]]: ; CHECK-NEXT: [[TMP59:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX16]]) ; CHECK-NEXT: [[TMP60:%.*]] = extractvalue { float, i32 } [[TMP59]], 0 ; CHECK-NEXT: [[TMP61:%.*]] = extractvalue { float, i32 } [[TMP59]], 1 ; CHECK-NEXT: [[EX21:%.*]] = sub i32 [[TMP61]], 1 ; CHECK-NEXT: [[AX22:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP60]], i32 11) ; CHECK-NEXT: [[TMP62:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY17]]) ; CHECK-NEXT: [[TMP63:%.*]] = extractvalue { float, i32 } [[TMP62]], 0 ; CHECK-NEXT: [[TMP64:%.*]] = extractvalue { float, i32 } [[TMP62]], 1 ; CHECK-NEXT: [[EY23:%.*]] = sub i32 [[TMP64]], 1 ; CHECK-NEXT: [[AY24:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP63]], i32 1) ; CHECK-NEXT: [[NB25:%.*]] = sub i32 [[EX21]], [[EY23]] ; CHECK-NEXT: [[AYINV26:%.*]] = fdiv float 1.000000e+00, [[AY24]] ; CHECK-NEXT: [[TMP65:%.*]] = icmp sgt i32 [[NB25]], 11 ; CHECK-NEXT: br i1 [[TMP65]], label %[[FREM_LOOP_BODY27:.*]], label %[[FREM_LOOP_EXIT28]] ; CHECK: [[FREM_ELSE20]]: ; CHECK-NEXT: [[TMP66:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP11]]) 
; CHECK-NEXT: [[TMP67:%.*]] = fcmp oeq float [[AX16]], [[AY17]] ; CHECK-NEXT: [[TMP68]] = select i1 [[TMP67]], half [[TMP66]], half [[TMP11]] ; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_LOOP_BODY27]]: ; CHECK-NEXT: [[NB_IV29:%.*]] = phi i32 [ [[NB25]], %[[FREM_COMPUTE19]] ], [ [[NB_UPDATE37:%.*]], %[[FREM_LOOP_BODY27]] ] ; CHECK-NEXT: [[AX_LOOP_PHI30:%.*]] = phi float [ [[AX22]], %[[FREM_COMPUTE19]] ], [ [[AX_UPDATE36:%.*]], %[[FREM_LOOP_BODY27]] ] ; CHECK-NEXT: [[TMP69:%.*]] = fmul float [[AX_LOOP_PHI30]], [[AYINV26]] ; CHECK-NEXT: [[Q31:%.*]] = call float @llvm.rint.f32(float [[TMP69]]) ; CHECK-NEXT: [[TMP70:%.*]] = fneg float [[Q31]] ; CHECK-NEXT: [[AX32:%.*]] = call float @llvm.fma.f32(float [[TMP70]], float [[AY24]], float [[AX_LOOP_PHI30]]) ; CHECK-NEXT: [[CLT33:%.*]] = fcmp olt float [[AX32]], 0.000000e+00 ; CHECK-NEXT: [[AXP34:%.*]] = fadd float [[AX32]], [[AY24]] ; CHECK-NEXT: [[AX35:%.*]] = select i1 [[CLT33]], float [[AXP34]], float [[AX32]] ; CHECK-NEXT: [[AX_UPDATE36]] = call float @llvm.ldexp.f32.i32(float [[AX35]], i32 11) ; CHECK-NEXT: [[NB_UPDATE37]] = sub i32 [[NB_IV29]], 11 ; CHECK-NEXT: [[TMP71:%.*]] = icmp sgt i32 [[NB_IV29]], 11 ; CHECK-NEXT: br i1 [[TMP71]], label %[[FREM_LOOP_BODY27]], label %[[FREM_LOOP_EXIT28]] ; CHECK: [[FREM_LOOP_EXIT28]]: ; CHECK-NEXT: [[AX_EXIT_PHI38:%.*]] = phi float [ [[AX22]], %[[FREM_COMPUTE19]] ], [ [[AX_LOOP_PHI30]], %[[FREM_LOOP_BODY27]] ] ; CHECK-NEXT: [[NB_EXIT_PHI39:%.*]] = phi i32 [ [[NB_IV29]], %[[FREM_LOOP_BODY27]] ], [ [[NB25]], %[[FREM_COMPUTE19]] ] ; CHECK-NEXT: [[TMP72:%.*]] = sub i32 [[NB_EXIT_PHI39]], 11 ; CHECK-NEXT: [[TMP73:%.*]] = add i32 [[TMP72]], 1 ; CHECK-NEXT: [[AX40:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI38]], i32 [[TMP73]]) ; CHECK-NEXT: [[TMP74:%.*]] = fmul float [[AX40]], [[AYINV26]] ; CHECK-NEXT: [[Q41:%.*]] = call float @llvm.rint.f32(float [[TMP74]]) ; CHECK-NEXT: [[TMP75:%.*]] = fneg float [[Q41]] ; CHECK-NEXT: [[AX42:%.*]] = call float @llvm.fma.f32(float 
[[TMP75]], float [[AY24]], float [[AX40]]) ; CHECK-NEXT: [[CLT43:%.*]] = fcmp olt float [[AX42]], 0.000000e+00 ; CHECK-NEXT: [[AXP44:%.*]] = fadd float [[AX42]], [[AY24]] ; CHECK-NEXT: [[AX45:%.*]] = select i1 [[CLT43]], float [[AXP44]], float [[AX42]] ; CHECK-NEXT: [[AX46:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX45]], i32 [[EY23]]) ; CHECK-NEXT: [[TMP76:%.*]] = fptrunc float [[AX46]] to half ; CHECK-NEXT: [[TMP77]] = call half @llvm.copysign.f16(half [[TMP76]], half [[TMP11]]) ; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_COMPUTE52]]: ; CHECK-NEXT: [[TMP78:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX49]]) ; CHECK-NEXT: [[TMP79:%.*]] = extractvalue { float, i32 } [[TMP78]], 0 ; CHECK-NEXT: [[TMP80:%.*]] = extractvalue { float, i32 } [[TMP78]], 1 ; CHECK-NEXT: [[EX54:%.*]] = sub i32 [[TMP80]], 1 ; CHECK-NEXT: [[AX55:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP79]], i32 11) ; CHECK-NEXT: [[TMP81:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY50]]) ; CHECK-NEXT: [[TMP82:%.*]] = extractvalue { float, i32 } [[TMP81]], 0 ; CHECK-NEXT: [[TMP83:%.*]] = extractvalue { float, i32 } [[TMP81]], 1 ; CHECK-NEXT: [[EY56:%.*]] = sub i32 [[TMP83]], 1 ; CHECK-NEXT: [[AY57:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP82]], i32 1) ; CHECK-NEXT: [[NB58:%.*]] = sub i32 [[EX54]], [[EY56]] ; CHECK-NEXT: [[AYINV59:%.*]] = fdiv float 1.000000e+00, [[AY57]] ; CHECK-NEXT: [[TMP84:%.*]] = icmp sgt i32 [[NB58]], 11 ; CHECK-NEXT: br i1 [[TMP84]], label %[[FREM_LOOP_BODY60:.*]], label %[[FREM_LOOP_EXIT61]] ; CHECK: [[FREM_ELSE53]]: ; CHECK-NEXT: [[TMP85:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP21]]) ; CHECK-NEXT: [[TMP86:%.*]] = fcmp oeq float [[AX49]], [[AY50]] ; CHECK-NEXT: [[TMP87]] = select i1 [[TMP86]], half [[TMP85]], half [[TMP21]] ; CHECK-NEXT: br label %[[BB24]] ; CHECK: [[FREM_LOOP_BODY60]]: ; CHECK-NEXT: [[NB_IV62:%.*]] = phi i32 [ [[NB58]], %[[FREM_COMPUTE52]] ], [ [[NB_UPDATE70:%.*]], %[[FREM_LOOP_BODY60]] ] ; 
CHECK-NEXT: [[AX_LOOP_PHI63:%.*]] = phi float [ [[AX55]], %[[FREM_COMPUTE52]] ], [ [[AX_UPDATE69:%.*]], %[[FREM_LOOP_BODY60]] ] ; CHECK-NEXT: [[TMP88:%.*]] = fmul float [[AX_LOOP_PHI63]], [[AYINV59]] ; CHECK-NEXT: [[Q64:%.*]] = call float @llvm.rint.f32(float [[TMP88]]) ; CHECK-NEXT: [[TMP89:%.*]] = fneg float [[Q64]] ; CHECK-NEXT: [[AX65:%.*]] = call float @llvm.fma.f32(float [[TMP89]], float [[AY57]], float [[AX_LOOP_PHI63]]) ; CHECK-NEXT: [[CLT66:%.*]] = fcmp olt float [[AX65]], 0.000000e+00 ; CHECK-NEXT: [[AXP67:%.*]] = fadd float [[AX65]], [[AY57]] ; CHECK-NEXT: [[AX68:%.*]] = select i1 [[CLT66]], float [[AXP67]], float [[AX65]] ; CHECK-NEXT: [[AX_UPDATE69]] = call float @llvm.ldexp.f32.i32(float [[AX68]], i32 11) ; CHECK-NEXT: [[NB_UPDATE70]] = sub i32 [[NB_IV62]], 11 ; CHECK-NEXT: [[TMP90:%.*]] = icmp sgt i32 [[NB_IV62]], 11 ; CHECK-NEXT: br i1 [[TMP90]], label %[[FREM_LOOP_BODY60]], label %[[FREM_LOOP_EXIT61]] ; CHECK: [[FREM_LOOP_EXIT61]]: ; CHECK-NEXT: [[AX_EXIT_PHI71:%.*]] = phi float [ [[AX55]], %[[FREM_COMPUTE52]] ], [ [[AX_LOOP_PHI63]], %[[FREM_LOOP_BODY60]] ] ; CHECK-NEXT: [[NB_EXIT_PHI72:%.*]] = phi i32 [ [[NB_IV62]], %[[FREM_LOOP_BODY60]] ], [ [[NB58]], %[[FREM_COMPUTE52]] ] ; CHECK-NEXT: [[TMP91:%.*]] = sub i32 [[NB_EXIT_PHI72]], 11 ; CHECK-NEXT: [[TMP92:%.*]] = add i32 [[TMP91]], 1 ; CHECK-NEXT: [[AX73:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI71]], i32 [[TMP92]]) ; CHECK-NEXT: [[TMP93:%.*]] = fmul float [[AX73]], [[AYINV59]] ; CHECK-NEXT: [[Q74:%.*]] = call float @llvm.rint.f32(float [[TMP93]]) ; CHECK-NEXT: [[TMP94:%.*]] = fneg float [[Q74]] ; CHECK-NEXT: [[AX75:%.*]] = call float @llvm.fma.f32(float [[TMP94]], float [[AY57]], float [[AX73]]) ; CHECK-NEXT: [[CLT76:%.*]] = fcmp olt float [[AX75]], 0.000000e+00 ; CHECK-NEXT: [[AXP77:%.*]] = fadd float [[AX75]], [[AY57]] ; CHECK-NEXT: [[AX78:%.*]] = select i1 [[CLT76]], float [[AXP77]], float [[AX75]] ; CHECK-NEXT: [[AX79:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX78]], i32 
[[EY56]]) ; CHECK-NEXT: [[TMP95:%.*]] = fptrunc float [[AX79]] to half ; CHECK-NEXT: [[TMP96]] = call half @llvm.copysign.f16(half [[TMP95]], half [[TMP21]]) ; CHECK-NEXT: br label %[[BB24]] ; CHECK: [[FREM_COMPUTE85]]: ; CHECK-NEXT: [[TMP97:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX82]]) ; CHECK-NEXT: [[TMP98:%.*]] = extractvalue { float, i32 } [[TMP97]], 0 ; CHECK-NEXT: [[TMP99:%.*]] = extractvalue { float, i32 } [[TMP97]], 1 ; CHECK-NEXT: [[EX87:%.*]] = sub i32 [[TMP99]], 1 ; CHECK-NEXT: [[AX88:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP98]], i32 11) ; CHECK-NEXT: [[TMP100:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY83]]) ; CHECK-NEXT: [[TMP101:%.*]] = extractvalue { float, i32 } [[TMP100]], 0 ; CHECK-NEXT: [[TMP102:%.*]] = extractvalue { float, i32 } [[TMP100]], 1 ; CHECK-NEXT: [[EY89:%.*]] = sub i32 [[TMP102]], 1 ; CHECK-NEXT: [[AY90:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP101]], i32 1) ; CHECK-NEXT: [[NB91:%.*]] = sub i32 [[EX87]], [[EY89]] ; CHECK-NEXT: [[AYINV92:%.*]] = fdiv float 1.000000e+00, [[AY90]] ; CHECK-NEXT: [[TMP103:%.*]] = icmp sgt i32 [[NB91]], 11 ; CHECK-NEXT: br i1 [[TMP103]], label %[[FREM_LOOP_BODY93:.*]], label %[[FREM_LOOP_EXIT94]] ; CHECK: [[FREM_ELSE86]]: ; CHECK-NEXT: [[TMP104:%.*]] = call half @llvm.copysign.f16(half 0xH0000, half [[TMP31]]) ; CHECK-NEXT: [[TMP105:%.*]] = fcmp oeq float [[AX82]], [[AY83]] ; CHECK-NEXT: [[TMP106]] = select i1 [[TMP105]], half [[TMP104]], half [[TMP31]] ; CHECK-NEXT: br label %[[BB34]] ; CHECK: [[FREM_LOOP_BODY93]]: ; CHECK-NEXT: [[NB_IV95:%.*]] = phi i32 [ [[NB91]], %[[FREM_COMPUTE85]] ], [ [[NB_UPDATE103:%.*]], %[[FREM_LOOP_BODY93]] ] ; CHECK-NEXT: [[AX_LOOP_PHI96:%.*]] = phi float [ [[AX88]], %[[FREM_COMPUTE85]] ], [ [[AX_UPDATE102:%.*]], %[[FREM_LOOP_BODY93]] ] ; CHECK-NEXT: [[TMP107:%.*]] = fmul float [[AX_LOOP_PHI96]], [[AYINV92]] ; CHECK-NEXT: [[Q97:%.*]] = call float @llvm.rint.f32(float [[TMP107]]) ; CHECK-NEXT: [[TMP108:%.*]] = fneg float [[Q97]] ; 
CHECK-NEXT: [[AX98:%.*]] = call float @llvm.fma.f32(float [[TMP108]], float [[AY90]], float [[AX_LOOP_PHI96]]) ; CHECK-NEXT: [[CLT99:%.*]] = fcmp olt float [[AX98]], 0.000000e+00 ; CHECK-NEXT: [[AXP100:%.*]] = fadd float [[AX98]], [[AY90]] ; CHECK-NEXT: [[AX101:%.*]] = select i1 [[CLT99]], float [[AXP100]], float [[AX98]] ; CHECK-NEXT: [[AX_UPDATE102]] = call float @llvm.ldexp.f32.i32(float [[AX101]], i32 11) ; CHECK-NEXT: [[NB_UPDATE103]] = sub i32 [[NB_IV95]], 11 ; CHECK-NEXT: [[TMP109:%.*]] = icmp sgt i32 [[NB_IV95]], 11 ; CHECK-NEXT: br i1 [[TMP109]], label %[[FREM_LOOP_BODY93]], label %[[FREM_LOOP_EXIT94]] ; CHECK: [[FREM_LOOP_EXIT94]]: ; CHECK-NEXT: [[AX_EXIT_PHI104:%.*]] = phi float [ [[AX88]], %[[FREM_COMPUTE85]] ], [ [[AX_LOOP_PHI96]], %[[FREM_LOOP_BODY93]] ] ; CHECK-NEXT: [[NB_EXIT_PHI105:%.*]] = phi i32 [ [[NB_IV95]], %[[FREM_LOOP_BODY93]] ], [ [[NB91]], %[[FREM_COMPUTE85]] ] ; CHECK-NEXT: [[TMP110:%.*]] = sub i32 [[NB_EXIT_PHI105]], 11 ; CHECK-NEXT: [[TMP111:%.*]] = add i32 [[TMP110]], 1 ; CHECK-NEXT: [[AX106:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI104]], i32 [[TMP111]]) ; CHECK-NEXT: [[TMP112:%.*]] = fmul float [[AX106]], [[AYINV92]] ; CHECK-NEXT: [[Q107:%.*]] = call float @llvm.rint.f32(float [[TMP112]]) ; CHECK-NEXT: [[TMP113:%.*]] = fneg float [[Q107]] ; CHECK-NEXT: [[AX108:%.*]] = call float @llvm.fma.f32(float [[TMP113]], float [[AY90]], float [[AX106]]) ; CHECK-NEXT: [[CLT109:%.*]] = fcmp olt float [[AX108]], 0.000000e+00 ; CHECK-NEXT: [[AXP110:%.*]] = fadd float [[AX108]], [[AY90]] ; CHECK-NEXT: [[AX111:%.*]] = select i1 [[CLT109]], float [[AXP110]], float [[AX108]] ; CHECK-NEXT: [[AX112:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX111]], i32 [[EY89]]) ; CHECK-NEXT: [[TMP114:%.*]] = fptrunc float [[AX112]] to half ; CHECK-NEXT: [[TMP115]] = call half @llvm.copysign.f16(half [[TMP114]], half [[TMP31]]) ; CHECK-NEXT: br label %[[BB34]] ; ptr addrspace(1) %in2) { %gep2 = getelementptr <4 x half>, ptr addrspace(1) %in2, i32 4 
%r0 = load <4 x half>, ptr addrspace(1) %in1, align 16
  %r1 = load <4 x half>, ptr addrspace(1) %gep2, align 16
  %r2 = frem <4 x half> %r0, %r1
  store <4 x half> %r2, ptr addrspace(1) %out, align 16
  ret void
}

; frem_v2f32: frem of two <2 x float> vectors. The dividend is loaded from
; %in1, the divisor from %in2 advanced by four <2 x float> elements
; (getelementptr index 4), and the result is stored to %out.
;
; The expected output handles the vector one lane at a time: extractelement at
; indices 0 and 1, one scalar expansion per lane, insertelement to rebuild the
; result. The computation stays in float throughout (no fpext or fptrunc in the
; expected output); the reduction loop rescales with ldexp by 12 and lowers the
; exponent difference by 12 per iteration, and copysign gives the result the
; sign of the dividend lane.
;
; A lane produces 0x7FF8000000000000 (float NaN) when its divisor compares ueq
; to zero, or when the fcmp ult test of |dividend| against 0x7FF0000000000000
; (float +inf) is false.
;
; The assertions for this function are produced by utils/update_test_checks.py;
; rerun that script instead of editing them by hand.
define amdgpu_kernel void @frem_v2f32(ptr addrspace(1) %out, ptr addrspace(1) %in1,
; CHECK-LABEL: define amdgpu_kernel void @frem_v2f32(
; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) {
; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <2 x float>, ptr addrspace(1) [[IN2]], i32 4
; CHECK-NEXT: [[R0:%.*]] = load <2 x float>, ptr addrspace(1) [[IN1]], align 8
; CHECK-NEXT: [[R1:%.*]] = load <2 x float>, ptr addrspace(1) [[GEP2]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x float> [[R0]], i64 0
; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x float> [[R1]], i64 0
; CHECK-NEXT: [[AX:%.*]] = call float @llvm.fabs.f32(float [[TMP1]])
; CHECK-NEXT: [[AY:%.*]] = call float @llvm.fabs.f32(float [[TMP2]])
; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt float [[AX]], [[AY]]
; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]]
; CHECK: [[BB4:.*]]:
; CHECK-NEXT: [[RET:%.*]] = phi float [ [[TMP37:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP29:%.*]], %[[FREM_ELSE]] ]
; CHECK-NEXT: [[TMP5:%.*]] = fcmp ueq float [[TMP2]], 0.000000e+00
; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float 0x7FF8000000000000, float [[RET]]
; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.fabs.f32(float [[TMP1]])
; CHECK-NEXT: [[TMP8:%.*]] = fcmp ult float [[TMP7]], 0x7FF0000000000000
; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float 0x7FF8000000000000
; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x float> poison, float [[TMP9]], i64 0
; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x float> [[R0]], i64 1
; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x float> [[R1]], i64 1
; CHECK-NEXT: [[AX12:%.*]] = call float @llvm.fabs.f32(float [[TMP11]])
; CHECK-NEXT: [[AY13:%.*]] = call float
@llvm.fabs.f32(float [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = fcmp ogt float [[AX12]], [[AY13]] ; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE15:.*]], label %[[FREM_ELSE16:.*]] ; CHECK: [[BB14:.*]]: ; CHECK-NEXT: [[RET14:%.*]] = phi float [ [[TMP55:%.*]], %[[FREM_LOOP_EXIT24:.*]] ], [ [[TMP47:%.*]], %[[FREM_ELSE16]] ] ; CHECK-NEXT: [[TMP15:%.*]] = fcmp ueq float [[TMP12]], 0.000000e+00 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float 0x7FF8000000000000, float [[RET14]] ; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.fabs.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP18:%.*]] = fcmp ult float [[TMP17]], 0x7FF0000000000000 ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], float [[TMP16]], float 0x7FF8000000000000 ; CHECK-NEXT: [[R2:%.*]] = insertelement <2 x float> [[TMP10]], float [[TMP19]], i64 1 ; CHECK-NEXT: store <2 x float> [[R2]], ptr addrspace(1) [[OUT]], align 8 ; CHECK-NEXT: ret void ; CHECK: [[FREM_COMPUTE]]: ; CHECK-NEXT: [[TMP20:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX]]) ; CHECK-NEXT: [[TMP21:%.*]] = extractvalue { float, i32 } [[TMP20]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { float, i32 } [[TMP20]], 1 ; CHECK-NEXT: [[EX:%.*]] = sub i32 [[TMP22]], 1 ; CHECK-NEXT: [[AX1:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP21]], i32 12) ; CHECK-NEXT: [[TMP23:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY]]) ; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { float, i32 } [[TMP23]], 0 ; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { float, i32 } [[TMP23]], 1 ; CHECK-NEXT: [[EY:%.*]] = sub i32 [[TMP25]], 1 ; CHECK-NEXT: [[AY2:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP24]], i32 1) ; CHECK-NEXT: [[NB:%.*]] = sub i32 [[EX]], [[EY]] ; CHECK-NEXT: [[AYINV:%.*]] = fdiv float 1.000000e+00, [[AY2]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[NB]], 12 ; CHECK-NEXT: br i1 [[TMP26]], label %[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_ELSE]]: ; CHECK-NEXT: [[TMP27:%.*]] = call float @llvm.copysign.f32(float 
0.000000e+00, float [[TMP1]]) ; CHECK-NEXT: [[TMP28:%.*]] = fcmp oeq float [[AX]], [[AY]] ; CHECK-NEXT: [[TMP29]] = select i1 [[TMP28]], float [[TMP27]], float [[TMP1]] ; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_LOOP_BODY]]: ; CHECK-NEXT: [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[AX_LOOP_PHI:%.*]] = phi float [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[TMP30:%.*]] = fmul float [[AX_LOOP_PHI]], [[AYINV]] ; CHECK-NEXT: [[Q:%.*]] = call float @llvm.rint.f32(float [[TMP30]]) ; CHECK-NEXT: [[TMP31:%.*]] = fneg float [[Q]] ; CHECK-NEXT: [[AX3:%.*]] = call float @llvm.fma.f32(float [[TMP31]], float [[AY2]], float [[AX_LOOP_PHI]]) ; CHECK-NEXT: [[CLT:%.*]] = fcmp olt float [[AX3]], 0.000000e+00 ; CHECK-NEXT: [[AXP:%.*]] = fadd float [[AX3]], [[AY2]] ; CHECK-NEXT: [[AX4:%.*]] = select i1 [[CLT]], float [[AXP]], float [[AX3]] ; CHECK-NEXT: [[AX_UPDATE]] = call float @llvm.ldexp.f32.i32(float [[AX4]], i32 12) ; CHECK-NEXT: [[NB_UPDATE]] = sub i32 [[NB_IV]], 12 ; CHECK-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[NB_IV]], 12 ; CHECK-NEXT: br i1 [[TMP32]], label %[[FREM_LOOP_BODY]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_LOOP_EXIT]]: ; CHECK-NEXT: [[AX_EXIT_PHI:%.*]] = phi float [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_LOOP_PHI]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[NB_EXIT_PHI:%.*]] = phi i32 [ [[NB_IV]], %[[FREM_LOOP_BODY]] ], [ [[NB]], %[[FREM_COMPUTE]] ] ; CHECK-NEXT: [[TMP33:%.*]] = sub i32 [[NB_EXIT_PHI]], 12 ; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], 1 ; CHECK-NEXT: [[AX5:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI]], i32 [[TMP34]]) ; CHECK-NEXT: [[TMP35:%.*]] = fmul float [[AX5]], [[AYINV]] ; CHECK-NEXT: [[Q6:%.*]] = call float @llvm.rint.f32(float [[TMP35]]) ; CHECK-NEXT: [[TMP36:%.*]] = fneg float [[Q6]] ; CHECK-NEXT: [[AX7:%.*]] = call float @llvm.fma.f32(float [[TMP36]], float [[AY2]], float [[AX5]]) ; CHECK-NEXT: [[CLT8:%.*]] = 
fcmp olt float [[AX7]], 0.000000e+00 ; CHECK-NEXT: [[AXP9:%.*]] = fadd float [[AX7]], [[AY2]] ; CHECK-NEXT: [[AX10:%.*]] = select i1 [[CLT8]], float [[AXP9]], float [[AX7]] ; CHECK-NEXT: [[AX11:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX10]], i32 [[EY]]) ; CHECK-NEXT: [[TMP37]] = call float @llvm.copysign.f32(float [[AX11]], float [[TMP1]]) ; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_COMPUTE15]]: ; CHECK-NEXT: [[TMP38:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX12]]) ; CHECK-NEXT: [[TMP39:%.*]] = extractvalue { float, i32 } [[TMP38]], 0 ; CHECK-NEXT: [[TMP40:%.*]] = extractvalue { float, i32 } [[TMP38]], 1 ; CHECK-NEXT: [[EX17:%.*]] = sub i32 [[TMP40]], 1 ; CHECK-NEXT: [[AX18:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP39]], i32 12) ; CHECK-NEXT: [[TMP41:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY13]]) ; CHECK-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP41]], 0 ; CHECK-NEXT: [[TMP43:%.*]] = extractvalue { float, i32 } [[TMP41]], 1 ; CHECK-NEXT: [[EY19:%.*]] = sub i32 [[TMP43]], 1 ; CHECK-NEXT: [[AY20:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP42]], i32 1) ; CHECK-NEXT: [[NB21:%.*]] = sub i32 [[EX17]], [[EY19]] ; CHECK-NEXT: [[AYINV22:%.*]] = fdiv float 1.000000e+00, [[AY20]] ; CHECK-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[NB21]], 12 ; CHECK-NEXT: br i1 [[TMP44]], label %[[FREM_LOOP_BODY23:.*]], label %[[FREM_LOOP_EXIT24]] ; CHECK: [[FREM_ELSE16]]: ; CHECK-NEXT: [[TMP45:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP11]]) ; CHECK-NEXT: [[TMP46:%.*]] = fcmp oeq float [[AX12]], [[AY13]] ; CHECK-NEXT: [[TMP47]] = select i1 [[TMP46]], float [[TMP45]], float [[TMP11]] ; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_LOOP_BODY23]]: ; CHECK-NEXT: [[NB_IV25:%.*]] = phi i32 [ [[NB21]], %[[FREM_COMPUTE15]] ], [ [[NB_UPDATE33:%.*]], %[[FREM_LOOP_BODY23]] ] ; CHECK-NEXT: [[AX_LOOP_PHI26:%.*]] = phi float [ [[AX18]], %[[FREM_COMPUTE15]] ], [ [[AX_UPDATE32:%.*]], %[[FREM_LOOP_BODY23]] ] ; 
CHECK-NEXT: [[TMP48:%.*]] = fmul float [[AX_LOOP_PHI26]], [[AYINV22]] ; CHECK-NEXT: [[Q27:%.*]] = call float @llvm.rint.f32(float [[TMP48]]) ; CHECK-NEXT: [[TMP49:%.*]] = fneg float [[Q27]] ; CHECK-NEXT: [[AX28:%.*]] = call float @llvm.fma.f32(float [[TMP49]], float [[AY20]], float [[AX_LOOP_PHI26]]) ; CHECK-NEXT: [[CLT29:%.*]] = fcmp olt float [[AX28]], 0.000000e+00 ; CHECK-NEXT: [[AXP30:%.*]] = fadd float [[AX28]], [[AY20]] ; CHECK-NEXT: [[AX31:%.*]] = select i1 [[CLT29]], float [[AXP30]], float [[AX28]] ; CHECK-NEXT: [[AX_UPDATE32]] = call float @llvm.ldexp.f32.i32(float [[AX31]], i32 12) ; CHECK-NEXT: [[NB_UPDATE33]] = sub i32 [[NB_IV25]], 12 ; CHECK-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[NB_IV25]], 12 ; CHECK-NEXT: br i1 [[TMP50]], label %[[FREM_LOOP_BODY23]], label %[[FREM_LOOP_EXIT24]] ; CHECK: [[FREM_LOOP_EXIT24]]: ; CHECK-NEXT: [[AX_EXIT_PHI34:%.*]] = phi float [ [[AX18]], %[[FREM_COMPUTE15]] ], [ [[AX_LOOP_PHI26]], %[[FREM_LOOP_BODY23]] ] ; CHECK-NEXT: [[NB_EXIT_PHI35:%.*]] = phi i32 [ [[NB_IV25]], %[[FREM_LOOP_BODY23]] ], [ [[NB21]], %[[FREM_COMPUTE15]] ] ; CHECK-NEXT: [[TMP51:%.*]] = sub i32 [[NB_EXIT_PHI35]], 12 ; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 1 ; CHECK-NEXT: [[AX36:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI34]], i32 [[TMP52]]) ; CHECK-NEXT: [[TMP53:%.*]] = fmul float [[AX36]], [[AYINV22]] ; CHECK-NEXT: [[Q37:%.*]] = call float @llvm.rint.f32(float [[TMP53]]) ; CHECK-NEXT: [[TMP54:%.*]] = fneg float [[Q37]] ; CHECK-NEXT: [[AX38:%.*]] = call float @llvm.fma.f32(float [[TMP54]], float [[AY20]], float [[AX36]]) ; CHECK-NEXT: [[CLT39:%.*]] = fcmp olt float [[AX38]], 0.000000e+00 ; CHECK-NEXT: [[AXP40:%.*]] = fadd float [[AX38]], [[AY20]] ; CHECK-NEXT: [[AX41:%.*]] = select i1 [[CLT39]], float [[AXP40]], float [[AX38]] ; CHECK-NEXT: [[AX42:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX41]], i32 [[EY19]]) ; CHECK-NEXT: [[TMP55]] = call float @llvm.copysign.f32(float [[AX42]], float [[TMP11]]) ; CHECK-NEXT: br label %[[BB14]] 
; ptr addrspace(1) %in2) { %gep2 = getelementptr <2 x float>, ptr addrspace(1) %in2, i32 4 %r0 = load <2 x float>, ptr addrspace(1) %in1, align 8 %r1 = load <2 x float>, ptr addrspace(1) %gep2, align 8 %r2 = frem <2 x float> %r0, %r1 store <2 x float> %r2, ptr addrspace(1) %out, align 8 ret void } define amdgpu_kernel void @frem_v4f32(ptr addrspace(1) %out, ptr addrspace(1) %in1, ; CHECK-LABEL: define amdgpu_kernel void @frem_v4f32( ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) [[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) { ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <4 x float>, ptr addrspace(1) [[IN2]], i32 4 ; CHECK-NEXT: [[R0:%.*]] = load <4 x float>, ptr addrspace(1) [[IN1]], align 16 ; CHECK-NEXT: [[R1:%.*]] = load <4 x float>, ptr addrspace(1) [[GEP2]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[R0]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[R1]], i64 0 ; CHECK-NEXT: [[AX:%.*]] = call float @llvm.fabs.f32(float [[TMP1]]) ; CHECK-NEXT: [[AY:%.*]] = call float @llvm.fabs.f32(float [[TMP2]]) ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt float [[AX]], [[AY]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]] ; CHECK: [[BB4:.*]]: ; CHECK-NEXT: [[RET:%.*]] = phi float [ [[TMP57:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP49:%.*]], %[[FREM_ELSE]] ] ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ueq float [[TMP2]], 0.000000e+00 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], float 0x7FF8000000000000, float [[RET]] ; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.fabs.f32(float [[TMP1]]) ; CHECK-NEXT: [[TMP8:%.*]] = fcmp ult float [[TMP7]], 0x7FF0000000000000 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP6]], float 0x7FF8000000000000 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> poison, float [[TMP9]], i64 0 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x float> [[R0]], i64 1 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <4 x float> [[R1]], i64 1 ; CHECK-NEXT: [[AX12:%.*]] = call 
float @llvm.fabs.f32(float [[TMP11]]) ; CHECK-NEXT: [[AY13:%.*]] = call float @llvm.fabs.f32(float [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = fcmp ogt float [[AX12]], [[AY13]] ; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE15:.*]], label %[[FREM_ELSE16:.*]] ; CHECK: [[BB14:.*]]: ; CHECK-NEXT: [[RET14:%.*]] = phi float [ [[TMP75:%.*]], %[[FREM_LOOP_EXIT24:.*]] ], [ [[TMP67:%.*]], %[[FREM_ELSE16]] ] ; CHECK-NEXT: [[TMP15:%.*]] = fcmp ueq float [[TMP12]], 0.000000e+00 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], float 0x7FF8000000000000, float [[RET14]] ; CHECK-NEXT: [[TMP17:%.*]] = call float @llvm.fabs.f32(float [[TMP11]]) ; CHECK-NEXT: [[TMP18:%.*]] = fcmp ult float [[TMP17]], 0x7FF0000000000000 ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], float [[TMP16]], float 0x7FF8000000000000 ; CHECK-NEXT: [[TMP20:%.*]] = insertelement <4 x float> [[TMP10]], float [[TMP19]], i64 1 ; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x float> [[R0]], i64 2 ; CHECK-NEXT: [[TMP22:%.*]] = extractelement <4 x float> [[R1]], i64 2 ; CHECK-NEXT: [[AX43:%.*]] = call float @llvm.fabs.f32(float [[TMP21]]) ; CHECK-NEXT: [[AY44:%.*]] = call float @llvm.fabs.f32(float [[TMP22]]) ; CHECK-NEXT: [[TMP23:%.*]] = fcmp ogt float [[AX43]], [[AY44]] ; CHECK-NEXT: br i1 [[TMP23]], label %[[FREM_COMPUTE46:.*]], label %[[FREM_ELSE47:.*]] ; CHECK: [[BB24:.*]]: ; CHECK-NEXT: [[RET45:%.*]] = phi float [ [[TMP93:%.*]], %[[FREM_LOOP_EXIT55:.*]] ], [ [[TMP85:%.*]], %[[FREM_ELSE47]] ] ; CHECK-NEXT: [[TMP25:%.*]] = fcmp ueq float [[TMP22]], 0.000000e+00 ; CHECK-NEXT: [[TMP26:%.*]] = select i1 [[TMP25]], float 0x7FF8000000000000, float [[RET45]] ; CHECK-NEXT: [[TMP27:%.*]] = call float @llvm.fabs.f32(float [[TMP21]]) ; CHECK-NEXT: [[TMP28:%.*]] = fcmp ult float [[TMP27]], 0x7FF0000000000000 ; CHECK-NEXT: [[TMP29:%.*]] = select i1 [[TMP28]], float [[TMP26]], float 0x7FF8000000000000 ; CHECK-NEXT: [[TMP30:%.*]] = insertelement <4 x float> [[TMP20]], float [[TMP29]], i64 2 ; CHECK-NEXT: [[TMP31:%.*]] = 
extractelement <4 x float> [[R0]], i64 3 ; CHECK-NEXT: [[TMP32:%.*]] = extractelement <4 x float> [[R1]], i64 3 ; CHECK-NEXT: [[AX74:%.*]] = call float @llvm.fabs.f32(float [[TMP31]]) ; CHECK-NEXT: [[AY75:%.*]] = call float @llvm.fabs.f32(float [[TMP32]]) ; CHECK-NEXT: [[TMP33:%.*]] = fcmp ogt float [[AX74]], [[AY75]] ; CHECK-NEXT: br i1 [[TMP33]], label %[[FREM_COMPUTE77:.*]], label %[[FREM_ELSE78:.*]] ; CHECK: [[BB34:.*]]: ; CHECK-NEXT: [[RET76:%.*]] = phi float [ [[TMP111:%.*]], %[[FREM_LOOP_EXIT86:.*]] ], [ [[TMP103:%.*]], %[[FREM_ELSE78]] ] ; CHECK-NEXT: [[TMP35:%.*]] = fcmp ueq float [[TMP32]], 0.000000e+00 ; CHECK-NEXT: [[TMP36:%.*]] = select i1 [[TMP35]], float 0x7FF8000000000000, float [[RET76]] ; CHECK-NEXT: [[TMP37:%.*]] = call float @llvm.fabs.f32(float [[TMP31]]) ; CHECK-NEXT: [[TMP38:%.*]] = fcmp ult float [[TMP37]], 0x7FF0000000000000 ; CHECK-NEXT: [[TMP39:%.*]] = select i1 [[TMP38]], float [[TMP36]], float 0x7FF8000000000000 ; CHECK-NEXT: [[R2:%.*]] = insertelement <4 x float> [[TMP30]], float [[TMP39]], i64 3 ; CHECK-NEXT: store <4 x float> [[R2]], ptr addrspace(1) [[OUT]], align 16 ; CHECK-NEXT: ret void ; CHECK: [[FREM_COMPUTE]]: ; CHECK-NEXT: [[TMP40:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX]]) ; CHECK-NEXT: [[TMP41:%.*]] = extractvalue { float, i32 } [[TMP40]], 0 ; CHECK-NEXT: [[TMP42:%.*]] = extractvalue { float, i32 } [[TMP40]], 1 ; CHECK-NEXT: [[EX:%.*]] = sub i32 [[TMP42]], 1 ; CHECK-NEXT: [[AX1:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP41]], i32 12) ; CHECK-NEXT: [[TMP43:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY]]) ; CHECK-NEXT: [[TMP44:%.*]] = extractvalue { float, i32 } [[TMP43]], 0 ; CHECK-NEXT: [[TMP45:%.*]] = extractvalue { float, i32 } [[TMP43]], 1 ; CHECK-NEXT: [[EY:%.*]] = sub i32 [[TMP45]], 1 ; CHECK-NEXT: [[AY2:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP44]], i32 1) ; CHECK-NEXT: [[NB:%.*]] = sub i32 [[EX]], [[EY]] ; CHECK-NEXT: [[AYINV:%.*]] = fdiv float 1.000000e+00, [[AY2]] ; 
CHECK-NEXT: [[TMP46:%.*]] = icmp sgt i32 [[NB]], 12 ; CHECK-NEXT: br i1 [[TMP46]], label %[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_ELSE]]: ; CHECK-NEXT: [[TMP47:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP1]]) ; CHECK-NEXT: [[TMP48:%.*]] = fcmp oeq float [[AX]], [[AY]] ; CHECK-NEXT: [[TMP49]] = select i1 [[TMP48]], float [[TMP47]], float [[TMP1]] ; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_LOOP_BODY]]: ; CHECK-NEXT: [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[AX_LOOP_PHI:%.*]] = phi float [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[TMP50:%.*]] = fmul float [[AX_LOOP_PHI]], [[AYINV]] ; CHECK-NEXT: [[Q:%.*]] = call float @llvm.rint.f32(float [[TMP50]]) ; CHECK-NEXT: [[TMP51:%.*]] = fneg float [[Q]] ; CHECK-NEXT: [[AX3:%.*]] = call float @llvm.fma.f32(float [[TMP51]], float [[AY2]], float [[AX_LOOP_PHI]]) ; CHECK-NEXT: [[CLT:%.*]] = fcmp olt float [[AX3]], 0.000000e+00 ; CHECK-NEXT: [[AXP:%.*]] = fadd float [[AX3]], [[AY2]] ; CHECK-NEXT: [[AX4:%.*]] = select i1 [[CLT]], float [[AXP]], float [[AX3]] ; CHECK-NEXT: [[AX_UPDATE]] = call float @llvm.ldexp.f32.i32(float [[AX4]], i32 12) ; CHECK-NEXT: [[NB_UPDATE]] = sub i32 [[NB_IV]], 12 ; CHECK-NEXT: [[TMP52:%.*]] = icmp sgt i32 [[NB_IV]], 12 ; CHECK-NEXT: br i1 [[TMP52]], label %[[FREM_LOOP_BODY]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_LOOP_EXIT]]: ; CHECK-NEXT: [[AX_EXIT_PHI:%.*]] = phi float [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_LOOP_PHI]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[NB_EXIT_PHI:%.*]] = phi i32 [ [[NB_IV]], %[[FREM_LOOP_BODY]] ], [ [[NB]], %[[FREM_COMPUTE]] ] ; CHECK-NEXT: [[TMP53:%.*]] = sub i32 [[NB_EXIT_PHI]], 12 ; CHECK-NEXT: [[TMP54:%.*]] = add i32 [[TMP53]], 1 ; CHECK-NEXT: [[AX5:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI]], i32 [[TMP54]]) ; CHECK-NEXT: [[TMP55:%.*]] = fmul float [[AX5]], [[AYINV]] ; CHECK-NEXT: 
[[Q6:%.*]] = call float @llvm.rint.f32(float [[TMP55]]) ; CHECK-NEXT: [[TMP56:%.*]] = fneg float [[Q6]] ; CHECK-NEXT: [[AX7:%.*]] = call float @llvm.fma.f32(float [[TMP56]], float [[AY2]], float [[AX5]]) ; CHECK-NEXT: [[CLT8:%.*]] = fcmp olt float [[AX7]], 0.000000e+00 ; CHECK-NEXT: [[AXP9:%.*]] = fadd float [[AX7]], [[AY2]] ; CHECK-NEXT: [[AX10:%.*]] = select i1 [[CLT8]], float [[AXP9]], float [[AX7]] ; CHECK-NEXT: [[AX11:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX10]], i32 [[EY]]) ; CHECK-NEXT: [[TMP57]] = call float @llvm.copysign.f32(float [[AX11]], float [[TMP1]]) ; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_COMPUTE15]]: ; CHECK-NEXT: [[TMP58:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX12]]) ; CHECK-NEXT: [[TMP59:%.*]] = extractvalue { float, i32 } [[TMP58]], 0 ; CHECK-NEXT: [[TMP60:%.*]] = extractvalue { float, i32 } [[TMP58]], 1 ; CHECK-NEXT: [[EX17:%.*]] = sub i32 [[TMP60]], 1 ; CHECK-NEXT: [[AX18:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP59]], i32 12) ; CHECK-NEXT: [[TMP61:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY13]]) ; CHECK-NEXT: [[TMP62:%.*]] = extractvalue { float, i32 } [[TMP61]], 0 ; CHECK-NEXT: [[TMP63:%.*]] = extractvalue { float, i32 } [[TMP61]], 1 ; CHECK-NEXT: [[EY19:%.*]] = sub i32 [[TMP63]], 1 ; CHECK-NEXT: [[AY20:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP62]], i32 1) ; CHECK-NEXT: [[NB21:%.*]] = sub i32 [[EX17]], [[EY19]] ; CHECK-NEXT: [[AYINV22:%.*]] = fdiv float 1.000000e+00, [[AY20]] ; CHECK-NEXT: [[TMP64:%.*]] = icmp sgt i32 [[NB21]], 12 ; CHECK-NEXT: br i1 [[TMP64]], label %[[FREM_LOOP_BODY23:.*]], label %[[FREM_LOOP_EXIT24]] ; CHECK: [[FREM_ELSE16]]: ; CHECK-NEXT: [[TMP65:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP11]]) ; CHECK-NEXT: [[TMP66:%.*]] = fcmp oeq float [[AX12]], [[AY13]] ; CHECK-NEXT: [[TMP67]] = select i1 [[TMP66]], float [[TMP65]], float [[TMP11]] ; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_LOOP_BODY23]]: ; CHECK-NEXT: [[NB_IV25:%.*]] = 
phi i32 [ [[NB21]], %[[FREM_COMPUTE15]] ], [ [[NB_UPDATE33:%.*]], %[[FREM_LOOP_BODY23]] ] ; CHECK-NEXT: [[AX_LOOP_PHI26:%.*]] = phi float [ [[AX18]], %[[FREM_COMPUTE15]] ], [ [[AX_UPDATE32:%.*]], %[[FREM_LOOP_BODY23]] ] ; CHECK-NEXT: [[TMP68:%.*]] = fmul float [[AX_LOOP_PHI26]], [[AYINV22]] ; CHECK-NEXT: [[Q27:%.*]] = call float @llvm.rint.f32(float [[TMP68]]) ; CHECK-NEXT: [[TMP69:%.*]] = fneg float [[Q27]] ; CHECK-NEXT: [[AX28:%.*]] = call float @llvm.fma.f32(float [[TMP69]], float [[AY20]], float [[AX_LOOP_PHI26]]) ; CHECK-NEXT: [[CLT29:%.*]] = fcmp olt float [[AX28]], 0.000000e+00 ; CHECK-NEXT: [[AXP30:%.*]] = fadd float [[AX28]], [[AY20]] ; CHECK-NEXT: [[AX31:%.*]] = select i1 [[CLT29]], float [[AXP30]], float [[AX28]] ; CHECK-NEXT: [[AX_UPDATE32]] = call float @llvm.ldexp.f32.i32(float [[AX31]], i32 12) ; CHECK-NEXT: [[NB_UPDATE33]] = sub i32 [[NB_IV25]], 12 ; CHECK-NEXT: [[TMP70:%.*]] = icmp sgt i32 [[NB_IV25]], 12 ; CHECK-NEXT: br i1 [[TMP70]], label %[[FREM_LOOP_BODY23]], label %[[FREM_LOOP_EXIT24]] ; CHECK: [[FREM_LOOP_EXIT24]]: ; CHECK-NEXT: [[AX_EXIT_PHI34:%.*]] = phi float [ [[AX18]], %[[FREM_COMPUTE15]] ], [ [[AX_LOOP_PHI26]], %[[FREM_LOOP_BODY23]] ] ; CHECK-NEXT: [[NB_EXIT_PHI35:%.*]] = phi i32 [ [[NB_IV25]], %[[FREM_LOOP_BODY23]] ], [ [[NB21]], %[[FREM_COMPUTE15]] ] ; CHECK-NEXT: [[TMP71:%.*]] = sub i32 [[NB_EXIT_PHI35]], 12 ; CHECK-NEXT: [[TMP72:%.*]] = add i32 [[TMP71]], 1 ; CHECK-NEXT: [[AX36:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI34]], i32 [[TMP72]]) ; CHECK-NEXT: [[TMP73:%.*]] = fmul float [[AX36]], [[AYINV22]] ; CHECK-NEXT: [[Q37:%.*]] = call float @llvm.rint.f32(float [[TMP73]]) ; CHECK-NEXT: [[TMP74:%.*]] = fneg float [[Q37]] ; CHECK-NEXT: [[AX38:%.*]] = call float @llvm.fma.f32(float [[TMP74]], float [[AY20]], float [[AX36]]) ; CHECK-NEXT: [[CLT39:%.*]] = fcmp olt float [[AX38]], 0.000000e+00 ; CHECK-NEXT: [[AXP40:%.*]] = fadd float [[AX38]], [[AY20]] ; CHECK-NEXT: [[AX41:%.*]] = select i1 [[CLT39]], float [[AXP40]], 
float [[AX38]] ; CHECK-NEXT: [[AX42:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX41]], i32 [[EY19]]) ; CHECK-NEXT: [[TMP75]] = call float @llvm.copysign.f32(float [[AX42]], float [[TMP11]]) ; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_COMPUTE46]]: ; CHECK-NEXT: [[TMP76:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX43]]) ; CHECK-NEXT: [[TMP77:%.*]] = extractvalue { float, i32 } [[TMP76]], 0 ; CHECK-NEXT: [[TMP78:%.*]] = extractvalue { float, i32 } [[TMP76]], 1 ; CHECK-NEXT: [[EX48:%.*]] = sub i32 [[TMP78]], 1 ; CHECK-NEXT: [[AX49:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP77]], i32 12) ; CHECK-NEXT: [[TMP79:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY44]]) ; CHECK-NEXT: [[TMP80:%.*]] = extractvalue { float, i32 } [[TMP79]], 0 ; CHECK-NEXT: [[TMP81:%.*]] = extractvalue { float, i32 } [[TMP79]], 1 ; CHECK-NEXT: [[EY50:%.*]] = sub i32 [[TMP81]], 1 ; CHECK-NEXT: [[AY51:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP80]], i32 1) ; CHECK-NEXT: [[NB52:%.*]] = sub i32 [[EX48]], [[EY50]] ; CHECK-NEXT: [[AYINV53:%.*]] = fdiv float 1.000000e+00, [[AY51]] ; CHECK-NEXT: [[TMP82:%.*]] = icmp sgt i32 [[NB52]], 12 ; CHECK-NEXT: br i1 [[TMP82]], label %[[FREM_LOOP_BODY54:.*]], label %[[FREM_LOOP_EXIT55]] ; CHECK: [[FREM_ELSE47]]: ; CHECK-NEXT: [[TMP83:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP21]]) ; CHECK-NEXT: [[TMP84:%.*]] = fcmp oeq float [[AX43]], [[AY44]] ; CHECK-NEXT: [[TMP85]] = select i1 [[TMP84]], float [[TMP83]], float [[TMP21]] ; CHECK-NEXT: br label %[[BB24]] ; CHECK: [[FREM_LOOP_BODY54]]: ; CHECK-NEXT: [[NB_IV56:%.*]] = phi i32 [ [[NB52]], %[[FREM_COMPUTE46]] ], [ [[NB_UPDATE64:%.*]], %[[FREM_LOOP_BODY54]] ] ; CHECK-NEXT: [[AX_LOOP_PHI57:%.*]] = phi float [ [[AX49]], %[[FREM_COMPUTE46]] ], [ [[AX_UPDATE63:%.*]], %[[FREM_LOOP_BODY54]] ] ; CHECK-NEXT: [[TMP86:%.*]] = fmul float [[AX_LOOP_PHI57]], [[AYINV53]] ; CHECK-NEXT: [[Q58:%.*]] = call float @llvm.rint.f32(float [[TMP86]]) ; CHECK-NEXT: 
[[TMP87:%.*]] = fneg float [[Q58]] ; CHECK-NEXT: [[AX59:%.*]] = call float @llvm.fma.f32(float [[TMP87]], float [[AY51]], float [[AX_LOOP_PHI57]]) ; CHECK-NEXT: [[CLT60:%.*]] = fcmp olt float [[AX59]], 0.000000e+00 ; CHECK-NEXT: [[AXP61:%.*]] = fadd float [[AX59]], [[AY51]] ; CHECK-NEXT: [[AX62:%.*]] = select i1 [[CLT60]], float [[AXP61]], float [[AX59]] ; CHECK-NEXT: [[AX_UPDATE63]] = call float @llvm.ldexp.f32.i32(float [[AX62]], i32 12) ; CHECK-NEXT: [[NB_UPDATE64]] = sub i32 [[NB_IV56]], 12 ; CHECK-NEXT: [[TMP88:%.*]] = icmp sgt i32 [[NB_IV56]], 12 ; CHECK-NEXT: br i1 [[TMP88]], label %[[FREM_LOOP_BODY54]], label %[[FREM_LOOP_EXIT55]] ; CHECK: [[FREM_LOOP_EXIT55]]: ; CHECK-NEXT: [[AX_EXIT_PHI65:%.*]] = phi float [ [[AX49]], %[[FREM_COMPUTE46]] ], [ [[AX_LOOP_PHI57]], %[[FREM_LOOP_BODY54]] ] ; CHECK-NEXT: [[NB_EXIT_PHI66:%.*]] = phi i32 [ [[NB_IV56]], %[[FREM_LOOP_BODY54]] ], [ [[NB52]], %[[FREM_COMPUTE46]] ] ; CHECK-NEXT: [[TMP89:%.*]] = sub i32 [[NB_EXIT_PHI66]], 12 ; CHECK-NEXT: [[TMP90:%.*]] = add i32 [[TMP89]], 1 ; CHECK-NEXT: [[AX67:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI65]], i32 [[TMP90]]) ; CHECK-NEXT: [[TMP91:%.*]] = fmul float [[AX67]], [[AYINV53]] ; CHECK-NEXT: [[Q68:%.*]] = call float @llvm.rint.f32(float [[TMP91]]) ; CHECK-NEXT: [[TMP92:%.*]] = fneg float [[Q68]] ; CHECK-NEXT: [[AX69:%.*]] = call float @llvm.fma.f32(float [[TMP92]], float [[AY51]], float [[AX67]]) ; CHECK-NEXT: [[CLT70:%.*]] = fcmp olt float [[AX69]], 0.000000e+00 ; CHECK-NEXT: [[AXP71:%.*]] = fadd float [[AX69]], [[AY51]] ; CHECK-NEXT: [[AX72:%.*]] = select i1 [[CLT70]], float [[AXP71]], float [[AX69]] ; CHECK-NEXT: [[AX73:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX72]], i32 [[EY50]]) ; CHECK-NEXT: [[TMP93]] = call float @llvm.copysign.f32(float [[AX73]], float [[TMP21]]) ; CHECK-NEXT: br label %[[BB24]] ; CHECK: [[FREM_COMPUTE77]]: ; CHECK-NEXT: [[TMP94:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AX74]]) ; CHECK-NEXT: [[TMP95:%.*]] = 
extractvalue { float, i32 } [[TMP94]], 0 ; CHECK-NEXT: [[TMP96:%.*]] = extractvalue { float, i32 } [[TMP94]], 1 ; CHECK-NEXT: [[EX79:%.*]] = sub i32 [[TMP96]], 1 ; CHECK-NEXT: [[AX80:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP95]], i32 12) ; CHECK-NEXT: [[TMP97:%.*]] = call { float, i32 } @llvm.frexp.f32.i32(float [[AY75]]) ; CHECK-NEXT: [[TMP98:%.*]] = extractvalue { float, i32 } [[TMP97]], 0 ; CHECK-NEXT: [[TMP99:%.*]] = extractvalue { float, i32 } [[TMP97]], 1 ; CHECK-NEXT: [[EY81:%.*]] = sub i32 [[TMP99]], 1 ; CHECK-NEXT: [[AY82:%.*]] = call float @llvm.ldexp.f32.i32(float [[TMP98]], i32 1) ; CHECK-NEXT: [[NB83:%.*]] = sub i32 [[EX79]], [[EY81]] ; CHECK-NEXT: [[AYINV84:%.*]] = fdiv float 1.000000e+00, [[AY82]] ; CHECK-NEXT: [[TMP100:%.*]] = icmp sgt i32 [[NB83]], 12 ; CHECK-NEXT: br i1 [[TMP100]], label %[[FREM_LOOP_BODY85:.*]], label %[[FREM_LOOP_EXIT86]] ; CHECK: [[FREM_ELSE78]]: ; CHECK-NEXT: [[TMP101:%.*]] = call float @llvm.copysign.f32(float 0.000000e+00, float [[TMP31]]) ; CHECK-NEXT: [[TMP102:%.*]] = fcmp oeq float [[AX74]], [[AY75]] ; CHECK-NEXT: [[TMP103]] = select i1 [[TMP102]], float [[TMP101]], float [[TMP31]] ; CHECK-NEXT: br label %[[BB34]] ; CHECK: [[FREM_LOOP_BODY85]]: ; CHECK-NEXT: [[NB_IV87:%.*]] = phi i32 [ [[NB83]], %[[FREM_COMPUTE77]] ], [ [[NB_UPDATE95:%.*]], %[[FREM_LOOP_BODY85]] ] ; CHECK-NEXT: [[AX_LOOP_PHI88:%.*]] = phi float [ [[AX80]], %[[FREM_COMPUTE77]] ], [ [[AX_UPDATE94:%.*]], %[[FREM_LOOP_BODY85]] ] ; CHECK-NEXT: [[TMP104:%.*]] = fmul float [[AX_LOOP_PHI88]], [[AYINV84]] ; CHECK-NEXT: [[Q89:%.*]] = call float @llvm.rint.f32(float [[TMP104]]) ; CHECK-NEXT: [[TMP105:%.*]] = fneg float [[Q89]] ; CHECK-NEXT: [[AX90:%.*]] = call float @llvm.fma.f32(float [[TMP105]], float [[AY82]], float [[AX_LOOP_PHI88]]) ; CHECK-NEXT: [[CLT91:%.*]] = fcmp olt float [[AX90]], 0.000000e+00 ; CHECK-NEXT: [[AXP92:%.*]] = fadd float [[AX90]], [[AY82]] ; CHECK-NEXT: [[AX93:%.*]] = select i1 [[CLT91]], float [[AXP92]], float [[AX90]] ; CHECK-NEXT: 
[[AX_UPDATE94]] = call float @llvm.ldexp.f32.i32(float [[AX93]], i32 12) ; CHECK-NEXT: [[NB_UPDATE95]] = sub i32 [[NB_IV87]], 12 ; CHECK-NEXT: [[TMP106:%.*]] = icmp sgt i32 [[NB_IV87]], 12 ; CHECK-NEXT: br i1 [[TMP106]], label %[[FREM_LOOP_BODY85]], label %[[FREM_LOOP_EXIT86]] ; CHECK: [[FREM_LOOP_EXIT86]]: ; CHECK-NEXT: [[AX_EXIT_PHI96:%.*]] = phi float [ [[AX80]], %[[FREM_COMPUTE77]] ], [ [[AX_LOOP_PHI88]], %[[FREM_LOOP_BODY85]] ] ; CHECK-NEXT: [[NB_EXIT_PHI97:%.*]] = phi i32 [ [[NB_IV87]], %[[FREM_LOOP_BODY85]] ], [ [[NB83]], %[[FREM_COMPUTE77]] ] ; CHECK-NEXT: [[TMP107:%.*]] = sub i32 [[NB_EXIT_PHI97]], 12 ; CHECK-NEXT: [[TMP108:%.*]] = add i32 [[TMP107]], 1 ; CHECK-NEXT: [[AX98:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX_EXIT_PHI96]], i32 [[TMP108]]) ; CHECK-NEXT: [[TMP109:%.*]] = fmul float [[AX98]], [[AYINV84]] ; CHECK-NEXT: [[Q99:%.*]] = call float @llvm.rint.f32(float [[TMP109]]) ; CHECK-NEXT: [[TMP110:%.*]] = fneg float [[Q99]] ; CHECK-NEXT: [[AX100:%.*]] = call float @llvm.fma.f32(float [[TMP110]], float [[AY82]], float [[AX98]]) ; CHECK-NEXT: [[CLT101:%.*]] = fcmp olt float [[AX100]], 0.000000e+00 ; CHECK-NEXT: [[AXP102:%.*]] = fadd float [[AX100]], [[AY82]] ; CHECK-NEXT: [[AX103:%.*]] = select i1 [[CLT101]], float [[AXP102]], float [[AX100]] ; CHECK-NEXT: [[AX104:%.*]] = call float @llvm.ldexp.f32.i32(float [[AX103]], i32 [[EY81]]) ; CHECK-NEXT: [[TMP111]] = call float @llvm.copysign.f32(float [[AX104]], float [[TMP31]]) ; CHECK-NEXT: br label %[[BB34]] ; ptr addrspace(1) %in2) { %gep2 = getelementptr <4 x float>, ptr addrspace(1) %in2, i32 4 %r0 = load <4 x float>, ptr addrspace(1) %in1, align 16 %r1 = load <4 x float>, ptr addrspace(1) %gep2, align 16 %r2 = frem <4 x float> %r0, %r1 store <4 x float> %r2, ptr addrspace(1) %out, align 16 ret void } define amdgpu_kernel void @frem_v2f64(ptr addrspace(1) %out, ptr addrspace(1) %in1, ; CHECK-LABEL: define amdgpu_kernel void @frem_v2f64( ; CHECK-SAME: ptr addrspace(1) [[OUT:%.*]], ptr addrspace(1) 
[[IN1:%.*]], ptr addrspace(1) [[IN2:%.*]]) { ; CHECK-NEXT: [[GEP2:%.*]] = getelementptr <2 x double>, ptr addrspace(1) [[IN2]], i32 4 ; CHECK-NEXT: [[R0:%.*]] = load <2 x double>, ptr addrspace(1) [[IN1]], align 16 ; CHECK-NEXT: [[R1:%.*]] = load <2 x double>, ptr addrspace(1) [[GEP2]], align 16 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[R0]], i64 0 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[R1]], i64 0 ; CHECK-NEXT: [[AX:%.*]] = call double @llvm.fabs.f64(double [[TMP1]]) ; CHECK-NEXT: [[AY:%.*]] = call double @llvm.fabs.f64(double [[TMP2]]) ; CHECK-NEXT: [[TMP3:%.*]] = fcmp ogt double [[AX]], [[AY]] ; CHECK-NEXT: br i1 [[TMP3]], label %[[FREM_COMPUTE:.*]], label %[[FREM_ELSE:.*]] ; CHECK: [[BB4:.*]]: ; CHECK-NEXT: [[RET:%.*]] = phi double [ [[TMP37:%.*]], %[[FREM_LOOP_EXIT:.*]] ], [ [[TMP29:%.*]], %[[FREM_ELSE]] ] ; CHECK-NEXT: [[TMP5:%.*]] = fcmp ueq double [[TMP2]], 0.000000e+00 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[TMP5]], double 0x7FF8000000000000, double [[RET]] ; CHECK-NEXT: [[TMP7:%.*]] = call double @llvm.fabs.f64(double [[TMP1]]) ; CHECK-NEXT: [[TMP8:%.*]] = fcmp ult double [[TMP7]], 0x7FF0000000000000 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP6]], double 0x7FF8000000000000 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> poison, double [[TMP9]], i64 0 ; CHECK-NEXT: [[TMP11:%.*]] = extractelement <2 x double> [[R0]], i64 1 ; CHECK-NEXT: [[TMP12:%.*]] = extractelement <2 x double> [[R1]], i64 1 ; CHECK-NEXT: [[AX12:%.*]] = call double @llvm.fabs.f64(double [[TMP11]]) ; CHECK-NEXT: [[AY13:%.*]] = call double @llvm.fabs.f64(double [[TMP12]]) ; CHECK-NEXT: [[TMP13:%.*]] = fcmp ogt double [[AX12]], [[AY13]] ; CHECK-NEXT: br i1 [[TMP13]], label %[[FREM_COMPUTE15:.*]], label %[[FREM_ELSE16:.*]] ; CHECK: [[BB14:.*]]: ; CHECK-NEXT: [[RET14:%.*]] = phi double [ [[TMP55:%.*]], %[[FREM_LOOP_EXIT24:.*]] ], [ [[TMP47:%.*]], %[[FREM_ELSE16]] ] ; CHECK-NEXT: [[TMP15:%.*]] = fcmp ueq double [[TMP12]], 
0.000000e+00 ; CHECK-NEXT: [[TMP16:%.*]] = select i1 [[TMP15]], double 0x7FF8000000000000, double [[RET14]] ; CHECK-NEXT: [[TMP17:%.*]] = call double @llvm.fabs.f64(double [[TMP11]]) ; CHECK-NEXT: [[TMP18:%.*]] = fcmp ult double [[TMP17]], 0x7FF0000000000000 ; CHECK-NEXT: [[TMP19:%.*]] = select i1 [[TMP18]], double [[TMP16]], double 0x7FF8000000000000 ; CHECK-NEXT: [[R2:%.*]] = insertelement <2 x double> [[TMP10]], double [[TMP19]], i64 1 ; CHECK-NEXT: store <2 x double> [[R2]], ptr addrspace(1) [[OUT]], align 16 ; CHECK-NEXT: ret void ; CHECK: [[FREM_COMPUTE]]: ; CHECK-NEXT: [[TMP20:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AX]]) ; CHECK-NEXT: [[TMP21:%.*]] = extractvalue { double, i32 } [[TMP20]], 0 ; CHECK-NEXT: [[TMP22:%.*]] = extractvalue { double, i32 } [[TMP20]], 1 ; CHECK-NEXT: [[EX:%.*]] = sub i32 [[TMP22]], 1 ; CHECK-NEXT: [[AX1:%.*]] = call double @llvm.ldexp.f64.i32(double [[TMP21]], i32 26) ; CHECK-NEXT: [[TMP23:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AY]]) ; CHECK-NEXT: [[TMP24:%.*]] = extractvalue { double, i32 } [[TMP23]], 0 ; CHECK-NEXT: [[TMP25:%.*]] = extractvalue { double, i32 } [[TMP23]], 1 ; CHECK-NEXT: [[EY:%.*]] = sub i32 [[TMP25]], 1 ; CHECK-NEXT: [[AY2:%.*]] = call double @llvm.ldexp.f64.i32(double [[TMP24]], i32 1) ; CHECK-NEXT: [[NB:%.*]] = sub i32 [[EX]], [[EY]] ; CHECK-NEXT: [[AYINV:%.*]] = fdiv double 1.000000e+00, [[AY2]] ; CHECK-NEXT: [[TMP26:%.*]] = icmp sgt i32 [[NB]], 26 ; CHECK-NEXT: br i1 [[TMP26]], label %[[FREM_LOOP_BODY:.*]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_ELSE]]: ; CHECK-NEXT: [[TMP27:%.*]] = call double @llvm.copysign.f64(double 0.000000e+00, double [[TMP1]]) ; CHECK-NEXT: [[TMP28:%.*]] = fcmp oeq double [[AX]], [[AY]] ; CHECK-NEXT: [[TMP29]] = select i1 [[TMP28]], double [[TMP27]], double [[TMP1]] ; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_LOOP_BODY]]: ; CHECK-NEXT: [[NB_IV:%.*]] = phi i32 [ [[NB]], %[[FREM_COMPUTE]] ], [ [[NB_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; 
CHECK-NEXT: [[AX_LOOP_PHI:%.*]] = phi double [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_UPDATE:%.*]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[TMP30:%.*]] = fmul double [[AX_LOOP_PHI]], [[AYINV]] ; CHECK-NEXT: [[Q:%.*]] = call double @llvm.rint.f64(double [[TMP30]]) ; CHECK-NEXT: [[TMP31:%.*]] = fneg double [[Q]] ; CHECK-NEXT: [[AX3:%.*]] = call double @llvm.fma.f64(double [[TMP31]], double [[AY2]], double [[AX_LOOP_PHI]]) ; CHECK-NEXT: [[CLT:%.*]] = fcmp olt double [[AX3]], 0.000000e+00 ; CHECK-NEXT: [[AXP:%.*]] = fadd double [[AX3]], [[AY2]] ; CHECK-NEXT: [[AX4:%.*]] = select i1 [[CLT]], double [[AXP]], double [[AX3]] ; CHECK-NEXT: [[AX_UPDATE]] = call double @llvm.ldexp.f64.i32(double [[AX4]], i32 26) ; CHECK-NEXT: [[NB_UPDATE]] = sub i32 [[NB_IV]], 26 ; CHECK-NEXT: [[TMP32:%.*]] = icmp sgt i32 [[NB_IV]], 26 ; CHECK-NEXT: br i1 [[TMP32]], label %[[FREM_LOOP_BODY]], label %[[FREM_LOOP_EXIT]] ; CHECK: [[FREM_LOOP_EXIT]]: ; CHECK-NEXT: [[AX_EXIT_PHI:%.*]] = phi double [ [[AX1]], %[[FREM_COMPUTE]] ], [ [[AX_LOOP_PHI]], %[[FREM_LOOP_BODY]] ] ; CHECK-NEXT: [[NB_EXIT_PHI:%.*]] = phi i32 [ [[NB_IV]], %[[FREM_LOOP_BODY]] ], [ [[NB]], %[[FREM_COMPUTE]] ] ; CHECK-NEXT: [[TMP33:%.*]] = sub i32 [[NB_EXIT_PHI]], 26 ; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], 1 ; CHECK-NEXT: [[AX5:%.*]] = call double @llvm.ldexp.f64.i32(double [[AX_EXIT_PHI]], i32 [[TMP34]]) ; CHECK-NEXT: [[TMP35:%.*]] = fmul double [[AX5]], [[AYINV]] ; CHECK-NEXT: [[Q6:%.*]] = call double @llvm.rint.f64(double [[TMP35]]) ; CHECK-NEXT: [[TMP36:%.*]] = fneg double [[Q6]] ; CHECK-NEXT: [[AX7:%.*]] = call double @llvm.fma.f64(double [[TMP36]], double [[AY2]], double [[AX5]]) ; CHECK-NEXT: [[CLT8:%.*]] = fcmp olt double [[AX7]], 0.000000e+00 ; CHECK-NEXT: [[AXP9:%.*]] = fadd double [[AX7]], [[AY2]] ; CHECK-NEXT: [[AX10:%.*]] = select i1 [[CLT8]], double [[AXP9]], double [[AX7]] ; CHECK-NEXT: [[AX11:%.*]] = call double @llvm.ldexp.f64.i32(double [[AX10]], i32 [[EY]]) ; CHECK-NEXT: [[TMP37]] = call double 
@llvm.copysign.f64(double [[AX11]], double [[TMP1]]) ; CHECK-NEXT: br label %[[BB4]] ; CHECK: [[FREM_COMPUTE15]]: ; CHECK-NEXT: [[TMP38:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AX12]]) ; CHECK-NEXT: [[TMP39:%.*]] = extractvalue { double, i32 } [[TMP38]], 0 ; CHECK-NEXT: [[TMP40:%.*]] = extractvalue { double, i32 } [[TMP38]], 1 ; CHECK-NEXT: [[EX17:%.*]] = sub i32 [[TMP40]], 1 ; CHECK-NEXT: [[AX18:%.*]] = call double @llvm.ldexp.f64.i32(double [[TMP39]], i32 26) ; CHECK-NEXT: [[TMP41:%.*]] = call { double, i32 } @llvm.frexp.f64.i32(double [[AY13]]) ; CHECK-NEXT: [[TMP42:%.*]] = extractvalue { double, i32 } [[TMP41]], 0 ; CHECK-NEXT: [[TMP43:%.*]] = extractvalue { double, i32 } [[TMP41]], 1 ; CHECK-NEXT: [[EY19:%.*]] = sub i32 [[TMP43]], 1 ; CHECK-NEXT: [[AY20:%.*]] = call double @llvm.ldexp.f64.i32(double [[TMP42]], i32 1) ; CHECK-NEXT: [[NB21:%.*]] = sub i32 [[EX17]], [[EY19]] ; CHECK-NEXT: [[AYINV22:%.*]] = fdiv double 1.000000e+00, [[AY20]] ; CHECK-NEXT: [[TMP44:%.*]] = icmp sgt i32 [[NB21]], 26 ; CHECK-NEXT: br i1 [[TMP44]], label %[[FREM_LOOP_BODY23:.*]], label %[[FREM_LOOP_EXIT24]] ; CHECK: [[FREM_ELSE16]]: ; CHECK-NEXT: [[TMP45:%.*]] = call double @llvm.copysign.f64(double 0.000000e+00, double [[TMP11]]) ; CHECK-NEXT: [[TMP46:%.*]] = fcmp oeq double [[AX12]], [[AY13]] ; CHECK-NEXT: [[TMP47]] = select i1 [[TMP46]], double [[TMP45]], double [[TMP11]] ; CHECK-NEXT: br label %[[BB14]] ; CHECK: [[FREM_LOOP_BODY23]]: ; CHECK-NEXT: [[NB_IV25:%.*]] = phi i32 [ [[NB21]], %[[FREM_COMPUTE15]] ], [ [[NB_UPDATE33:%.*]], %[[FREM_LOOP_BODY23]] ] ; CHECK-NEXT: [[AX_LOOP_PHI26:%.*]] = phi double [ [[AX18]], %[[FREM_COMPUTE15]] ], [ [[AX_UPDATE32:%.*]], %[[FREM_LOOP_BODY23]] ] ; CHECK-NEXT: [[TMP48:%.*]] = fmul double [[AX_LOOP_PHI26]], [[AYINV22]] ; CHECK-NEXT: [[Q27:%.*]] = call double @llvm.rint.f64(double [[TMP48]]) ; CHECK-NEXT: [[TMP49:%.*]] = fneg double [[Q27]] ; CHECK-NEXT: [[AX28:%.*]] = call double @llvm.fma.f64(double [[TMP49]], double [[AY20]], 
double [[AX_LOOP_PHI26]]) ; CHECK-NEXT: [[CLT29:%.*]] = fcmp olt double [[AX28]], 0.000000e+00 ; CHECK-NEXT: [[AXP30:%.*]] = fadd double [[AX28]], [[AY20]] ; CHECK-NEXT: [[AX31:%.*]] = select i1 [[CLT29]], double [[AXP30]], double [[AX28]] ; CHECK-NEXT: [[AX_UPDATE32]] = call double @llvm.ldexp.f64.i32(double [[AX31]], i32 26) ; CHECK-NEXT: [[NB_UPDATE33]] = sub i32 [[NB_IV25]], 26 ; CHECK-NEXT: [[TMP50:%.*]] = icmp sgt i32 [[NB_IV25]], 26 ; CHECK-NEXT: br i1 [[TMP50]], label %[[FREM_LOOP_BODY23]], label %[[FREM_LOOP_EXIT24]] ; CHECK: [[FREM_LOOP_EXIT24]]: ; CHECK-NEXT: [[AX_EXIT_PHI34:%.*]] = phi double [ [[AX18]], %[[FREM_COMPUTE15]] ], [ [[AX_LOOP_PHI26]], %[[FREM_LOOP_BODY23]] ] ; CHECK-NEXT: [[NB_EXIT_PHI35:%.*]] = phi i32 [ [[NB_IV25]], %[[FREM_LOOP_BODY23]] ], [ [[NB21]], %[[FREM_COMPUTE15]] ] ; CHECK-NEXT: [[TMP51:%.*]] = sub i32 [[NB_EXIT_PHI35]], 26 ; CHECK-NEXT: [[TMP52:%.*]] = add i32 [[TMP51]], 1 ; CHECK-NEXT: [[AX36:%.*]] = call double @llvm.ldexp.f64.i32(double [[AX_EXIT_PHI34]], i32 [[TMP52]]) ; CHECK-NEXT: [[TMP53:%.*]] = fmul double [[AX36]], [[AYINV22]] ; CHECK-NEXT: [[Q37:%.*]] = call double @llvm.rint.f64(double [[TMP53]]) ; CHECK-NEXT: [[TMP54:%.*]] = fneg double [[Q37]] ; CHECK-NEXT: [[AX38:%.*]] = call double @llvm.fma.f64(double [[TMP54]], double [[AY20]], double [[AX36]]) ; CHECK-NEXT: [[CLT39:%.*]] = fcmp olt double [[AX38]], 0.000000e+00 ; CHECK-NEXT: [[AXP40:%.*]] = fadd double [[AX38]], [[AY20]] ; CHECK-NEXT: [[AX41:%.*]] = select i1 [[CLT39]], double [[AXP40]], double [[AX38]] ; CHECK-NEXT: [[AX42:%.*]] = call double @llvm.ldexp.f64.i32(double [[AX41]], i32 [[EY19]]) ; CHECK-NEXT: [[TMP55]] = call double @llvm.copysign.f64(double [[AX42]], double [[TMP11]]) ; CHECK-NEXT: br label %[[BB14]] ; ptr addrspace(1) %in2) { %gep2 = getelementptr <2 x double>, ptr addrspace(1) %in2, i32 4 %r0 = load <2 x double>, ptr addrspace(1) %in1, align 16 %r1 = load <2 x double>, ptr addrspace(1) %gep2, align 16 %r2 = frem <2 x double> %r0, %r1 store <2 
x double> %r2, ptr addrspace(1) %out, align 16 ret void }
; ---------------------------------------------------------------------------
; Reviewer notes for @frem_v4f32 and @frem_v2f64 (both defined above).
;
; Inputs: each kernel loads its left operand from %in1 and its right operand
; from %in2 advanced by a getelementptr with index 4 over the vector type,
; applies a single vector frem, and stores the result to %out.
;
; Expected output: the assertions show the vector frem being handled one lane
; at a time. Every lane is pulled out with extractelement, run through the
; scalar expansion, and written back with insertelement (4 lanes for
; <4 x float>, 2 lanes for <2 x double>).
;
; Scalar expansion as it appears in the assertions, per lane (x = lane of the
; left operand, y = lane of the right operand):
;   * fabs of both operands, then a branch on |x| ogt |y|.
;   * FREM_ELSE block: yields copysign(0, x) when |x| oeq |y|, otherwise x.
;   * FREM_COMPUTE / FREM_LOOP_BODY / FREM_LOOP_EXIT blocks: frexp + ldexp
;     scaling followed by an rint/fma reduction; the loop compares and
;     decrements the exponent difference in steps of 12 for float and 26 for
;     double, and the final value takes the sign of x through copysign.
;   * Join block: the result is replaced by the NaN constant
;     0x7FF8000000000000 when y compares ueq to 0.0, or when |x| does not
;     compare ult to 0x7FF0000000000000 (+infinity).
;
; The assertions are autogenerated; the header of this file names
; utils/update_test_checks.py. Regenerate them with that script instead of
; editing them by hand.
; ---------------------------------------------------------------------------