; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1030 -passes=instcombine -S < %s | FileCheck %s

; The readfirstlane version of this test covers all the interesting cases of the
; shared logic. This testcase focuses on readlane specific pitfalls.

; test unary

define float @hoist_fneg_f32(float %arg, i32 %lane) {
; CHECK-LABEL: define float @hoist_fneg_f32(
; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[RL:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
; CHECK-NEXT:    [[RFL:%.*]] = fneg float [[RL]]
; CHECK-NEXT:    ret float [[RFL]]
;
bb:
  %val = fneg float %arg
  %rl = call float @llvm.amdgcn.readlane.f32(float %val, i32 %lane)
  ret float %rl
}

define double @hoist_fneg_f64(double %arg, i32 %lane) {
; CHECK-LABEL: define double @hoist_fneg_f64(
; CHECK-SAME: double [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[RL:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[ARG]], i32 [[LANE]])
; CHECK-NEXT:    [[RFL:%.*]] = fneg double [[RL]]
; CHECK-NEXT:    ret double [[RFL]]
;
bb:
  %val = fneg double %arg
  %rl = call double @llvm.amdgcn.readlane.f64(double %val, i32 %lane)
  ret double %rl
}

; test casts

define i32 @hoist_trunc(i64 %arg, i32 %lane) {
; CHECK-LABEL: define i32 @hoist_trunc(
; CHECK-SAME: i64 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[RL:%.*]] = call i64 @llvm.amdgcn.readlane.i64(i64 [[ARG]], i32 [[LANE]])
; CHECK-NEXT:    [[TMP0:%.*]] = trunc i64 [[RL]] to i32
; CHECK-NEXT:    ret i32 [[TMP0]]
;
bb:
  %val = trunc i64 %arg to i32
  %rl = call i32 @llvm.amdgcn.readlane.i32(i32 %val, i32 %lane)
  ret i32 %rl
}

define i64 @hoist_zext(i32 %arg, i32 %lane) {
; CHECK-LABEL: define i64 @hoist_zext(
; CHECK-SAME: i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[RL:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[RL]] to i64
; CHECK-NEXT:    ret i64 [[TMP0]]
;
bb:
  %val = zext i32 %arg to i64
  %rl = call i64 @llvm.amdgcn.readlane.i64(i64 %val, i32 %lane)
  ret i64 %rl
}

; test binary i32

define i32 @hoist_add_i32(i32 %arg, i32 %lane) {
; CHECK-LABEL: define i32 @hoist_add_i32(
; CHECK-SAME: i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[RL:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
; CHECK-NEXT:    [[RFL:%.*]] = add i32 [[RL]], 16777215
; CHECK-NEXT:    ret i32 [[RFL]]
;
bb:
  %val = add i32 %arg, 16777215
  %rl = call i32 @llvm.amdgcn.readlane.i32(i32 %val, i32 %lane)
  ret i32 %rl
}

define float @hoist_fadd_f32(float %arg, i32 %lane) {
; CHECK-LABEL: define float @hoist_fadd_f32(
; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[RL:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
; CHECK-NEXT:    [[RFL:%.*]] = fadd float [[RL]], 1.280000e+02
; CHECK-NEXT:    ret float [[RFL]]
;
bb:
  %val = fadd float %arg, 128.0
  %rl = call float @llvm.amdgcn.readlane.f32(float %val, i32 %lane)
  ret float %rl
}

; test binary i64

define i64 @hoist_and_i64(i64 %arg, i32 %lane) {
; CHECK-LABEL: define i64 @hoist_and_i64(
; CHECK-SAME: i64 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[RL:%.*]] = call i64 @llvm.amdgcn.readlane.i64(i64 [[ARG]], i32 [[LANE]])
; CHECK-NEXT:    [[RFL:%.*]] = and i64 [[RL]], 16777215
; CHECK-NEXT:    ret i64 [[RFL]]
;
bb:
  %val = and i64 %arg, 16777215
  %rl = call i64 @llvm.amdgcn.readlane.i64(i64 %val, i32 %lane)
  ret i64 %rl
}

define double @hoist_fadd_f64(double %arg, i32 %lane) {
; CHECK-LABEL: define double @hoist_fadd_f64(
; CHECK-SAME: double [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[RL:%.*]] = call double @llvm.amdgcn.readlane.f64(double [[ARG]], i32 [[LANE]])
; CHECK-NEXT:    [[RFL:%.*]] = fadd double [[RL]], 1.280000e+02
; CHECK-NEXT:    ret double [[RFL]]
;
bb:
  %val = fadd double %arg, 128.0
  %rl = call double @llvm.amdgcn.readlane.f64(double %val, i32 %lane)
  ret double %rl
}

; test constant on LHS

define i32 @hoist_sub_i32_lhs(i32 %arg, i32 %lane) {
; CHECK-LABEL: define i32 @hoist_sub_i32_lhs(
; CHECK-SAME: i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[RL:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
; CHECK-NEXT:    [[RFL:%.*]] = sub i32 16777215, [[RL]]
; CHECK-NEXT:    ret i32 [[RFL]]
;
bb:
  %val = sub i32 16777215, %arg
  %rl = call i32 @llvm.amdgcn.readlane.i32(i32 %val, i32 %lane)
  ret i32 %rl
}

define float @hoist_fsub_f32_lhs(float %arg, i32 %lane) {
; CHECK-LABEL: define float @hoist_fsub_f32_lhs(
; CHECK-SAME: float [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[RL:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[ARG]], i32 [[LANE]])
; CHECK-NEXT:    [[RFL:%.*]] = fsub float 1.280000e+02, [[RL]]
; CHECK-NEXT:    ret float [[RFL]]
;
bb:
  %val = fsub float 128.0, %arg
  %rl = call float @llvm.amdgcn.readlane.f32(float %val, i32 %lane)
  ret float %rl
}

; test lane operand defined in a different block than the readlane

define i32 @readlane_lane_op_in_other_block(i1 %cond, i32 %arg, i32 %base) {
; CHECK-LABEL: define i32 @readlane_lane_op_in_other_block(
; CHECK-SAME: i1 [[COND:%.*]], i32 [[ARG:%.*]], i32 [[BASE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*]]:
; CHECK-NEXT:    [[LANE:%.*]] = add i32 [[BASE]], 2
; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[END:.*]]
; CHECK:       [[THEN]]:
; CHECK-NEXT:    [[RL:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]])
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[RL]], 16777215
; CHECK-NEXT:    br label %[[END]]
; CHECK:       [[END]]:
; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[TMP0]], %[[THEN]] ], [ [[LANE]], %[[BB]] ]
; CHECK-NEXT:    ret i32 [[RES]]
;
bb:
  %lane = add i32 %base, 2
  br i1 %cond, label %then, label %end

then:
  %val = add i32 %arg, 16777215
  %rl = call i32 @llvm.amdgcn.readlane.i32(i32 %val, i32 %lane)
  br label %end

end:
  %res = phi i32 [%rl, %then], [%lane, %bb]
  ret i32 %res
}

; Check cases where we can't move the readlane higher
; Here %lane is defined after %val in the same block; the expected output
; leaves the readlane where it is.

define float @cannot_move_readlane(float %arg, i32 %base) {
; CHECK-LABEL: define float @cannot_move_readlane(
; CHECK-SAME: float [[ARG:%.*]], i32 [[BASE:%.*]]) #[[ATTR0]] {
; CHECK-NEXT:  [[BB:.*:]]
; CHECK-NEXT:    [[VAL:%.*]] = fsub float 1.280000e+02, [[ARG]]
; CHECK-NEXT:    [[LANE:%.*]] = add i32 [[BASE]], 2
; CHECK-NEXT:    [[RFL:%.*]] = call float @llvm.amdgcn.readlane.f32(float [[VAL]], i32 [[LANE]])
; CHECK-NEXT:    ret float [[RFL]]
;
bb:
  %val = fsub float 128.0, %arg
  %lane = add i32 %base, 2
  %rl = call float @llvm.amdgcn.readlane.f32(float %val, i32 %lane)
  ret float %rl
}

; test that convergence tokens are preserved

define i32 @hoist_preserves_convergence_token(i1 %cond, i32 %arg, i32 %lane) convergent {
; CHECK-LABEL: define i32 @hoist_preserves_convergence_token(
; CHECK-SAME: i1 [[COND:%.*]], i32 [[ARG:%.*]], i32 [[LANE:%.*]]) #[[ATTR1:[0-9]+]] {
; CHECK-NEXT:  [[BB:.*]]:
; CHECK-NEXT:    [[ENTRY:%.*]] = call token @llvm.experimental.convergence.entry()
; CHECK-NEXT:    br i1 [[COND]], label %[[THEN:.*]], label %[[END:.*]]
; CHECK:       [[THEN]]:
; CHECK-NEXT:    [[RL:%.*]] = call i32 @llvm.amdgcn.readlane.i32(i32 [[ARG]], i32 [[LANE]]) [ "convergencectrl"(token [[ENTRY]]) ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[RL]], 16777215
; CHECK-NEXT:    br label %[[END]]
; CHECK:       [[END]]:
; CHECK-NEXT:    [[RES:%.*]] = phi i32 [ [[TMP0]], %[[THEN]] ], [ [[ARG]], %[[BB]] ]
; CHECK-NEXT:    ret i32 [[RES]]
;
bb:
  %entry = call token @llvm.experimental.convergence.entry()
  br i1 %cond, label %then, label %end

then:
  %val = add i32 %arg, 16777215
  %rl = call i32 @llvm.amdgcn.readlane.i32(i32 %val, i32 %lane) [ "convergencectrl"(token %entry)]
  br label %end

end:
  %res = phi i32 [%rl, %then], [%arg, %bb]
  ret i32 %res
}