diff options
Diffstat (limited to 'llvm/test/CodeGen/NVPTX/combine-wide.ll')
-rw-r--r-- | llvm/test/CodeGen/NVPTX/combine-wide.ll | 1339 |
1 files changed, 1339 insertions, 0 deletions
diff --git a/llvm/test/CodeGen/NVPTX/combine-wide.ll b/llvm/test/CodeGen/NVPTX/combine-wide.ll new file mode 100644 index 0000000..ed4a2b6 --- /dev/null +++ b/llvm/test/CodeGen/NVPTX/combine-wide.ll @@ -0,0 +1,1339 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 +; RUN: llc < %s -O1 | FileCheck %s --check-prefixes=CHECK,O1 +; RUN: llc < %s -O0 | FileCheck %s --check-prefixes=CHECK,O0 + +target triple = "nvptx64-nvidia-cuda" + +define i64 @t1(i32 %a, i32 %b, i64 %c) { +; +; O1-LABEL: t1( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t1_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t1_param_1]; +; O1-NEXT: ld.param.b64 %rd1, [t1_param_2]; +; O1-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1; +; O1-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-NEXT: ret; +; +; O0-LABEL: t1( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t1_param_2]; +; O0-NEXT: ld.param.b32 %r2, [t1_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t1_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd2, %r3; +; O0-NEXT: add.s64 %rd3, %rd1, %rd2; +; O0-NEXT: st.param.b64 [func_retval0], %rd3; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, %b + %sext = sext i32 %mul to i64 + %add = add i64 %c, %sext + ret i64 %add +} + +define i64 @t2(i32 %a, i32 %b, i64 %c) { +; +; O1-LABEL: t2( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t2_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t2_param_1]; +; O1-NEXT: ld.param.b64 %rd1, [t2_param_2]; +; O1-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1; +; O1-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-NEXT: ret; +; +; O0-LABEL: t2( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t2_param_2]; +; O0-NEXT: ld.param.b32 %r2, [t2_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t2_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd2, %r3; +; O0-NEXT: add.s64 %rd3, %rd2, %rd1; +; O0-NEXT: st.param.b64 [func_retval0], %rd3; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, %b + %sext = sext i32 %mul to i64 + %add = add i64 %sext, %c + ret i64 %add +} + +define i64 @t3(i32 %a, i32 %b) { +; +; O1-LABEL: t3( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t3_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t3_param_1]; +; O1-NEXT: mad.wide.s32 %rd1, %r1, %r2, 1; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t3( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<3>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t3_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t3_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd1, %r3; +; O0-NEXT: add.s64 %rd2, %rd1, 1; +; O0-NEXT: st.param.b64 [func_retval0], %rd2; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, %b + %sext = sext i32 %mul to i64 + %add = add i64 1, %sext + ret i64 %add +} + +define i64 @t4(i32 %a, i64 %c) { +; +; O1-LABEL: t4( +; O1: { +; O1-NEXT: .reg .b32 %r<2>; +; O1-NEXT: .reg .b64 %rd<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t4_param_0]; +; O1-NEXT: ld.param.b64 %rd1, [t4_param_1]; +; O1-NEXT: mad.wide.s32 %rd2, %r1, 3, %rd1; +; O1-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-NEXT: ret; +; +; O0-LABEL: t4( +; O0: { +; O0-NEXT: .reg .b32 %r<3>; +; O0-NEXT: .reg .b64 %rd<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t4_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t4_param_0]; +; O0-NEXT: mul.lo.s32 %r2, %r1, 3; +; O0-NEXT: cvt.s64.s32 %rd2, %r2; +; O0-NEXT: add.s64 %rd3, %rd1, %rd2; +; O0-NEXT: st.param.b64 [func_retval0], %rd3; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, 3 + %sext = sext i32 %mul to i64 + %add = add i64 %c, %sext + ret i64 %add +} + +define i64 @t4_1(i32 %a, i64 %c) { +; +; O1-LABEL: t4_1( +; O1: { +; O1-NEXT: .reg .b32 %r<2>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t4_1_param_0]; +; O1-NEXT: mad.wide.s32 %rd1, %r1, 3, 5; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t4_1( +; O0: { +; O0-NEXT: .reg .b32 %r<3>; +; O0-NEXT: .reg .b64 %rd<3>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t4_1_param_0]; +; O0-NEXT: mul.lo.s32 %r2, %r1, 3; +; O0-NEXT: cvt.s64.s32 %rd1, %r2; +; O0-NEXT: add.s64 %rd2, %rd1, 5; +; O0-NEXT: st.param.b64 [func_retval0], %rd2; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, 3 + %sext = sext i32 %mul to i64 + %add = add i64 5, %sext + ret i64 %add +} + +define i64 @t5(i32 %a, i32 %b, i64 %c) { +; +; O1-LABEL: t5( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t5_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t5_param_1]; +; O1-NEXT: ld.param.b64 %rd1, [t5_param_2]; +; O1-NEXT: mad.wide.u32 %rd2, %r1, %r2, %rd1; +; O1-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-NEXT: ret; +; +; O0-LABEL: t5( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t5_param_2]; +; O0-NEXT: ld.param.b32 %r2, [t5_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t5_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.u64.u32 %rd2, %r3; +; O0-NEXT: add.s64 %rd3, %rd1, %rd2; +; O0-NEXT: st.param.b64 [func_retval0], %rd3; +; O0-NEXT: ret; + %mul = mul nuw i32 %a, %b + %zext = zext i32 %mul to i64 + %add = add i64 %c, %zext + ret i64 %add +} + +define i64 @t6(i32 %a, i32 %b, i64 %c) { +; +; O1-LABEL: t6( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t6_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t6_param_1]; +; O1-NEXT: ld.param.b64 %rd1, [t6_param_2]; +; O1-NEXT: mad.wide.u32 %rd2, %r1, %r2, %rd1; +; O1-NEXT: st.param.b64 [func_retval0], %rd2; +; O1-NEXT: ret; +; +; O0-LABEL: t6( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t6_param_2]; +; O0-NEXT: ld.param.b32 %r2, [t6_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t6_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.u64.u32 %rd2, %r3; +; O0-NEXT: add.s64 %rd3, %rd2, %rd1; +; O0-NEXT: st.param.b64 [func_retval0], %rd3; +; O0-NEXT: ret; + %mul = mul nuw i32 %a, %b + %zext = zext i32 %mul to i64 + %add = add i64 %zext, %c + ret i64 %add +} + +define i32 @t7(i16 %a, i16 %b) { +; +; O1-LABEL: t7( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t7_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t7_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.u32.u16 %r1, %rs3; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t7( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t7_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t7_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u32.u16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul i16 %a, %b + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t8(i16 %a, i16 %b) { +; +; O1-LABEL: t8( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t8_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t8_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.s32.s16 %r1, %rs3; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t8( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t8_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t8_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s32.s16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul i16 %a, %b + %sext = sext i16 %mul to i32 + ret i32 %sext +} + +define i64 @t9(i32 %a, i32 %b) { +; +; O1-LABEL: t9( +; O1: { +; O1-NEXT: .reg .b32 %r<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t9_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t9_param_1]; +; O1-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O1-NEXT: cvt.u64.u32 %rd1, %r3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t9( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t9_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t9_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.u64.u32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul i32 %a, %b + %zext = zext i32 %mul to i64 + ret i64 %zext +} + +define i64 @t10(i32 %a, i32 %b) { +; +; O1-LABEL: t10( +; O1: { +; O1-NEXT: .reg .b32 %r<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t10_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t10_param_1]; +; O1-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O1-NEXT: cvt.s64.s32 %rd1, %r3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t10( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t10_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t10_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul i32 %a, %b + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i32 @t11(i16 %a, i16 %b) { +; +; O1-LABEL: t11( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t11_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t11_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.u32.u16 %r1, %rs3; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t11( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t11_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t11_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u32.u16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, %b + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t12(i16 %a, i16 %b) { +; +; O1-LABEL: t12( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t12_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t12_param_1]; +; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t12( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t12_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t12_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s32.s16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, %b + %sext = sext i16 %mul to i32 + ret i32 %sext +} + +define i64 @t13(i32 %a, i32 %b) { +; +; O1-LABEL: t13( +; O1: { +; O1-NEXT: .reg .b32 %r<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t13_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t13_param_1]; +; O1-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O1-NEXT: cvt.u64.u32 %rd1, %r3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t13( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t13_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t13_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.u64.u32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, %b + %zext = zext i32 %mul to i64 + ret i64 %zext +} + +define i64 @t14(i32 %a, i32 %b) { +; +; O1-LABEL: t14( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t14_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t14_param_1]; +; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t14( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t14_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t14_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul nsw i32 %a, %b + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i32 @t15(i16 %a, i16 %b) { +; +; O1-LABEL: t15( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t15_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t15_param_1]; +; O1-NEXT: mul.wide.u16 %r1, %rs1, %rs2; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t15( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t15_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t15_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u32.u16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t16(i16 %a, i16 %b) { +; +; O1-LABEL: t16( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t16_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t16_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.s32.s16 %r1, %rs3; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t16( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t16_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t16_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s32.s16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + %sext = sext i16 %mul to i32 + ret i32 %sext +} + +define i64 @t17(i32 %a, i32 %b) { +; +; O1-LABEL: t17( +; O1: { +; O1-NEXT: .reg .b32 %r<3>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t17_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t17_param_1]; +; O1-NEXT: mul.wide.u32 %rd1, %r1, %r2; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t17( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t17_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t17_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.u64.u32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul nuw i32 %a, %b + %zext = zext i32 %mul to i64 + ret i64 %zext +} + +define i64 @t18(i32 %a, i32 %b) { +; +; O1-LABEL: t18( +; O1: { +; O1-NEXT: .reg .b32 %r<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t18_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t18_param_1]; +; O1-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O1-NEXT: cvt.s64.s32 %rd1, %r3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t18( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t18_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t18_param_0]; +; O0-NEXT: mul.lo.s32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul nuw i32 %a, %b + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i32 @t19(i16 %a, i16 %b) { +; +; O1-LABEL: t19( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t19_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t19_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.u32.u16 %r1, %rs3; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t19( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t19_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t19_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u32.u16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul i16 %a, %b + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t20(i16 %a) { +; +; CHECK-LABEL: t20( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [t20_param_0]; +; CHECK-NEXT: shl.b16 %rs2, %rs1, 4; +; CHECK-NEXT: cvt.s32.s16 %r1, %rs2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %mul = shl i16 %a, 4 + %sext = sext i16 %mul to i32 + ret i32 %sext +} + +define i64 @t21(i32 %a) { +; +; CHECK-LABEL: t21( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [t21_param_0]; +; CHECK-NEXT: shl.b32 %r2, %r1, 4; +; CHECK-NEXT: cvt.u64.u32 %rd1, %r2; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %mul = shl i32 %a, 4 + %zext = zext i32 %mul to i64 + ret i64 %zext +} + +define i64 @t22(i32 %a) { +; +; CHECK-LABEL: t22( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [t22_param_0]; +; CHECK-NEXT: shl.b32 %r2, %r1, 4; +; CHECK-NEXT: cvt.s64.s32 %rd1, %r2; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %mul = shl i32 %a, 4 + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i32 @t23(i16 %a, i16 %b) { +; +; CHECK-LABEL: t23( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [t23_param_0]; +; CHECK-NEXT: shl.b16 %rs2, %rs1, 4; +; CHECK-NEXT: cvt.u32.u16 %r1, %rs2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %mul = shl nsw i16 %a, 4 + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t24(i16 %a, i16 %b) { +; +; O1-LABEL: t24( +; O1: { +; O1-NEXT: .reg .b16 %rs<2>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t24_param_0]; +; O1-NEXT: mul.wide.s16 %r1, %rs1, 16; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t24( +; O0: { +; O0-NEXT: .reg .b16 %rs<3>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs1, [t24_param_0]; +; O0-NEXT: shl.b16 %rs2, %rs1, 4; +; O0-NEXT: cvt.s32.s16 %r1, %rs2; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = shl nsw i16 %a, 4 + %sext = sext i16 %mul to i32 + ret i32 %sext +} + +define i64 @t25(i32 %a) { +; +; CHECK-LABEL: t25( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [t25_param_0]; +; CHECK-NEXT: shl.b32 %r2, %r1, 4; +; CHECK-NEXT: cvt.u64.u32 %rd1, %r2; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %mul = shl nsw i32 %a, 4 + %zext = zext i32 %mul to i64 + ret i64 %zext +} + +define i64 @t26(i32 %a) { +; +; O1-LABEL: t26( +; O1: { +; O1-NEXT: .reg .b32 %r<2>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t26_param_0]; +; O1-NEXT: mul.wide.s32 %rd1, %r1, 16; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t26( +; O0: { +; O0-NEXT: .reg .b32 %r<3>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t26_param_0]; +; O0-NEXT: shl.b32 %r2, %r1, 4; +; O0-NEXT: cvt.s64.s32 %rd1, %r2; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = shl nsw i32 %a, 4 + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i32 @t27(i16 %a, i16 %b) { +; +; O1-LABEL: t27( +; O1: { +; O1-NEXT: .reg .b16 %rs<2>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t27_param_0]; +; O1-NEXT: mul.wide.u16 %r1, %rs1, 16; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t27( +; O0: { +; O0-NEXT: .reg .b16 %rs<3>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs1, [t27_param_0]; +; O0-NEXT: shl.b16 %rs2, %rs1, 4; +; O0-NEXT: cvt.u32.u16 %r1, %rs2; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = shl nuw i16 %a, 4 + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t28(i16 %a, i16 %b) { +; +; CHECK-LABEL: t28( +; CHECK: { +; CHECK-NEXT: .reg .b16 %rs<3>; +; CHECK-NEXT: .reg .b32 %r<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b16 %rs1, [t28_param_0]; +; CHECK-NEXT: shl.b16 %rs2, %rs1, 4; +; CHECK-NEXT: cvt.s32.s16 %r1, %rs2; +; CHECK-NEXT: st.param.b32 [func_retval0], %r1; +; CHECK-NEXT: ret; + %mul = shl nuw i16 %a, 4 + %sext = sext i16 %mul to i32 + ret i32 %sext +} + +define i64 @t29(i32 %a) { +; +; O1-LABEL: t29( +; O1: { +; O1-NEXT: .reg .b32 %r<2>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t29_param_0]; +; O1-NEXT: mul.wide.u32 %rd1, %r1, 16; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t29( +; O0: { +; O0-NEXT: .reg .b32 %r<3>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t29_param_0]; +; O0-NEXT: shl.b32 %r2, %r1, 4; +; O0-NEXT: cvt.u64.u32 %rd1, %r2; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = shl nuw i32 %a, 4 + %zext = zext i32 %mul to i64 + ret i64 %zext +} + +define i64 @t30(i32 %a) { +; +; CHECK-LABEL: t30( +; CHECK: { +; CHECK-NEXT: .reg .b32 %r<3>; +; CHECK-NEXT: .reg .b64 %rd<2>; +; CHECK-EMPTY: +; CHECK-NEXT: // %bb.0: +; CHECK-NEXT: ld.param.b32 %r1, [t30_param_0]; +; CHECK-NEXT: shl.b32 %r2, %r1, 4; +; CHECK-NEXT: cvt.s64.s32 %rd1, %r2; +; CHECK-NEXT: st.param.b64 [func_retval0], %rd1; +; CHECK-NEXT: ret; + %mul = shl nuw i32 %a, 4 + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i64 @t31(i32 %a, i32 %b) { +; +; O1-LABEL: t31( +; O1: { +; O1-NEXT: .reg .b32 %r<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b32 %r1, [t31_param_0]; +; O1-NEXT: ld.param.b32 %r2, [t31_param_1]; +; O1-NEXT: shl.b32 %r3, %r1, %r2; +; O1-NEXT: cvt.s64.s32 %rd1, %r3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t31( +; O0: { +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r2, [t31_param_1]; +; O0-NEXT: ld.param.b32 %r1, [t31_param_0]; +; O0-NEXT: shl.b32 %r3, %r1, %r2; +; O0-NEXT: cvt.s64.s32 %rd1, %r3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = shl nuw i32 %a, %b + %sext = sext i32 %mul to i64 + ret i64 %sext +} + +define i32 @t32(i16 %a, i16 %b, i32 %c) { +; +; O1-LABEL: t32( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t32_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t32_param_1]; +; O1-NEXT: ld.param.b32 %r1, [t32_param_2]; +; O1-NEXT: mad.wide.s16 %r2, %rs1, %rs2, %r1; +; O1-NEXT: st.param.b32 [func_retval0], %r2; +; O1-NEXT: ret; +; +; O0-LABEL: t32( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t32_param_2]; +; O0-NEXT: ld.param.b16 %rs2, [t32_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t32_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s32.s16 %r2, %rs3; +; O0-NEXT: add.s32 %r3, %r1, %r2; +; O0-NEXT: st.param.b32 [func_retval0], %r3; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, %b + %sext = sext i16 %mul to i32 + %add = add i32 %c, %sext + ret i32 %add +} + +define i32 @t33(i16 %a, i16 %b, i32 %c) { +; +; O1-LABEL: t33( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t33_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t33_param_1]; +; O1-NEXT: ld.param.b32 %r1, [t33_param_2]; +; O1-NEXT: mad.wide.s16 %r2, %rs1, %rs2, %r1; +; O1-NEXT: st.param.b32 [func_retval0], %r2; +; O1-NEXT: ret; +; +; O0-LABEL: t33( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t33_param_2]; +; O0-NEXT: ld.param.b16 %rs2, [t33_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t33_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s32.s16 %r2, %rs3; +; O0-NEXT: add.s32 %r3, %r1, %r2; +; O0-NEXT: st.param.b32 [func_retval0], %r3; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, %b + %sext = sext i16 %mul to i32 + %add = add i32 %c, %sext + ret i32 %add +} + +define i32 @t34(i16 %a, i16 %b) { +; +; O1-LABEL: t34( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t34_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t34_param_1]; +; O1-NEXT: mad.wide.s16 %r1, %rs1, %rs2, 1; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t34( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<3>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t34_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t34_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s32.s16 %r1, %rs3; +; O0-NEXT: add.s32 %r2, %r1, 1; +; O0-NEXT: st.param.b32 [func_retval0], %r2; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, %b + %sext = sext i16 %mul to i32 + %add = add i32 1, %sext + ret i32 %add +} + +define i32 @t35(i16 %a, i32 %c) { +; +; O1-LABEL: t35( +; O1: { +; O1-NEXT: .reg .b16 %rs<2>; +; O1-NEXT: .reg .b32 %r<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t35_param_0]; +; O1-NEXT: ld.param.b32 %r1, [t35_param_1]; +; O1-NEXT: mad.wide.s16 %r2, %rs1, 3, %r1; +; O1-NEXT: st.param.b32 [func_retval0], %r2; +; O1-NEXT: ret; +; +; O0-LABEL: t35( +; O0: { +; O0-NEXT: .reg .b16 %rs<3>; +; O0-NEXT: .reg .b32 %r<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t35_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t35_param_0]; +; O0-NEXT: mul.lo.s16 %rs2, %rs1, 3; +; O0-NEXT: cvt.s32.s16 %r2, %rs2; +; O0-NEXT: add.s32 %r3, %r1, %r2; +; O0-NEXT: st.param.b32 [func_retval0], %r3; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, 3 + %sext = sext i16 %mul to i32 + %add = add i32 %c, %sext + ret i32 %add +} + +define i32 @t36(i16 %a, i32 %c) { +; +; O1-LABEL: t36( +; O1: { +; O1-NEXT: .reg .b16 %rs<2>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t36_param_0]; +; O1-NEXT: mad.wide.s16 %r1, %rs1, 3, 5; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t36( +; O0: { +; O0-NEXT: .reg .b16 %rs<3>; +; O0-NEXT: .reg .b32 %r<3>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs1, [t36_param_0]; +; O0-NEXT: mul.lo.s16 %rs2, %rs1, 3; +; O0-NEXT: cvt.s32.s16 %r1, %rs2; +; O0-NEXT: add.s32 %r2, %r1, 5; +; O0-NEXT: st.param.b32 [func_retval0], %r2; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, 3 + %sext = sext i16 %mul to i32 + %add = add i32 5, %sext + ret i32 %add +} + +define i32 @t37(i16 %a, i16 %b, i32 %c) { +; +; O1-LABEL: t37( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t37_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t37_param_1]; +; O1-NEXT: ld.param.b32 %r1, [t37_param_2]; +; O1-NEXT: mad.wide.u16 %r2, %rs1, %rs2, %r1; +; O1-NEXT: st.param.b32 [func_retval0], %r2; +; O1-NEXT: ret; +; +; O0-LABEL: t37( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t37_param_2]; +; O0-NEXT: ld.param.b16 %rs2, [t37_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t37_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u32.u16 %r2, %rs3; +; O0-NEXT: add.s32 %r3, %r1, %r2; +; O0-NEXT: st.param.b32 [func_retval0], %r3; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + %zext = zext i16 %mul to i32 + %add = add i32 %c, %zext + ret i32 %add +} + +define i32 @t38(i16 %a, i16 %b, i32 %c) { +; +; O1-LABEL: t38( +; O1: { +; O1-NEXT: .reg .b16 %rs<3>; +; O1-NEXT: .reg .b32 %r<3>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t38_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t38_param_1]; +; O1-NEXT: ld.param.b32 %r1, [t38_param_2]; +; O1-NEXT: mad.wide.u16 %r2, %rs1, %rs2, %r1; +; O1-NEXT: st.param.b32 [func_retval0], %r2; +; O1-NEXT: ret; +; +; O0-LABEL: t38( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<4>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b32 %r1, [t38_param_2]; +; O0-NEXT: ld.param.b16 %rs2, [t38_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t38_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u32.u16 %r2, %rs3; +; O0-NEXT: add.s32 %r3, %r2, %r1; +; O0-NEXT: st.param.b32 [func_retval0], %r3; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + %zext = zext i16 %mul to i32 + %add = add i32 %zext, %c + ret i32 %add +} + +define i64 @t39(i16 %a, i16 %b) { +; O1-LABEL: t39( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t39_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t39_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.u64.u16 %rd1, %rs3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t39( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t39_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t39_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u64.u16 %rd1, %rs3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul i16 %a, %b + %zext = zext i16 %mul to i64 + ret i64 %zext +} + +define i64 @t40(i16 %a, i16 %b) { +; O1-LABEL: t40( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t40_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t40_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.u64.u16 %rd1, %rs3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t40( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t40_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t40_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.u64.u16 %rd1, %rs3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + %zext = zext i16 %mul to i64 + ret i64 %zext +} + +define i64 @t41(i16 %a, i16 %b) { +; O1-LABEL: t41( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t41_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t41_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: cvt.s64.s16 %rd1, %rs3; +; O1-NEXT: st.param.b64 [func_retval0], %rd1; +; O1-NEXT: ret; +; +; O0-LABEL: t41( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b16 %rs2, [t41_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t41_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: cvt.s64.s16 %rd1, %rs3; +; O0-NEXT: st.param.b64 [func_retval0], %rd1; +; O0-NEXT: ret; + %mul = mul nsw i16 %a, %b + %sext = sext i16 %mul to i64 + ret i64 %sext +} + +define i32 @t42(i16 %a, i16 %b, ptr %ptr) { +; O1-LABEL: t42( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<2>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t42_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t42_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: ld.param.b64 %rd1, [t42_param_2]; +; O1-NEXT: st.b16 [%rd1], %rs3; +; O1-NEXT: cvt.u32.u16 %r1, %rs3; +; O1-NEXT: st.param.b32 [func_retval0], %r1; +; O1-NEXT: ret; +; +; O0-LABEL: t42( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<2>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t42_param_2]; +; O0-NEXT: ld.param.b16 %rs2, [t42_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t42_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: st.b16 [%rd1], %rs3; +; O0-NEXT: cvt.u32.u16 %r1, %rs3; +; O0-NEXT: st.param.b32 [func_retval0], %r1; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + store i16 %mul, ptr %ptr + %zext = zext i16 %mul to i32 + ret i32 %zext +} + +define i32 @t43(i16 %a, i16 %b, i32 %c, ptr %ptr) { +; O1-LABEL: t43( +; O1: { +; O1-NEXT: .reg .b16 %rs<4>; +; O1-NEXT: .reg .b32 %r<4>; +; O1-NEXT: .reg .b64 %rd<2>; +; O1-EMPTY: +; O1-NEXT: // %bb.0: +; O1-NEXT: ld.param.b16 %rs1, [t43_param_0]; +; O1-NEXT: ld.param.b16 %rs2, [t43_param_1]; +; O1-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O1-NEXT: ld.param.b64 %rd1, [t43_param_3]; +; O1-NEXT: st.b16 [%rd1], %rs3; +; O1-NEXT: ld.param.b32 %r1, [t43_param_2]; +; O1-NEXT: cvt.u32.u16 %r2, %rs3; +; O1-NEXT: add.s32 %r3, %r2, %r1; +; O1-NEXT: st.param.b32 [func_retval0], %r3; +; O1-NEXT: ret; +; +; O0-LABEL: t43( +; O0: { +; O0-NEXT: .reg .b16 %rs<4>; +; O0-NEXT: .reg .b32 %r<4>; +; O0-NEXT: .reg .b64 %rd<2>; +; O0-EMPTY: +; O0-NEXT: // %bb.0: +; O0-NEXT: ld.param.b64 %rd1, [t43_param_3]; +; O0-NEXT: ld.param.b32 %r1, [t43_param_2]; +; O0-NEXT: ld.param.b16 %rs2, [t43_param_1]; +; O0-NEXT: ld.param.b16 %rs1, [t43_param_0]; +; O0-NEXT: mul.lo.s16 %rs3, %rs1, %rs2; +; O0-NEXT: st.b16 [%rd1], %rs3; +; O0-NEXT: cvt.u32.u16 %r2, %rs3; +; O0-NEXT: add.s32 %r3, %r2, %r1; +; O0-NEXT: st.param.b32 [func_retval0], %r3; +; O0-NEXT: ret; + %mul = mul nuw i16 %a, %b + store i16 %mul, ptr %ptr + %zext = zext i16 %mul to i32 + %add = add i32 %zext, %c + ret i32 %add +} |