; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64    | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,SSE
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,AVX2
; RUN: opt < %s -passes=vector-combine -S -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,AVX512

define i32 @movmsk_i32_v32i8_v16i8(<16 x i8> %v0, <16 x i8> %v1) {
; CHECK-LABEL: @movmsk_i32_v32i8_v16i8(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[V1:%.*]], <16 x i8> [[V0:%.*]], <32 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <32 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[OR:%.*]] = bitcast <32 x i1> [[TMP2]] to i32
; CHECK-NEXT:    ret i32 [[OR]]
;
  %c0 = icmp slt <16 x i8> %v0, zeroinitializer
  %c1 = icmp slt <16 x i8> %v1, zeroinitializer
  %b0 = bitcast <16 x i1> %c0 to i16
  %b1 = bitcast <16 x i1> %c1 to i16
  %z0 = zext i16 %b0 to i32
  %z1 = zext i16 %b1 to i32
  %s0 = shl nuw i32 %z0, 16
  %or = or disjoint i32 %s0, %z1
  ret i32 %or
}

define i32 @movmsk_i32_v8i32_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @movmsk_i32_v8i32_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <8 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i1> [[TMP2]] to i8
; CHECK-NEXT:    [[OR:%.*]] = zext i8 [[TMP3]] to i32
; CHECK-NEXT:    ret i32 [[OR]]
;
  %c0 = icmp slt <4 x i32> %v0, zeroinitializer
  %c1 = icmp slt <4 x i32> %v1, zeroinitializer
  %b0 = bitcast <4 x i1> %c0 to i4
  %b1 = bitcast <4 x i1> %c1 to i4
  %z0 = zext i4 %b0 to i32
  %z1 = zext i4 %b1 to i32
  %s0 = shl nuw i32 %z0, 4
  %or = or disjoint i32 %s0, %z1
  ret i32 %or
}

define i64 @movmsk_i64_v32i8_v16i8(<16 x i8> %v0, <16 x i8> %v1) {
; CHECK-LABEL: @movmsk_i64_v32i8_v16i8(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[V1:%.*]], <16 x i8> [[V0:%.*]], <32 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <32 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <32 x i1> [[TMP2]] to i32
; CHECK-NEXT:    [[OR:%.*]] = zext i32 [[TMP3]] to i64
; CHECK-NEXT:    ret i64 [[OR]]
;
  %c0 = icmp slt <16 x i8> %v0, zeroinitializer
  %c1 = icmp slt <16 x i8> %v1, zeroinitializer
  %b0 = bitcast <16 x i1> %c0 to i16
  %b1 = bitcast <16 x i1> %c1 to i16
  %z0 = zext i16 %b0 to i64
  %z1 = zext i16 %b1 to i64
  %s0 = shl nuw i64 %z0, 16
  %or = or disjoint i64 %s0, %z1
  ret i64 %or
}

define i64 @movmsk_i64_v8i32_v4i32(<4 x i32> %v0, <4 x i32> %v1) {
; CHECK-LABEL: @movmsk_i64_v8i32_v4i32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <8 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <8 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i1> [[TMP2]] to i8
; CHECK-NEXT:    [[OR:%.*]] = zext i8 [[TMP3]] to i64
; CHECK-NEXT:    ret i64 [[OR]]
;
  %c0 = icmp slt <4 x i32> %v0, zeroinitializer
  %c1 = icmp slt <4 x i32> %v1, zeroinitializer
  %b0 = bitcast <4 x i1> %c0 to i4
  %b1 = bitcast <4 x i1> %c1 to i4
  %z0 = zext i4 %b0 to i64
  %z1 = zext i4 %b1 to i64
  %s0 = shl nuw i64 %z0, 4
  %or = or disjoint i64 %s0, %z1
  ret i64 %or
}

define i64 @movmsk_i64_v64i8_v16i8(<16 x i8> %v0, <16 x i8> %v1, <16 x i8> %v2, <16 x i8> %v3) {
; SSE-LABEL: @movmsk_i64_v64i8_v16i8(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[V3:%.*]], <16 x i8> [[V2:%.*]], <32 x i32>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[V1:%.*]], <16 x i8> [[V0:%.*]], <32 x i32>
; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP1]], <32 x i8> [[TMP2]], <64 x i32>
; SSE-NEXT:    [[TMP4:%.*]] = icmp slt <64 x i8> [[TMP3]], zeroinitializer
; SSE-NEXT:    [[OR:%.*]] = bitcast <64 x i1> [[TMP4]] to i64
; SSE-NEXT:    ret i64 [[OR]]
;
; AVX2-LABEL: @movmsk_i64_v64i8_v16i8(
; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[V1:%.*]], <16 x i8> [[V0:%.*]], <32 x i32>
; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[V3:%.*]], <16 x i8> [[V2:%.*]], <32 x i32>
; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> [[TMP1]], <64 x i32>
; AVX2-NEXT:    [[TMP4:%.*]] = icmp slt <64 x i8> [[TMP3]], zeroinitializer
; AVX2-NEXT:    [[OR:%.*]] = bitcast <64 x i1> [[TMP4]] to i64
; AVX2-NEXT:    ret i64 [[OR]]
;
; AVX512-LABEL: @movmsk_i64_v64i8_v16i8(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[V1:%.*]], <16 x i8> [[V0:%.*]], <32 x i32>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[V3:%.*]], <16 x i8> [[V2:%.*]], <32 x i32>
; AVX512-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i8> [[TMP2]], <32 x i8> [[TMP1]], <64 x i32>
; AVX512-NEXT:    [[TMP4:%.*]] = icmp slt <64 x i8> [[TMP3]], zeroinitializer
; AVX512-NEXT:    [[OR:%.*]] = bitcast <64 x i1> [[TMP4]] to i64
; AVX512-NEXT:    ret i64 [[OR]]
;
  %c0 = icmp slt <16 x i8> %v0, zeroinitializer
  %c1 = icmp slt <16 x i8> %v1, zeroinitializer
  %c2 = icmp slt <16 x i8> %v2, zeroinitializer
  %c3 = icmp slt <16 x i8> %v3, zeroinitializer
  %b0 = bitcast <16 x i1> %c0 to i16
  %b1 = bitcast <16 x i1> %c1 to i16
  %b2 = bitcast <16 x i1> %c2 to i16
  %b3 = bitcast <16 x i1> %c3 to i16
  %z0 = zext i16 %b0 to i64
  %z1 = zext i16 %b1 to i64
  %z2 = zext i16 %b2 to i64
  %z3 = zext i16 %b3 to i64
  %s0 = shl nuw i64 %z0, 48
  %s1 = shl nuw i64 %z1, 32
  %s2 = shl nuw i64 %z2, 16
  %or0 = or disjoint i64 %s0, %s1
  %or1 = or disjoint i64 %s2, %z3
  %or = or disjoint i64 %or0, %or1
  ret i64 %or
}

define i64 @movmsk_i64_v32i32_v4i32(<4 x i32> %v0, <4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3) {
; SSE-LABEL: @movmsk_i64_v32i32_v4i32(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V3:%.*]], <4 x i32> [[V2:%.*]], <8 x i32>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <8 x i32>
; SSE-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP1]], <8 x i32> [[TMP2]], <16 x i32>
; SSE-NEXT:    [[TMP4:%.*]] = icmp slt <16 x i32> [[TMP3]], zeroinitializer
; SSE-NEXT:    [[TMP5:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
; SSE-NEXT:    [[OR:%.*]] = zext i16 [[TMP5]] to i64
; SSE-NEXT:    ret i64 [[OR]]
;
; AVX2-LABEL: @movmsk_i64_v32i32_v4i32(
; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <8 x i32>
; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[V3:%.*]], <4 x i32> [[V2:%.*]], <8 x i32>
; AVX2-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP1]], <16 x i32>
; AVX2-NEXT:    [[TMP4:%.*]] = icmp slt <16 x i32> [[TMP3]], zeroinitializer
; AVX2-NEXT:    [[TMP5:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
; AVX2-NEXT:    [[OR:%.*]] = zext i16 [[TMP5]] to i64
; AVX2-NEXT:    ret i64 [[OR]]
;
; AVX512-LABEL: @movmsk_i64_v32i32_v4i32(
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[V1:%.*]], <4 x i32> [[V0:%.*]], <8 x i32>
; AVX512-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[V3:%.*]], <4 x i32> [[V2:%.*]], <8 x i32>
; AVX512-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i32> [[TMP2]], <8 x i32> [[TMP1]], <16 x i32>
; AVX512-NEXT:    [[TMP4:%.*]] = icmp slt <16 x i32> [[TMP3]], zeroinitializer
; AVX512-NEXT:    [[TMP5:%.*]] = bitcast <16 x i1> [[TMP4]] to i16
; AVX512-NEXT:    [[OR:%.*]] = zext i16 [[TMP5]] to i64
; AVX512-NEXT:    ret i64 [[OR]]
;
  %c0 = icmp slt <4 x i32> %v0, zeroinitializer
  %c1 = icmp slt <4 x i32> %v1, zeroinitializer
  %c2 = icmp slt <4 x i32> %v2, zeroinitializer
  %c3 = icmp slt <4 x i32> %v3, zeroinitializer
  %b0 = bitcast <4 x i1> %c0 to i4
  %b1 = bitcast <4 x i1> %c1 to i4
  %b2 = bitcast <4 x i1> %c2 to i4
  %b3 = bitcast <4 x i1> %c3 to i4
  %z0 = zext i4 %b0 to i64
  %z1 = zext i4 %b1 to i64
  %z2 = zext i4 %b2 to i64
  %z3 = zext i4 %b3 to i64
  %s0 = shl nuw i64 %z0, 12
  %s1 = shl nuw i64 %z1, 8
  %s2 = shl nuw i64 %z2, 4
  %or0 = or disjoint i64 %s0, %s1
  %or1 = or disjoint i64 %s2, %z3
  %or = or disjoint i64 %or0, %or1
  ret i64 %or
}

define i64 @movmsk_i64_v64i8_v32i8(<32 x i8> %v0, <32 x i8> %v1) {
; CHECK-LABEL: @movmsk_i64_v64i8_v32i8(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[V1:%.*]], <32 x i8> [[V0:%.*]], <64 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <64 x i8> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[OR:%.*]] = bitcast <64 x i1> [[TMP2]] to i64
; CHECK-NEXT:    ret i64 [[OR]]
;
  %c0 = icmp slt <32 x i8> %v0, zeroinitializer
  %c1 = icmp slt <32 x i8> %v1, zeroinitializer
  %b0 = bitcast <32 x i1> %c0 to i32
  %b1 = bitcast <32 x i1> %c1 to i32
  %z0 = zext i32 %b0 to i64
  %z1 = zext i32 %b1 to i64
  %s0 = shl nuw i64 %z0, 32
  %or = or disjoint i64 %s0, %z1
  ret i64 %or
}

define i32 @movmsk_i32_v16i32_v8i32(<8 x i32> %v0, <8 x i32> %v1) {
; CHECK-LABEL: @movmsk_i32_v16i32_v8i32(
; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[V1:%.*]], <8 x i32> [[V0:%.*]], <16 x i32>
; CHECK-NEXT:    [[TMP2:%.*]] = icmp slt <16 x i32> [[TMP1]], zeroinitializer
; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i1> [[TMP2]] to i16
; CHECK-NEXT:    [[OR:%.*]] = zext i16 [[TMP3]] to i32
; CHECK-NEXT:    ret i32 [[OR]]
;
  %c0 = icmp slt <8 x i32> %v0, zeroinitializer
  %c1 = icmp slt <8 x i32> %v1, zeroinitializer
  %b0 = bitcast <8 x i1> %c0 to i8
  %b1 = bitcast <8 x i1> %c1 to i8
  %z0 = zext i8 %b0 to i32
  %z1 = zext i8 %b1 to i32
  %s0 = shl nuw i32 %z0, 8
  %or = or disjoint i32 %s0, %z1
  ret i32 %or
}

define i64 @PR111431(<32 x i8> %a0, <32 x i8> %a1, <32 x i8> %a2) {
; SSE-LABEL: @PR111431(
; SSE-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> [[A0]], <64 x i32>
; SSE-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i8> [[A2:%.*]], <32 x i8> [[A1:%.*]], <64 x i32>
; SSE-NEXT:    [[TMP3:%.*]] = icmp eq <64 x i8> [[TMP1]], [[TMP2]]
; SSE-NEXT:    [[OR:%.*]] = bitcast <64 x i1> [[TMP3]] to i64
; SSE-NEXT:    ret i64 [[OR]]
;
; AVX2-LABEL: @PR111431(
; AVX2-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> [[A0]], <64 x i32>
; AVX2-NEXT:    [[TMP2:%.*]] = shufflevector <32 x i8> [[A2:%.*]], <32 x i8> [[A1:%.*]], <64 x i32>
; AVX2-NEXT:    [[TMP3:%.*]] = icmp eq <64 x i8> [[TMP1]], [[TMP2]]
; AVX2-NEXT:    [[OR:%.*]] = bitcast <64 x i1> [[TMP3]] to i64
; AVX2-NEXT:    ret i64 [[OR]]
;
; AVX512-LABEL: @PR111431(
; AVX512-NEXT:    [[C01:%.*]] = icmp eq <32 x i8> [[A0:%.*]], [[A1:%.*]]
; AVX512-NEXT:    [[C02:%.*]] = icmp eq <32 x i8> [[A0]], [[A2:%.*]]
; AVX512-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i1> [[C02]], <32 x i1> [[C01]], <64 x i32>
; AVX512-NEXT:    [[OR:%.*]] = bitcast <64 x i1> [[TMP1]] to i64
; AVX512-NEXT:    ret i64 [[OR]]
;
  %c01 = icmp eq <32 x i8> %a0, %a1
  %c02 = icmp eq <32 x i8> %a0, %a2
  %b01 = bitcast <32 x i1> %c01 to i32
  %b02 = bitcast <32 x i1> %c02 to i32
  %z01 = zext i32 %b01 to i64
  %z02 = zext i32 %b02 to i64
  %shl = shl nuw i64 %z01, 32
  %or = or disjoint i64 %shl, %z02
  ret i64 %or
}