; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; NOTE(review): This chunk was recovered from a whitespace-mangled dump. The
; shufflevector masks had been stripped by the extraction and were
; reconstructed as the canonical concat-with-zero masks (elements 16-31 select
; from the zeroinitializer operand). Re-run update_llc_test_checks.py to
; refresh the CHECK lines against the current llc output.
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=skx | FileCheck %s --check-prefix=CHECK --check-prefix=VLX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=knl | FileCheck %s --check-prefix=CHECK --check-prefix=NoVLX

define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi0:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi1:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi2:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    pushq %r15
; NoVLX-NEXT:    pushq %r14
; NoVLX-NEXT:    pushq %r13
; NoVLX-NEXT:    pushq %r12
; NoVLX-NEXT:    pushq %rbx
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:  .Lcfi3:
; NoVLX-NEXT:    .cfi_offset %rbx, -56
; NoVLX-NEXT:  .Lcfi4:
; NoVLX-NEXT:    .cfi_offset %r12, -48
; NoVLX-NEXT:  .Lcfi5:
; NoVLX-NEXT:    .cfi_offset %r13, -40
; NoVLX-NEXT:  .Lcfi6:
; NoVLX-NEXT:    .cfi_offset %r14, -32
; NoVLX-NEXT:  .Lcfi7:
; NoVLX-NEXT:    .cfi_offset %r15, -24
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r11d
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r14d
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r15d
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r12d
; NoVLX-NEXT:    kshiftlw $8, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r13d
; NoVLX-NEXT:    kshiftlw $7, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $6, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ebx
; NoVLX-NEXT:    kshiftlw $5, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $4, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $3, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $2, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vmovd %r10d, %xmm0
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $1, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    leaq -40(%rbp), %rsp
; NoVLX-NEXT:    popq %rbx
; NoVLX-NEXT:    popq %r12
; NoVLX-NEXT:    popq %r13
; NoVLX-NEXT:    popq %r14
; NoVLX-NEXT:    popq %r15
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_vpcmpeqb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi8:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi9:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi10:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    pushq %r15
; NoVLX-NEXT:    pushq %r14
; NoVLX-NEXT:    pushq %r13
; NoVLX-NEXT:    pushq %r12
; NoVLX-NEXT:    pushq %rbx
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:  .Lcfi11:
; NoVLX-NEXT:    .cfi_offset %rbx, -56
; NoVLX-NEXT:  .Lcfi12:
; NoVLX-NEXT:    .cfi_offset %r12, -48
; NoVLX-NEXT:  .Lcfi13:
; NoVLX-NEXT:    .cfi_offset %r13, -40
; NoVLX-NEXT:  .Lcfi14:
; NoVLX-NEXT:    .cfi_offset %r14, -32
; NoVLX-NEXT:  .Lcfi15:
; NoVLX-NEXT:    .cfi_offset %r15, -24
; NoVLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r11d
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r14d
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r15d
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r12d
; NoVLX-NEXT:    kshiftlw $8, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r13d
; NoVLX-NEXT:    kshiftlw $7, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $6, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ebx
; NoVLX-NEXT:    kshiftlw $5, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $4, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $3, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $2, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vmovd %r10d, %xmm0
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $1, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    leaq -40(%rbp), %rsp
; NoVLX-NEXT:    popq %rbx
; NoVLX-NEXT:    popq %r12
; NoVLX-NEXT:    popq %r13
; NoVLX-NEXT:    popq %r14
; NoVLX-NEXT:    popq %r15
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi16:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi17:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi18:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    pushq %r15
; NoVLX-NEXT:    pushq %r14
; NoVLX-NEXT:    pushq %r13
; NoVLX-NEXT:    pushq %r12
; NoVLX-NEXT:    pushq %rbx
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:  .Lcfi19:
; NoVLX-NEXT:    .cfi_offset %rbx, -56
; NoVLX-NEXT:  .Lcfi20:
; NoVLX-NEXT:    .cfi_offset %r12, -48
; NoVLX-NEXT:  .Lcfi21:
; NoVLX-NEXT:    .cfi_offset %r13, -40
; NoVLX-NEXT:  .Lcfi22:
; NoVLX-NEXT:    .cfi_offset %r14, -32
; NoVLX-NEXT:  .Lcfi23:
; NoVLX-NEXT:    .cfi_offset %r15, -24
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r11d
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r14d
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r15d
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r12d
; NoVLX-NEXT:    kshiftlw $8, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r13d
; NoVLX-NEXT:    kshiftlw $7, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $6, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ebx
; NoVLX-NEXT:    kshiftlw $5, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $4, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $3, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $2, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vmovd %r10d, %xmm0
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $1, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    leaq -40(%rbp), %rsp
; NoVLX-NEXT:    popq %rbx
; NoVLX-NEXT:    popq %r12
; NoVLX-NEXT:    popq %r13
; NoVLX-NEXT:    popq %r14
; NoVLX-NEXT:    popq %r15
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

define zeroext i32 @test_masked_vpcmpeqb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v32i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi24:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi25:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi26:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    pushq %r15
; NoVLX-NEXT:    pushq %r14
; NoVLX-NEXT:    pushq %r13
; NoVLX-NEXT:    pushq %r12
; NoVLX-NEXT:    pushq %rbx
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:  .Lcfi27:
; NoVLX-NEXT:    .cfi_offset %rbx, -56
; NoVLX-NEXT:  .Lcfi28:
; NoVLX-NEXT:    .cfi_offset %r12, -48
; NoVLX-NEXT:  .Lcfi29:
; NoVLX-NEXT:    .cfi_offset %r13, -40
; NoVLX-NEXT:  .Lcfi30:
; NoVLX-NEXT:    .cfi_offset %r14, -32
; NoVLX-NEXT:  .Lcfi31:
; NoVLX-NEXT:    .cfi_offset %r15, -24
; NoVLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r11d
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r14d
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r15d
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r12d
; NoVLX-NEXT:    kshiftlw $8, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r13d
; NoVLX-NEXT:    kshiftlw $7, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $6, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ebx
; NoVLX-NEXT:    kshiftlw $5, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $4, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $3, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $2, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vmovd %r10d, %xmm0
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $1, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    leaq -40(%rbp), %rsp
; NoVLX-NEXT:    popq %rbx
; NoVLX-NEXT:    popq %r12
; NoVLX-NEXT:    popq %r13
; NoVLX-NEXT:    popq %r14
; NoVLX-NEXT:    popq %r15
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi32:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi33:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi34:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    pushq %r15
; NoVLX-NEXT:    pushq %r14
; NoVLX-NEXT:    pushq %r13
; NoVLX-NEXT:    pushq %r12
; NoVLX-NEXT:    pushq %rbx
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:  .Lcfi35:
; NoVLX-NEXT:    .cfi_offset %rbx, -56
; NoVLX-NEXT:  .Lcfi36:
; NoVLX-NEXT:    .cfi_offset %r12, -48
; NoVLX-NEXT:  .Lcfi37:
; NoVLX-NEXT:    .cfi_offset %r13, -40
; NoVLX-NEXT:  .Lcfi38:
; NoVLX-NEXT:    .cfi_offset %r14, -32
; NoVLX-NEXT:  .Lcfi39:
; NoVLX-NEXT:    .cfi_offset %r15, -24
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r11d
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r14d
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r15d
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r12d
; NoVLX-NEXT:    kshiftlw $8, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r13d
; NoVLX-NEXT:    kshiftlw $7, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $6, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ebx
; NoVLX-NEXT:    kshiftlw $5, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $4, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $3, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $2, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vmovd %r10d, %xmm0
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $1, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    leaq -40(%rbp), %rsp
; NoVLX-NEXT:    popq %rbx
; NoVLX-NEXT:    popq %r12
; NoVLX-NEXT:    popq %r13
; NoVLX-NEXT:    popq %r14
; NoVLX-NEXT:    popq %r15
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_vpcmpeqb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi40:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi41:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi42:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    pushq %r15
; NoVLX-NEXT:    pushq %r14
; NoVLX-NEXT:    pushq %r13
; NoVLX-NEXT:    pushq %r12
; NoVLX-NEXT:    pushq %rbx
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:  .Lcfi43:
; NoVLX-NEXT:    .cfi_offset %rbx, -56
; NoVLX-NEXT:  .Lcfi44:
; NoVLX-NEXT:    .cfi_offset %r12, -48
; NoVLX-NEXT:  .Lcfi45:
; NoVLX-NEXT:    .cfi_offset %r13, -40
; NoVLX-NEXT:  .Lcfi46:
; NoVLX-NEXT:    .cfi_offset %r14, -32
; NoVLX-NEXT:  .Lcfi47:
; NoVLX-NEXT:    .cfi_offset %r15, -24
; NoVLX-NEXT:    vpcmpeqb (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r11d
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r14d
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r15d
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r12d
; NoVLX-NEXT:    kshiftlw $8, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r13d
; NoVLX-NEXT:    kshiftlw $7, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $6, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ebx
; NoVLX-NEXT:    kshiftlw $5, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $4, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $3, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $2, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vmovd %r10d, %xmm0
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $1, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    leaq -40(%rbp), %rsp
; NoVLX-NEXT:    popq %rbx
; NoVLX-NEXT:    popq %r12
; NoVLX-NEXT:    popq %r13
; NoVLX-NEXT:    popq %r14
; NoVLX-NEXT:    popq %r15
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi48:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi49:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi50:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    pushq %r15
; NoVLX-NEXT:    pushq %r14
; NoVLX-NEXT:    pushq %r13
; NoVLX-NEXT:    pushq %r12
; NoVLX-NEXT:    pushq %rbx
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:  .Lcfi51:
; NoVLX-NEXT:    .cfi_offset %rbx, -56
; NoVLX-NEXT:  .Lcfi52:
; NoVLX-NEXT:    .cfi_offset %r12, -48
; NoVLX-NEXT:  .Lcfi53:
; NoVLX-NEXT:    .cfi_offset %r13, -40
; NoVLX-NEXT:  .Lcfi54:
; NoVLX-NEXT:    .cfi_offset %r14, -32
; NoVLX-NEXT:  .Lcfi55:
; NoVLX-NEXT:    .cfi_offset %r15, -24
; NoVLX-NEXT:    vpcmpeqb %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r11d
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r14d
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r15d
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r12d
; NoVLX-NEXT:    kshiftlw $8, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r13d
; NoVLX-NEXT:    kshiftlw $7, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $6, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ebx
; NoVLX-NEXT:    kshiftlw $5, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $4, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $3, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $2, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vmovd %r10d, %xmm0
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $1, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    leaq -40(%rbp), %rsp
; NoVLX-NEXT:    popq %rbx
; NoVLX-NEXT:    popq %r12
; NoVLX-NEXT:    popq %r13
; NoVLX-NEXT:    popq %r14
; NoVLX-NEXT:    popq %r15
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <16 x i8>
  %1 = bitcast <2 x i64> %__b to <16 x i8>
  %2 = icmp eq <16 x i8> %0, %1
  %3 = bitcast i16 %__u to <16 x i1>
  %4 = and <16 x i1> %2, %3
  %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i64 @test_masked_vpcmpeqb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqb_v16i1_v64i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi56:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi57:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi58:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    pushq %r15
; NoVLX-NEXT:    pushq %r14
; NoVLX-NEXT:    pushq %r13
; NoVLX-NEXT:    pushq %r12
; NoVLX-NEXT:    pushq %rbx
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:  .Lcfi59:
; NoVLX-NEXT:    .cfi_offset %rbx, -56
; NoVLX-NEXT:  .Lcfi60:
; NoVLX-NEXT:    .cfi_offset %r12, -48
; NoVLX-NEXT:  .Lcfi61:
; NoVLX-NEXT:    .cfi_offset %r13, -40
; NoVLX-NEXT:  .Lcfi62:
; NoVLX-NEXT:    .cfi_offset %r14, -32
; NoVLX-NEXT:  .Lcfi63:
; NoVLX-NEXT:    .cfi_offset %r15, -24
; NoVLX-NEXT:    vpcmpeqb (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    kmovw %edi, %k1
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0 {%k1}
; NoVLX-NEXT:    vpxord %zmm0, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k1
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kshiftlw $14, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r8d
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $13, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r9d
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r11d
; NoVLX-NEXT:    kshiftlw $11, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r14d
; NoVLX-NEXT:    kshiftlw $10, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r15d
; NoVLX-NEXT:    kshiftlw $9, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r12d
; NoVLX-NEXT:    kshiftlw $8, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %r13d
; NoVLX-NEXT:    kshiftlw $7, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %esi
; NoVLX-NEXT:    kshiftlw $6, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %ebx
; NoVLX-NEXT:    kshiftlw $5, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edi
; NoVLX-NEXT:    kshiftlw $4, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    kshiftlw $3, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kmovw %k1, %edx
; NoVLX-NEXT:    kshiftlw $2, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vmovd %r10d, %xmm0
; NoVLX-NEXT:    kmovw %k1, %r10d
; NoVLX-NEXT:    kshiftlw $1, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    vpinsrb $1, %r8d, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vpinsrb $2, %r9d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $3, %r11d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $4, %r14d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $5, %r15d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $6, %r12d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $7, %r13d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $8, %esi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $9, %ebx, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $10, %edi, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $11, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $12, %edx, %xmm0, %xmm0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    vpinsrb $13, %r10d, %xmm0, %xmm0
; NoVLX-NEXT:    vpinsrb $14, %ecx, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    vpinsrb $15, %eax, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    leaq -40(%rbp), %rsp
; NoVLX-NEXT:    popq %rbx
; NoVLX-NEXT:    popq %r12
; NoVLX-NEXT:    popq %r13
; NoVLX-NEXT:    popq %r14
; NoVLX-NEXT:    popq %r15
popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp eq <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqb %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi64: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi65: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi66: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp eq <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> 
%3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqb (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqb_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi67: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi68: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi69: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqb (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x i8> %2 = icmp eq <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqb %ymm1, 
%ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi70: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi71: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi72: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 ; NoVLX-NEXT: vpxord %zmm4, %zmm4, %zmm4 ; NoVLX-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp eq <32 x i8> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqb_v32i1_v64i1_mask_mem(i32 zeroext 
%__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqb (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqb_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi73: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi74: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi75: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: vpxord %zmm3, %zmm3, %zmm3 ; NoVLX-NEXT: vpcmpeqb (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4 ; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x i8> %2 = icmp eq <32 x i8> %0, %1 %3 = 
bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp eq <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpeqw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp eq <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x 
i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp eq <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpeqw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, 
<2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp eq <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi76: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi77: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi78: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp eq <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpeqw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi79: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi80: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi81: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp eq <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq 
%rbp ; NoVLX-NEXT: .Lcfi82: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi83: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi84: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl 
(%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp eq <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpeqw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi85: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi86: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi87: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, 
%k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp eq <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi88: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi89: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi90: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq 
%zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp eq <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> 
%4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqw_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi91: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi92: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi93: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, 
%xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp eq <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi94: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi95: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi96: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw 
%k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp eq <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x 
i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi97: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi98: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi99: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, 
%xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp eq <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi100: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi101: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi102: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi103: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi104: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi105: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi106: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi107: ; NoVLX-NEXT: 
.cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp eq <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpeqw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi108: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi109: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi110: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; 
NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi111: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi112: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi113: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi114: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi115: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp eq <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: 
vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi116: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi117: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi118: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi119: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi120: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi121: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi122: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi123: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; 
NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp eq <16 x 
i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpeqw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi124: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi125: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi126: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi127: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi128: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi129: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi130: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi131: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; 
NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: 
vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp eq <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi132: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi133: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi134: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi135: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi136: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi137: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi138: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi139: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; 
NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp eq <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqw_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi140: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi141: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi142: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: 
pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi143: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi144: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi145: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi146: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi147: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; 
NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp eq <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vpcmpeqw_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi148: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi149: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi150: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi151: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi152: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi153: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi154: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi155: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw 
$15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: 
movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp eq <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi156: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi157: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi158: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi159: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi160: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi161: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi162: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi163: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; 
NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp eq <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi164: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi165: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi166: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3 ; 
NoVLX-NEXT: vmovq %xmm3, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 ; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 ; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm6 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm5 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm5 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: 
vpinsrw $3, %ecx, %xmm5, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq 
$32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 ; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; 
NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpeqw %ymm1, %ymm3, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 
; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp eq <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x 
i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqw (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqw_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi167: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi168: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi169: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm2 ; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm1 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: 
vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpeqw 32(%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw 
$15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %eax, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, 
%xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpeqw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <32 x i16> %2 = icmp eq <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqw %zmm1, %zmm0, 
%k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi170: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi171: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi172: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm8 ; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 ; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm6 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, 
%ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, 
%xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm5 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: 
vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 ; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm8 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 ; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} ; NoVLX-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm4 ; NoVLX-NEXT: vpmovdb %zmm6, %xmm6 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 ; NoVLX-NEXT: vpcmpeqw %ymm2, %ymm8, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3 ; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 
; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpand %xmm6, 
%xmm2, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp eq <32 x i16> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqw (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqw_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi173: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi174: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi175: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 ; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, 
%eax, %xmm2, %xmm2 ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm0, 
%rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm0, %xmm1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4 ; NoVLX-NEXT: vpmovdb %zmm0, %xmm2 ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm3 ; NoVLX-NEXT: vpcmpeqw (%rsi), %ymm3, %ymm3 ; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 ; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpcmpeqw 32(%rsi), %ymm4, %ymm4 ; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm4 ; NoVLX-NEXT: vpslld $31, %zmm4, %zmm4 ; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw 
$15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw 
%k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <32 x i16> %2 = icmp eq <32 x i16> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, 
%k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: 
kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, 
%k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: 
vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; 
NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, 
%eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: 
kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; 
NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, 
%zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; 
NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; 
NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = 
icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, 
%zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; 
NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; 
VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; 
NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi176: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi177: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi178: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, 
%k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi179: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi180: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi181: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define 
zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi182: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi183: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi184: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; 
NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi185: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi186: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi187: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi188: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi189: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi190: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; 
NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi191: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi192: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi193: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, 
%eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi194: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi195: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi196: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, 
%xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi197: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi198: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi199: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, 
{{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi200: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi201: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi202: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, 
%k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %xmm0, 
%k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi203: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi204: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi205: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq 
%rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi206: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi207: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi208: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl 
(%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi209: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi210: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi211: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; 
NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp eq <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a 
to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x 
i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: 
retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi212: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi213: ; 
NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi214: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 
x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi215: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi216: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi217: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, 
%xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi218: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi219: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi220: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; 
NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi221: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi222: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi223: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; 
NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi224: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi225: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi226: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi227: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi228: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi229: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; 
NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp eq 
<8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi230: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi231: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi232: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi233: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi234: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi235: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: 
kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vpcmpeqd_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi236: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi237: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi238: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, 
%k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi239: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi240: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi241: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: 
kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) 
local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi242: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi243: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi244: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: 
vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi245: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi246: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi247: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: 
kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp eq <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 
@test_vpcmpeqd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi248: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi249: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi250: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi251: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi252: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi253: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi254: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi255: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x 
i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi256: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi257: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi258: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi259: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi260: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi261: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi262: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi263: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: 
kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; 
NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi264: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi265: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi266: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi267: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi268: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi269: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi270: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi271: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, 
%k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, 
%k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi272: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi273: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi274: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi275: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi276: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi277: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi278: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi279: ; 
NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, 
%xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpeqd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi280: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi281: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi282: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; 
NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi283: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi284: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi285: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi286: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi287: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, 
%k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; 
VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi288: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi289: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi290: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi291: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi292: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi293: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi294: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi295: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw 
$6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = 
bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi296: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi297: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi298: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi299: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi300: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi301: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi302: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi303: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) 
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi304: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi305: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi306: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi307: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi308: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi309: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi310: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi311: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, 
%eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi312: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi313: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi314: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi315: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi316: ; NoVLX-NEXT: .cfi_offset %r12, 
-48 ; NoVLX-NEXT: .Lcfi317: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi318: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi319: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpeqd_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi320: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi321: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi322: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi323: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi324: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi325: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi326: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi327: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqd (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* 
%__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpeqd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqd_v16i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi328: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi329: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi330: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi331: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi332: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi333: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi334: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi335: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, 
%k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqd_v16i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi336: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi337: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi338: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi339: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi340: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi341: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi342: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi343: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; 
NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp eq <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: 
vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; 
NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp eq <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; 
NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x 
i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 
@test_masked_vpcmpeqq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 
@test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), 
%xmm1 ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp eq <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq %xmm1, 
%xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, 
%zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x 
i64> %2 = icmp eq <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp eq <2 x i64> %0, 
%1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, 
%k1 ; VLX-NEXT: vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp eq <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask: ; VLX: # 
BB#0: # %entry ; VLX-NEXT: vpcmpeqq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi344: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi345: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi346: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp eq <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi347: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi348: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi349: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; 
; NOTE(review): Autogenerated llc test (see update_llc_test_checks.py header).
; CHECK lines are machine-produced — regenerate with the script, do not hand-edit.
; Reformatted only: one FileCheck directive / IR statement per line; no
; instruction or IR token changed.
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  ; NOTE(review): the <32 x i32> shuffle-mask literal is missing here (lost in
  ; transit) — restore from the upstream test before running; TODO confirm
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi350:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi351:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi352:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %edi, %k0
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vmovd %ecx, %xmm1
; NoVLX-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; NOTE(review): shuffle-mask literals missing below (lost in transit) —
  ; restore from the upstream test; TODO confirm
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi353:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi354:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi355:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %edi, %k0
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vmovd %ecx, %xmm1
; NoVLX-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; NOTE(review): shuffle-mask literals missing below — TODO restore from upstream
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi356:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi357:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi358:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  ; NOTE(review): shuffle-mask literals missing below — TODO restore from upstream
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32>
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi359:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi360:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi361:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %edi, %k0
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vmovd %ecx, %xmm1
; NoVLX-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  ; NOTE(review): shuffle-mask literals missing below — TODO restore from upstream
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32>
  %6 = bitcast <32 x i1> %5 to i32
  ret i32 %6
}

define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi362:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi363:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi364:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  ; NOTE(review): shuffle-mask literal missing below — TODO restore from upstream
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi365:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi366:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi367:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:    vpcmpeqq (%rdi), %xmm0, %xmm0
; NoVLX-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NOTE(review): autogenerated CHECK lines (update_llc_test_checks.py) —
; reformatted one directive/statement per line; regenerate rather than hand-edit.
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  ; NOTE(review): shuffle-mask literal missing below — TODO restore from upstream
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi368:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi369:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi370:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %edi, %k0
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vmovd %ecx, %xmm1
; NoVLX-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %1 = bitcast <2 x i64> %__b to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; NOTE(review): shuffle-mask literals missing below — TODO restore from upstream
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi371:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi372:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi373:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:    vpcmpeqq (%rsi), %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %edi, %k0
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vmovd %ecx, %xmm1
; NoVLX-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load <2 x i64>, <2 x i64>* %__b
  %1 = bitcast <2 x i64> %load to <2 x i64>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; NOTE(review): shuffle-mask literals missing below — TODO restore from upstream
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32>
  %4 = and <2 x i1> %2, %extract.i
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr {
; NOTE(review): autogenerated CHECK lines (update_llc_test_checks.py) —
; reformatted one directive/statement per line; regenerate rather than hand-edit.
; VLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqq (%rdi){1to2}, %xmm0, %k0
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi374:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi375:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi376:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:    vpbroadcastq (%rdi), %xmm1
; NoVLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  ; NOTE(review): shuffle-mask literals missing below — TODO restore from upstream
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT:    kmovq %k0, %rax
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi377:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi378:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi379:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $64, %rsp
; NoVLX-NEXT:    vpbroadcastq (%rsi), %xmm1
; NoVLX-NEXT:    vpcmpeqq %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    kmovw %edi, %k0
; NoVLX-NEXT:    kshiftlw $15, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    kmovw %k1, %ecx
; NoVLX-NEXT:    vmovd %ecx, %xmm1
; NoVLX-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpxord %zmm1, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <2 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <2 x i64> undef, i64 %load, i32 0
  ; NOTE(review): shuffle-mask literals missing below — TODO restore from upstream
  %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32>
  %2 = icmp eq <2 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32>
  %4 = and <2 x i1> %extract.i, %2
  %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AL %AL %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
; NoVLX-NEXT:    vpextrb $4, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $0, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k0
; NoVLX-NEXT:    kshiftlw $7, %k0, %k0
; NoVLX-NEXT:    kshiftrw $7, %k0, %k0
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kshiftrw $1, %k1, %k1
; NoVLX-NEXT:    kshiftlw $1, %k1, %k1
; NoVLX-NEXT:    korw %k0, %k1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT:    vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $8, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT:    vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NOTE(review): autogenerated CHECK lines (update_llc_test_checks.py) —
; reformatted one directive/statement per line; regenerate rather than hand-edit.
; NoVLX-NEXT:    vpextrb $12, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT:    vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AL %AL %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  ; NOTE(review): shuffle-mask literal missing below — TODO restore from upstream
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqq (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AL %AL %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vpcmpeqq (%rdi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
; NoVLX-NEXT:    vpextrb $4, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $0, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k0
; NoVLX-NEXT:    kshiftlw $7, %k0, %k0
; NoVLX-NEXT:    kshiftrw $7, %k0, %k0
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kshiftrw $1, %k1, %k1
; NoVLX-NEXT:    kshiftlw $1, %k1, %k1
; NoVLX-NEXT:    korw %k0, %k1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT:    vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $8, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT:    vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $12, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT:    vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AL %AL %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  ; NOTE(review): shuffle-mask literal missing below — TODO restore from upstream
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AL %AL %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
; NoVLX-NEXT:    kmovw %edi, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kshiftlw $13, %k0, %k2
; NoVLX-NEXT:    kshiftrw $15, %k2, %k2
; NoVLX-NEXT:    kshiftlw $15, %k0, %k3
; NoVLX-NEXT:    kshiftrw $15, %k3, %k3
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    kmovw %k3, %ecx
; NoVLX-NEXT:    vmovd %ecx, %xmm1
; NoVLX-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    kmovw %k2, %eax
; NoVLX-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpextrb $4, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $0, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k0
; NoVLX-NEXT:    kshiftlw $7, %k0, %k0
; NoVLX-NEXT:    kshiftrw $7, %k0, %k0
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kshiftrw $1, %k1, %k1
; NoVLX-NEXT:    kshiftlw $1, %k1, %k1
; NoVLX-NEXT:    korw %k0, %k1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT:    vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $8, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT:    vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $12, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT:    vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AL %AL %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %1 = bitcast <4 x i64> %__b to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; NOTE(review): shuffle-mask literals missing below — TODO restore from upstream
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; NOTE(review): autogenerated CHECK lines (update_llc_test_checks.py) —
; reformatted one directive/statement per line; regenerate rather than hand-edit.
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AL %AL %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vpcmpeqq (%rsi), %ymm0, %ymm0
; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
; NoVLX-NEXT:    kmovw %edi, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NoVLX-NEXT:    kshiftlw $13, %k0, %k2
; NoVLX-NEXT:    kshiftrw $15, %k2, %k2
; NoVLX-NEXT:    kshiftlw $15, %k0, %k3
; NoVLX-NEXT:    kshiftrw $15, %k3, %k3
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    kmovw %k3, %ecx
; NoVLX-NEXT:    vmovd %ecx, %xmm1
; NoVLX-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    kmovw %k2, %eax
; NoVLX-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    vpand %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpextrb $4, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $0, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k0
; NoVLX-NEXT:    kshiftlw $7, %k0, %k0
; NoVLX-NEXT:    kshiftrw $7, %k0, %k0
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kshiftrw $1, %k1, %k1
; NoVLX-NEXT:    kshiftlw $1, %k1, %k1
; NoVLX-NEXT:    korw %k0, %k1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT:    vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $8, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT:    vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $12, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT:    vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AL %AL %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <4 x i64>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  ; NOTE(review): shuffle-mask literals missing below — TODO restore from upstream
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32>
  %4 = and <4 x i1> %2, %extract.i
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqq (%rdi){1to4}, %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AL %AL %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vpbroadcastq (%rdi), %ymm1
; NoVLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
; NoVLX-NEXT:    vpextrb $4, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $0, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k0
; NoVLX-NEXT:    kshiftlw $7, %k0, %k0
; NoVLX-NEXT:    kshiftrw $7, %k0, %k0
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kshiftrw $1, %k1, %k1
; NoVLX-NEXT:    kshiftlw $1, %k1, %k1
; NoVLX-NEXT:    korw %k0, %k1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT:    vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $8, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT:    vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $12, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT:    vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AL %AL %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  ; NOTE(review): shuffle-mask literals missing below — TODO restore from upstream
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32>
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    kmovd %edi, %k1
; VLX-NEXT:    vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AL %AL %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
; NoVLX-NEXT:    kmovw %edi, %k0
; NoVLX-NEXT:    kshiftlw $12, %k0, %k1
; NoVLX-NEXT:    kshiftrw $15, %k1, %k1
; NOTE(review): autogenerated CHECK lines (update_llc_test_checks.py) —
; reformatted one directive/statement per line; regenerate rather than hand-edit.
; NoVLX-NEXT:    kshiftlw $13, %k0, %k2
; NoVLX-NEXT:    kshiftrw $15, %k2, %k2
; NoVLX-NEXT:    kshiftlw $15, %k0, %k3
; NoVLX-NEXT:    kshiftrw $15, %k3, %k3
; NoVLX-NEXT:    kshiftlw $14, %k0, %k0
; NoVLX-NEXT:    kshiftrw $15, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    kmovw %k3, %ecx
; NoVLX-NEXT:    vmovd %ecx, %xmm1
; NoVLX-NEXT:    vpinsrb $4, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    kmovw %k2, %eax
; NoVLX-NEXT:    vpinsrb $8, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    kmovw %k1, %eax
; NoVLX-NEXT:    vpinsrb $12, %eax, %xmm1, %xmm1
; NoVLX-NEXT:    vpand %xmm0, %xmm1, %xmm0
; NoVLX-NEXT:    vpextrb $4, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $0, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k0
; NoVLX-NEXT:    kshiftlw $7, %k0, %k0
; NoVLX-NEXT:    kshiftrw $7, %k0, %k0
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kshiftrw $1, %k1, %k1
; NoVLX-NEXT:    kshiftlw $1, %k1, %k1
; NoVLX-NEXT:    korw %k0, %k1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm1, %zmm2, %zmm3
; NoVLX-NEXT:    vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $8, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm2, %zmm1, %zmm3
; NoVLX-NEXT:    vpsllq $63, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmq %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $12, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; NoVLX-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7]
; NoVLX-NEXT:    vpermi2q %zmm0, %zmm1, %zmm2
; NoVLX-NEXT:    vpsllq $63, %zmm2, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AL %AL %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <4 x i64>
  %load = load i64, i64* %__b
  %vec = insertelement <4 x i64> undef, i64 %load, i32 0
  ; NOTE(review): shuffle-mask literals missing below — TODO restore from upstream
  %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32>
  %2 = icmp eq <4 x i64> %0, %1
  %3 = bitcast i8 %__u to <8 x i1>
  %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32>
  %4 = and <4 x i1> %extract.i, %2
  %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32>
  %6 = bitcast <8 x i1> %5 to i8
  ret i8 %6
}

define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AX %AX %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vpcmpeqq %ymm1, %ymm0, %ymm0
; NoVLX-NEXT:    vpmovqd %zmm0, %ymm0
; NoVLX-NEXT:    vpextrb $4, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $0, %xmm0, %eax
; NoVLX-NEXT:    andl $1, %eax
; NoVLX-NEXT:    kmovw %eax, %k0
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kshiftrw $1, %k1, %k1
; NoVLX-NEXT:    kshiftlw $1, %k1, %k1
; NoVLX-NEXT:    korw %k0, %k1, %k1
; NoVLX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT:    vpermi2d %zmm1, %zmm2, %zmm3
; NoVLX-NEXT:    vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z}
; NoVLX-NEXT:    vpextrb $8, %xmm0, %eax
; NoVLX-NEXT:    kmovw %eax, %k1
; NoVLX-NEXT:    vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z}
; NoVLX-NEXT:    vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15]
; NoVLX-NEXT:    vpermi2d %zmm2, %zmm1, %zmm3
; NoVLX-NEXT:    vpslld $31, %zmm3, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k1
; NoVLX-NEXT:    vpternlogd $255,
%zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: 
vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, 
%eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 
%6 } define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, 
%zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, 
%k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), 
%ymm1 ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = 
[0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi380: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi381: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi382: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; 
NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi383: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi384: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi385: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret 
i32 %4 } define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi386: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi387: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi388: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw 
%k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi389: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi390: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi391: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi392: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi393: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi394: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi395: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi396: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi397: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; 
NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi398: ; 
NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi399: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi400: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi401: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi402: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; 
NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi403: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi404: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi405: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; 
NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi406: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x 
i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi407: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi408: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi409: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 
; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi410: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi411: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi412: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp eq <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi413: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi414: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi415: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpeqq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp eq <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 
@test_vpcmpeqq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp eq <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp eq <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: 
kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp eq <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp eq <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpeqq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = 
load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp eq <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp eq <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi416: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi417: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi418: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq 
%zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp eq <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpeqq_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi419: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi420: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi421: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld 
$31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp eq <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi422: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi423: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi424: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp eq <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi425: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi426: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi427: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} ; 
NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp eq <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpeqq_v8i1_v32i1_mask_mem_b(<8 x 
i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi428: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi429: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi430: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, 
%xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp eq <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi431: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi432: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi433: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw 
$11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp eq <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi434: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi435: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi436: ; 
NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x 
i64> %__b to <8 x i64> %2 = icmp eq <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi437: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi438: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi439: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp eq <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi440: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi441: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi442: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp eq <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vpcmpeqq_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi443: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi444: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi445: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp eq <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpeqq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpeqq_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi446: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi447: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi448: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp eq <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: 
vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpeqq_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi449: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi450: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi451: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, 
%zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp eq <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi452: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi453: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi454: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi455: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi456: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi457: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi458: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi459: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; 
NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi460: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi461: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi462: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi463: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi464: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi465: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi466: ; NoVLX-NEXT: 
.cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi467: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtb (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, 
%ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi468: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi469: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi470: ; NoVLX-NEXT: 
.cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi471: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi472: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi473: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi474: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi475: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw 
$4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; 
VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi476: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi477: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi478: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi479: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi480: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi481: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi482: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi483: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; 
NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi484: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi485: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi486: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi487: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi488: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi489: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi490: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi491: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw 
$15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw 
$15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtb (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtb_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi492: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi493: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi494: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi495: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi496: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi497: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi498: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi499: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtb 
(%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: 
vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtb %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi500: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi501: ; NoVLX-NEXT: .cfi_offset 
%rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi502: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi503: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi504: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi505: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi506: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi507: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtb %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, 
%k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, 
%3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtb (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi508: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi509: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi510: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi511: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi512: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi513: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi514: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi515: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtb (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: 
kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, 
%eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sgt <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi516: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi517: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi518: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, 
%k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp sgt <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtb (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtb_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi519: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi520: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi521: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %load = load 
<4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x i8> %2 = icmp sgt <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtb %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi522: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi523: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi524: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 ; NoVLX-NEXT: vpxord %zmm4, %zmm4, %zmm4 ; NoVLX-NEXT: vpcmpgtb %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq 
$32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp sgt <32 x i8> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtb (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtb_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi525: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi526: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi527: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: vpxord %zmm3, %zmm3, %zmm3 ; NoVLX-NEXT: vpcmpgtb (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4 ; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; 
; NoVLX-NEXT:    movl (%rsp), %ecx
; NoVLX-NEXT:    vptestmd %zmm3, %zmm3, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; NoVLX-NEXT:    shlq $32, %rax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <32 x i8>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <32 x i8>
  %2 = icmp sgt <32 x i8> %0, %1
  %3 = bitcast i32 %__u to <32 x i1>
  %4 = and <32 x i1> %2, %3
  ; NOTE(review): the <64 x i32> shuffle-mask constant was stripped by text mangling; restored
  ; here as the natural concatenation mask <0..63> (low 32 lanes from %4, high 32 lanes from
  ; zeroinitializer). Confirm against a pristine copy regenerated by utils/update_llc_test_checks.py.
  %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %6 = bitcast <64 x i1> %5 to i64
  ret i64 %6
}

define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AX %AX %EAX
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vpcmpgtw %xmm1, %xmm0, %xmm0
; NoVLX-NEXT:    vpmovsxwq %xmm0, %zmm0
; NoVLX-NEXT:    vpsllq $63, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmq %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kxorw %k0, %k0, %k1
; NoVLX-NEXT:    kunpckbw %k0, %k1, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AX %AX %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <8 x i16>
  %1 = bitcast <2 x i64> %__b to <8 x i16>
  %2 = icmp sgt <8 x i16> %0, %1
  ; NOTE(review): <16 x i32> mask reconstructed as the concatenation mask <0..15> (stripped by
  ; text mangling) — confirm against the regenerated file.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define zeroext i16 @test_vpcmpsgtw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vpcmpgtw (%rdi), %xmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AX %AX %EAX
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v16i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vpcmpgtw (%rdi), %xmm0,
%xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq 
; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi528: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi529: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi530: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx 
; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi531: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi532: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi533: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; 
NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = 
shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi534: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi535: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi536: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi537: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi538: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi539: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 ; 
VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi540: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi541: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi542: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, 
%eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi543: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi544: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi545: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, 
%k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi546: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi547: ; 
NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi548: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl 
{{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi549: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi550: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi551: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sgt <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi552: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi553: ; 
NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi554: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi555: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi556: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi557: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi558: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi559: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw 
$15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpsgtw_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi560: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi561: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi562: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi563: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi564: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi565: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi566: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi567: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; 
NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 
= bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi568: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi569: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi570: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi571: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi572: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi573: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi574: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi575: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb 
$14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi576: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi577: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi578: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi579: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi580: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi581: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi582: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi583: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 ; 
NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, 
%xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi584: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi585: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi586: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; 
NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi587: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi588: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi589: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi590: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi591: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpsgtw_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi592: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi593: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi594: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi595: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi596: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi597: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi598: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi599: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, 
%k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq 
-40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi600: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi601: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi602: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi603: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi604: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi605: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi606: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi607: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw 
%k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb 
$9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi608: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi609: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi610: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; 
NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi611: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi612: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi613: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi614: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi615: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: 
kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sgt <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) 
local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi616: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi617: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi618: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3 ; NoVLX-NEXT: vmovq %xmm3, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 ; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 ; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm6 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm5 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; 
NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm5 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; 
NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; 
NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 ; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $8, 
%k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: 
kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 
; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp sgt <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtw (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtw_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi619: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi620: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi621: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm2 ; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm1 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: 
movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx 
; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw 32(%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpmovsxwd %ymm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %eax, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; 
NoVLX-NEXT: vpinsrb $9, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpcmpgtw (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq 
%rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <32 x i16> %2 = icmp sgt <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi622: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi623: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi624: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm8 ; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 ; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm6 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx 
; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: 
vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm5 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: 
shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 ; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm8 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 ; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} ; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm4 ; NoVLX-NEXT: vpmovdb %zmm6, %xmm6 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 ; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm8, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; 
NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3 ; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax 
; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpand %xmm6, %xmm2, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp sgt <32 x i16> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtw (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpsgtw_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi625: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi626: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi627: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 ; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, 
%xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm0, %xmm1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4 ; NoVLX-NEXT: vpmovdb %zmm0, %xmm2 ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm3 ; NoVLX-NEXT: vpcmpgtw (%rsi), %ymm3, %ymm3 ; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 ; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kshiftlw 
$15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax 
; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpcmpgtw 32(%rsi), %ymm4, %ymm4 ; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm4 ; NoVLX-NEXT: vpslld $31, %zmm4, %zmm4 ; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <32 x i16> %2 = icmp sgt <32 x i16> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpsgtd_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } 
define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 
x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 
{%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: 
kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x 
i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: 
vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw 
$1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # 
kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask(i8 
zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d 
%zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw 
%k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret 
i16 %6 } define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast 
<2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = 
[0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi628: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi629: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: 
movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi630: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi631: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi632: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi633: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; 
NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi634: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi635: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi636: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, 
%eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi637: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi638: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi639: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw 
$13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi640: ; NoVLX-NEXT: 
.cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi641: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi642: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi643: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi644: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi645: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, 
%rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; 
VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi646: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi647: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi648: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; 
NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi649: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi650: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi651: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask: ; NoVLX: # 
BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi652: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi653: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi654: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> 
%__b to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi655: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi656: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi657: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtd (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi658: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi659: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi660: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw 
%k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi661: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi662: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi663: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sgt <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 
@test_vpcmpsgtd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vpcmpsgtd_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 
= bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = 
shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi664: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi665: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi666: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, 
%r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi667: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi668: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi669: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d 
; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq 
%rbp ; NoVLX-NEXT: .Lcfi670: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi671: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi672: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax 
; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi673: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi674: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi675: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, 
%k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi676: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi677: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi678: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; 
NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define 
zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi679: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi680: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi681: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi682: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi683: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi684: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; 
VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi685: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi686: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi687: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd 
%xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi688: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi689: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi690: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, 
%k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi691: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi692: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi693: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi694: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi695: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi696: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, 
%edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi697: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi698: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi699: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sgt <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi700: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi701: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi702: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi703: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi704: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi705: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi706: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi707: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; 
NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 
; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi708: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi709: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi710: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi711: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi712: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi713: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi714: ; NoVLX-NEXT: .cfi_offset %r14, -32 
; NoVLX-NEXT: .Lcfi715: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 
; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi716: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi717: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi718: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq 
%r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi719: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi720: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi721: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi722: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi723: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw 
$15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi724: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi725: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi726: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi727: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi728: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi729: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi730: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi731: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, 
%k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> 
zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgtd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi732: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi733: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi734: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi735: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi736: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi737: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi738: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi739: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: 
kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; 
NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> zeroinitializer %2 = icmp sgt <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi740: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi741: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi742: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi743: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi744: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi745: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi746: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi747: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; 
NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi748: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi749: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi750: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi751: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi752: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi753: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi754: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: 
.Lcfi755: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb 
$3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi756: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi757: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi758: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 
; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi759: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi760: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi761: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi762: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi763: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vpcmpsgtd_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi764: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi765: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi766: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi767: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi768: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi769: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi770: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi771: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 
; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: 
popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi772: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi773: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi774: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi775: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi776: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi777: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi778: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi779: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: 
kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb 
$12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtd_v16i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi780: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi781: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi782: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi783: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi784: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi785: 
; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi786: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi787: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, 
%r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_masked_vpcmpsgtd_v16i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi788: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi789: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi790: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi791: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi792: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi793: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi794: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi795: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 
; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = 
insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sgt <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 
x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: 
vpcmpgtq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) 
; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> 
zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; 
NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, 
%k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; 
NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq 
entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to 
<2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), 
%xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; 
NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, 
%xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; 
NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = 
[0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi796: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi797: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi798: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq 
%rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi799: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi800: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi801: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> 
%__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi802: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi803: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi804: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = 
bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi805: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi806: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi807: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to 
<8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi808: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi809: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi810: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 
@test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi811: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi812: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi813: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = 
icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi814: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi815: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi816: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = 
shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi817: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi818: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi819: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } 
define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi820: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi821: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi822: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq 
%rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi823: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi824: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi825: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rsi), %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; 
NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi826: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi827: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi828: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; 
NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi829: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi830: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi831: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, 
%xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sgt <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; 
NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq 
(%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask: ; VLX: # BB#0: # 
%entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq 
%zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 
; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* 
%__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b 
%vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: 
vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; 
NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; 
NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask(i8 
zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = 
[0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: 
vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x 
i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 
; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: 
kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; 
VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi832: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi833: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi834: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi835: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi836: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi837: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; 
NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi838: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi839: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi840: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: 
kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: 
.Lcfi841: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi842: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi843: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 
x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi844: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi845: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi846: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi847: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi848: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi849: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: 
popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi850: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi851: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi852: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: 
shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi853: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi854: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi855: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, 
%rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi856: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi857: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi858: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi859: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi860: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi861: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rsi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, 
%k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi862: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi863: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi864: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 
zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi865: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi866: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi867: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, 
{{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sgt <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; 
NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> 
%load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgtq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 
x i32> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi868: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi869: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi870: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb 
$4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi871: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi872: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi873: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, 
%esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi874: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi875: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi876: ; NoVLX-NEXT: 
.cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> 
zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi877: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi878: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi879: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, 
%xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgtq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi880: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi881: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi882: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; 
NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: 
.Lcfi883: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi884: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi885: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x 
i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi886: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi887: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi888: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: 
vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi889: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi890: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi891: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; 
NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vpcmpsgtq_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi892: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi893: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi894: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi895: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi896: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi897: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; 
NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgtq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: 
retq ; ; NoVLX-LABEL: test_vpcmpsgtq_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi898: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi899: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi900: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; 
NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sgt <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgtq_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi901: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi902: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi903: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 
; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sgt <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi904: ; NoVLX-NEXT: 
.cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi905: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi906: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi907: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi908: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi909: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi910: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi911: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, 
%k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define 
zeroext i32 @test_vpcmpsgeb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi912: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi913: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi914: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi915: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi916: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi917: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi918: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi919: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; 
NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), 
%eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi920: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi921: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi922: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi923: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi924: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi925: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi926: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi927: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, 
{{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, 
%ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltb (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi928: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi929: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi930: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi931: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi932: ; NoVLX-NEXT: .cfi_offset 
%r12, -48 ; NoVLX-NEXT: .Lcfi933: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi934: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi935: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: 
kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_vpcmpsgeb_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi936: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi937: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi938: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi939: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi940: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi941: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi942: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi943: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, 
%k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; 
NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgeb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltb (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeb_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi944: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi945: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi946: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi947: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi948: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi949: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi950: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi951: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, 
%k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, 
%xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleb %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi952: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi953: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi954: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi955: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi956: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi957: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; 
NoVLX-NEXT: .Lcfi958: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi959: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltb 
(%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi960: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi961: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi962: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi963: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi964: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi965: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi966: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi967: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, 
%k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), 
%eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp sge <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleb %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi968: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi969: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi970: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, 
%rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp sge <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgeb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltb (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeb_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi971: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi972: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi973: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x 
i8> %2 = icmp sge <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleb %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi974: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi975: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi976: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 ; NoVLX-NEXT: vpxord %zmm4, %zmm4, %zmm4 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax 
; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp sge <32 x i8> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltb (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeb_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi977: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi978: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi979: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: vpxord %zmm3, %zmm3, %zmm3 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm4 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm4, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4 ; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4 ; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x i8> %2 = icmp sge <32 x i8> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgew_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: 
vpcmpnltw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> 
zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgew_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi980: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi981: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi982: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, 
%xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x 
i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgew_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi983: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi984: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi985: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, 
%xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi986: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi987: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi988: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, 
%k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgew_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; 
VLX-NEXT: vpcmpnltw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi989: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi990: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi991: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: 
vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi992: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi993: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi994: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 
; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgew_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; 
NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi995: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi996: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi997: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, 
%eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmplew %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi998: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi999: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1000: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgew_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw (%rsi), %xmm0, 
%k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1001: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1002: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1003: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; 
NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp sge <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1004: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1005: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1006: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1007: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1008: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1009: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1010: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1011: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: 
vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; 
NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgew_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1012: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1013: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1014: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; 
NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1015: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1016: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1017: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1018: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1019: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: 
kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vpcmpsgew_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1020: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1021: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1022: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1023: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1024: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1025: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1026: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1027: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; 
NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgew_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1028: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1029: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1030: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1031: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1032: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1033: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1034: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1035: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; 
NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1036: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1037: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1038: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1039: ; NoVLX-NEXT: .cfi_offset %rbx, -56 
; NoVLX-NEXT: .Lcfi1040: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1041: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1042: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1043: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgew_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltw (%rdi), %ymm0, %k0 ; 
VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1044: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1045: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1046: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1047: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1048: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1049: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1050: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1051: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; 
NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; 
NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmplew %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1052: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1053: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1054: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1055: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1056: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1057: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1058: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1059: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, 
%k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; 
; NOTE(review): Autogenerated expected-output block (utils/update_llc_test_checks.py).
; Do not hand-edit the expected instruction sequences below; regenerate them with
; the script if codegen changes. The long 'NoVLX' sequence materializes a compare
; mask into a 64-bit scalar on a target (knl) without AVX512VL/BW.
NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgew_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1060: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1061: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1062: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; 
; test_masked_vpcmpsgew_v16i1_v64i1_mask_mem -- signed-GE compare of <16 x i16>
; against a memory operand, under mask %__u, with the 16-bit result zero-extended
; into an i64. On VLX targets this folds to a single masked vpcmpnltw; on NoVLX
; the result is rebuilt bit-by-bit via kshiftlw/kshiftrw/kmovw + vpinsrb.
NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1063: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1064: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1065: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1066: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1067: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, 
%ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp sge <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 
; NOTE(review): the shufflevector below reads '<64 x i32> %6 = ...' with no
; <i32 ...> element list -- the mask constant appears truncated in this copy and
; would not parse as LLVM IR. The same truncation recurs at every shufflevector
; in this chunk. Regenerate the file from its generator rather than hand-restoring
; the 64-element index lists -- TODO confirm against the upstream test file.
x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmplew %zmm0, %zmm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1068: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1069: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1070: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3 ; NoVLX-NEXT: vmovq %xmm3, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 ; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 ; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm6 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm5 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax 
; test_vpcmpsgew_v32i1_v64i1_mask (NoVLX expectations, continued): without
; AVX512BW/VLX the <32 x i16> operands are rebuilt 16 bits at a time with
; vpextrq/shr + vpinsrw, and 'icmp sge' is formed as not(b sgt a) via
; vpcmpgtw followed by vpxor with an all-ones vector. Autogenerated by
; utils/update_llc_test_checks.py -- regenerate rather than hand-edit.
; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm5 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; 
NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm8, %rcx ; 
NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 ; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm1, %ymm2 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, 
%k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; 
NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 
; NOTE(review): the shufflevector mask constant below ('<64 x i32>' with no
; <i32 ...> element list) appears truncated in this copy and would not parse;
; regenerate the file rather than hand-restoring it.
; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp sge <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgew_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltw (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgew_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1071: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1072: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1073: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm2 ; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm1 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 ; 
; test_vpcmpsgew_v32i1_v64i1_mask_mem (NoVLX expectations, continued): same
; word-by-word vector rebuild as the register variant, but the RHS operands are
; loaded from memory (vmovdqa (%rdi) / 32(%rdi)) before the vpcmpgtw + vpxor
; 'not (b sgt a)' sequence. Autogenerated by utils/update_llc_test_checks.py --
; regenerate rather than hand-edit.
NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; 
NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm1 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm2 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm2, %ymm0 ; NoVLX-NEXT: vmovdqa 32(%rdi), %ymm2 ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm2, %ymm2 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; 
NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, 
%xmm0 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: 
; NOTE(review): the shufflevector mask constant below ('<64 x i32>' with no
; <i32 ...> element list) appears truncated in this copy and would not parse;
; regenerate the file rather than hand-restoring it.
kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <32 x i16> %2 = icmp sge <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmplew %zmm0, %zmm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1074: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1075: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1076: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm8 ; NoVLX-NEXT: vextracti32x4 $2, 
%zmm1, %xmm5 ; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm6 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, 
%xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; 
NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm5 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm5, %xmm5 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm1 ; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm8 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm3, %ymm3 ; NoVLX-NEXT: vpmovdb %zmm0, %xmm0 ; NoVLX-NEXT: vpternlogd $255, %zmm6, %zmm6, %zmm6 {%k2} {z} ; NoVLX-NEXT: vpcmpgtw %ymm1, %ymm3, %ymm4 ; NoVLX-NEXT: vpmovdb %zmm6, %xmm6 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; 
NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm2, %ymm2 ; NoVLX-NEXT: vpcmpgtw %ymm8, %ymm2, %ymm2 ; NoVLX-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 ; NoVLX-NEXT: vpxor %ymm5, %ymm2, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm3 ; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, 
%k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpand %xmm6, %xmm2, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm0, %xmm3, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: 
%0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp sge <32 x i16> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgew_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltw (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgew_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1077: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1078: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1079: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 ; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; 
NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm5 ; 
NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm0, %xmm1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4 ; NoVLX-NEXT: vpmovdb %zmm0, %xmm2 ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm3 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm5 ; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm5, %ymm3 ; NoVLX-NEXT: vmovdqa 32(%rsi), %ymm5 ; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm5, %ymm4 ; NoVLX-NEXT: vpcmpeqd %ymm5, %ymm5, %ymm5 ; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm3 ; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 ; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: 
vpinsrb $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4 ; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm4 ; NoVLX-NEXT: vpslld $31, %zmm4, %zmm4 ; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 
; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; 
NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <32 x i16> %2 = icmp sge <32 x i16> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw 
%eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = 
[0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, 
%k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 
x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, 
%zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; 
NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 
{%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; 
NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: 
vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 
; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; 
NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, 
%zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd 
%xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 
@test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, 
%zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1080: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1081: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1082: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1083: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1084: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1085: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd 
%zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1086: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1087: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1088: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1089: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1090: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1091: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; 
NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1092: ; NoVLX-NEXT: 
.cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1093: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1094: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1095: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1096: ; NoVLX-NEXT: .cfi_offset %rbp, -16 
; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1097: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to 
i32 ret i32 %6 } define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1098: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1099: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1100: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem(<2 x i64> %__a, 
<2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1101: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1102: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1103: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask(i8 
zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1104: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1105: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1106: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: 
vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1107: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1108: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1109: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax 
; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1110: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1111: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1112: ; 
NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq 
%rbp ; NoVLX-NEXT: .Lcfi1113: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1114: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1115: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load 
i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp sge <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = 
bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; 
NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b: ; 
NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1116: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1117: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1118: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1119: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1120: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1121: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: 
andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> 
%4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1122: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1123: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1124: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1125: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1126: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1127: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 
; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1128: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1129: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1130: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, 
%zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1131: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1132: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1133: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, 
%k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1134: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1135: ; NoVLX-NEXT: .cfi_offset %rbp, 
-16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1136: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax 
; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1137: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1138: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1139: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, 
%k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1140: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1141: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1142: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpled 
%zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = 
bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1143: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1144: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1145: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 
; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1146: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1147: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1148: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd 
(%rdi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 
x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1149: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1150: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1151: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: 
kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp sge <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1152: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1153: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1154: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; 
NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1155: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1156: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1157: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1158: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1159: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1160: ; 
NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1161: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1162: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1163: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1164: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1165: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1166: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1167: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; 
NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; 
VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1168: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1169: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1170: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1171: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1172: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1173: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1174: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1175: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, 
%k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to 
<16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1176: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1177: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1178: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1179: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1180: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1181: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1182: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1183: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsged_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rdi), %zmm1 ; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1184: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1185: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1186: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1187: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1188: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1189: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1190: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1191: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: 
vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rsi), %zmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1192: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1193: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1194: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; 
NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1195: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1196: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1197: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1198: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1199: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: 
vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1200: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1201: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1202: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1203: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1204: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1205: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1206: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1207: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x 
i32> %2 = icmp sge <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1208: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1209: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1210: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1211: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1212: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1213: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1214: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1215: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpnltd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, 
%k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, 
(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1216: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1217: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1218: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1219: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1220: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1221: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1222: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1223: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: 
kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1224: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1225: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1226: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; 
NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1227: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1228: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1229: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1230: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1231: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltd (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsged_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpsged_v16i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rdi), %zmm1 ; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsged_v16i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1232: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1233: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1234: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1235: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1236: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1237: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1238: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1239: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpbroadcastd (%rdi), %zmm1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: 
popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastd (%rsi), %zmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsged_v16i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1240: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1241: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1242: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1243: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1244: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1245: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1246: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1247: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpbroadcastd (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpled %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; 
NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp sge <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; 
NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, 
%k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, 
-{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq 
%xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, 
%zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 
; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a 
to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load 
= load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } 
define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> 
%extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: 
vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw 
%eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: 
vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; 
NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: 
vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1248: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1249: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1250: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, 
%zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1251: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1252: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1253: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 
x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1254: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1255: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1256: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to 
<2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1257: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1258: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1259: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, 
%k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1260: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1261: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1262: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: 
movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1263: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1264: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1265: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1266: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1267: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1268: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, 
%zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1269: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1270: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1271: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw 
%k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1272: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1273: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1274: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1275: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1276: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1277: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; VLX-NEXT: vpcmpleq %xmm0, %xmm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; 
NoVLX-NEXT: .Lcfi1278: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1279: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1280: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpcmpeqd %xmm1, %xmm1, %xmm1 ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: 
vpcmpleq %xmm0, %xmm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1281: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1282: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1283: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpandn %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 
x i64> undef, <2 x i32> %2 = icmp sge <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; 
NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; 
NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; 
NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast 
<8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, 
%k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} 
{z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, 
%k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; 
NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 
{{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, 
%zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: 
%AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd 
$255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: 
vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, 
<16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; 
NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: 
vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; 
VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1284: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1285: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1286: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; 
NoVLX-NEXT: .Lcfi1287: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1288: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1289: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1290: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1291: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, 
%rbp ; NoVLX-NEXT: .Lcfi1292: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 
@test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1293: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1294: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1295: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd 
%xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1296: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1297: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1298: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: 
vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1299: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1300: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1301: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; 
NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1302: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1303: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1304: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %ymm0, 
%ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1305: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1306: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1307: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; 
NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1308: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1309: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1310: ; NoVLX-NEXT: 
.cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 
x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1311: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1312: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1313: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1314: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1315: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1316: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; 
NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %ymm0, %ymm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1317: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1318: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1319: ; NoVLX-NEXT: 
.cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpcmpeqd %ymm1, %ymm1, %ymm1 ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = 
shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp sge <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) 
local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpsgeq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rdi), %zmm1 ; 
VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask: ; VLX: # BB#0: 
# %entry ; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1320: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1321: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1322: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, 
%zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1323: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1324: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1325: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1326: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1327: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1328: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} ; 
VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1329: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1330: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1331: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, 
%zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpsgeq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rdi), %zmm1 ; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1332: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1333: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1334: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1335: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1336: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1337: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq 
$32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = 
and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1338: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1339: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1340: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1341: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1342: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1343: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpnltq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, 
%r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: 
retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1344: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1345: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1346: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: 
vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1347: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1348: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1349: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpnltq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, 
%esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpsgeq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rdi), %zmm1 ; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1350: ; NoVLX-NEXT: .cfi_def_cfa_offset 
16 ; NoVLX-NEXT: .Lcfi1351: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1352: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %zmm1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; 
NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sge <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpsgeq_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1353: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1354: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1355: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %zmm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpleq %zmm0, %zmm1, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 
; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp sge <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1356: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1357: ; NoVLX-NEXT: 
.cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1358: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1359: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1360: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1361: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1362: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1363: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: 
kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = 
bitcast <32 x i1> %3 to i32 ret i32 %4 }
; -----------------------------------------------------------------------------
; test_vpcmpultb_v16i1_v32i1_mask_mem: checks codegen for an unsigned
; less-than (ult) compare of 16 x i8 against a memory operand, widened to a
; 32-bit mask.  VLX targets (skx) use vpcmpltub directly; non-VLX (knl)
; emulates the unsigned compare by biasing both sides with 0x80 (vpxor with
; the splat-128 constant) and using signed vpcmpgtb, then spills the k-mask
; through GPRs/vpinsrb to build the result.
; NOTE(review): this chunk appears damaged by text extraction — original line
; breaks were collapsed and the <32 x i32> shufflevector mask constants below
; are missing (e.g. "…zeroinitializer, <32 x i32> %4 = bitcast…" is invalid
; IR).  Do not hand-edit these autogenerated assertions; regenerate with
; utils/update_llc_test_checks.py from an intact copy — TODO confirm.
; -----------------------------------------------------------------------------
define zeroext i32 @test_vpcmpultb_v16i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1364: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1365: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1366: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1367: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1368: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1369: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1370: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1371: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw 
%k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld 
$31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 }
; -----------------------------------------------------------------------------
; test_masked_vpcmpultb_v16i1_v32i1_mask: same ult compare as above but under
; a 16-bit caller-supplied mask (%__u); NoVLX applies it via
; "vptestmd ... {%k1}" after "kmovw %edi, %k1".
; NOTE(review): shufflevector mask constant missing here too (extraction
; damage) — regenerate rather than hand-edit.
; -----------------------------------------------------------------------------
define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1372: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1373: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1374: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1375: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1376: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1377: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1378: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1379: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, 
%zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, 
%r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 }
; -----------------------------------------------------------------------------
; test_masked_vpcmpultb_v16i1_v32i1_mask_mem: masked ult byte compare with a
; memory RHS ((%rsi)); combines the mask trick (bias by 0x80 + vpcmpgtb on
; NoVLX) with the {%k1} write-mask from %__u.
; NOTE(review): shufflevector mask constant lost in extraction — regenerate
; this autogenerated test instead of hand-editing.
; -----------------------------------------------------------------------------
define zeroext i32 @test_masked_vpcmpultb_v16i1_v32i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1380: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1381: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1382: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: 
pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1383: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1384: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1385: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1386: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1387: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, 
%k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } 
; -----------------------------------------------------------------------------
; v16i1 -> v64i1 widening variants: same ult byte compares as above, but the
; 16-bit result is zero-extended into a 64-bit mask; NoVLX assembles the i64
; from two 32-bit halves (shlq $32 / orq) read back from the stack.
; NOTE(review): this region is extraction-damaged — newlines collapsed and
; the <64 x i32> shufflevector mask constants are missing; regenerate with
; utils/update_llc_test_checks.py rather than hand-editing.
; -----------------------------------------------------------------------------
define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1388: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1389: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1390: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1391: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1392: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1393: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1394: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1395: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, 
%k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, 
%zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 }
; -----------------------------------------------------------------------------
; test_vpcmpultb_v16i1_v64i1_mask_mem: memory-operand ((%rdi)) variant of the
; v16i1 -> v64i1 ult compare above.
; -----------------------------------------------------------------------------
define zeroext i64 @test_vpcmpultb_v16i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltub (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultb_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1396: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1397: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1398: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1399: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1400: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1401: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1402: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1403: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, 
%xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 }
; -----------------------------------------------------------------------------
; test_masked_vpcmpultb_v16i1_v64i1_mask: masked (i16 %__u) variant of the
; v16i1 -> v64i1 ult compare; mask applied via vptestmd {%k1} on NoVLX.
; -----------------------------------------------------------------------------
define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask(i16 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltub %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1404: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1405: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: 
movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1406: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1407: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1408: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1409: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1410: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1411: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; 
NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = 
bitcast <2 x i64> %__a to <16 x i8> %1 = bitcast <2 x i64> %__b to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 }
; -----------------------------------------------------------------------------
; test_masked_vpcmpultb_v16i1_v64i1_mask_mem: masked + memory-operand variant
; of the v16i1 -> v64i1 ult compare.
; NOTE(review): <64 x i32> shufflevector mask constants in this region were
; lost during extraction — regenerate, do not hand-edit.
; -----------------------------------------------------------------------------
define zeroext i64 @test_masked_vpcmpultb_v16i1_v64i1_mask_mem(i16 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltub (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultb_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1412: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1413: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1414: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1415: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1416: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1417: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1418: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1419: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtb %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; 
NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <16 x i8> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <16 x i8> %2 = icmp ult <16 x i8> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 }
; -----------------------------------------------------------------------------
; test_vpcmpultb_v32i1_v64i1_mask: 256-bit (32 x i8) ult compare widened to a
; 64-bit mask; NoVLX splits the ymm result with vextracti128 and handles each
; 128-bit half through the zmm/vptestmd path.
; -----------------------------------------------------------------------------
define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1420: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1421: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1422: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = 
[128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp ult <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 }
; -----------------------------------------------------------------------------
; test_vpcmpultb_v32i1_v64i1_mask_mem: memory-operand ((%rdi)) variant of the
; 32 x i8 ult compare widened to a 64-bit mask.
; NOTE(review): the <64 x i32> shufflevector mask constants in this region
; were stripped during extraction (invalid IR as shown) — regenerate with
; utils/update_llc_test_checks.py from an intact source, do not hand-edit.
; -----------------------------------------------------------------------------
define zeroext i64 @test_vpcmpultb_v32i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltub (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultb_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1423: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1424: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1425: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = 
[128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x i8> %2 = icmp ult <32 x i8> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 }
; -----------------------------------------------------------------------------
; test_masked_vpcmpultb_v32i1_v64i1_mask: masked (i32 %__u) 32 x i8 ult
; compare; NoVLX materializes the two 16-bit mask halves with
; vpternlogd/vpmovdb and ANDs them into the compare result.
; -----------------------------------------------------------------------------
define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask(i32 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltub %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1426: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1427: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1428: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; 
NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: vpternlogd $255, %zmm3, %zmm3, %zmm3 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm3, %xmm3 ; NoVLX-NEXT: vpxord %zmm4, %zmm4, %zmm4 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %ymm5, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm5, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpand %xmm3, %xmm1, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %1 = bitcast <4 x i64> %__b to <32 x i8> %2 = icmp ult <32 x i8> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 }
; -----------------------------------------------------------------------------
; test_masked_vpcmpultb_v32i1_v64i1_mask_mem: masked + memory-operand variant;
; definition continues beyond this chunk (checks truncated here).
; -----------------------------------------------------------------------------
define zeroext i64 @test_masked_vpcmpultb_v32i1_v64i1_mask_mem(i32 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltub (%rsi), %ymm0, %k0 {%k1} 
; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultb_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1429: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1430: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1431: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm1, %xmm1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm2, %xmm2 ; NoVLX-NEXT: vpxord %zmm3, %zmm3, %zmm3 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm4 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128] ; NoVLX-NEXT: vpxor %ymm4, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rsi), %ymm4, %ymm4 ; NoVLX-NEXT: vpcmpgtb %ymm0, %ymm4, %ymm0 ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm4 ; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <32 x i8> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <32 x 
i8> %2 = icmp ult <32 x i8> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpultw_v8i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, 
%zmm0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpultw_v8i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd 
%k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kunpckbw %k0, %k1, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1432: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1433: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1434: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, 
%zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultw_v8i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; 
VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1435: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1436: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1437: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; 
NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1438: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1439: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1440: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: 
kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultw_v8i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; 
NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1441: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1442: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1443: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1444: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1445: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1446: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, 
%k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultw_v8i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuw (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_vpcmpultw_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1447: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1448: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1449: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, 
%xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultw_v8i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1450: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1451: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1452: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, 
{{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %1 = bitcast <2 x i64> %__b to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 
@test_masked_vpcmpultw_v8i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1453: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1454: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1455: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtw %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpmovsxwq %xmm0, %zmm0 ; NoVLX-NEXT: vpsllq $63, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, 
%ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <8 x i16> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <8 x i16> %2 = icmp ult <8 x i16> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1456: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1457: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1458: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: 
.Lcfi1459: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1460: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1461: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1462: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1463: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, 
%k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultw_v16i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd 
%k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1464: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1465: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1466: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1467: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1468: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1469: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1470: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1471: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; 
NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq 
%r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1472: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1473: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1474: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1475: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1476: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1477: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1478: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1479: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; 
NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultw_v16i1_v32i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1480: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1481: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1482: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: 
.Lcfi1483: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1484: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1485: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1486: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1487: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, 
%k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { 
; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1488: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1489: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1490: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1491: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1492: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1493: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1494: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1495: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; 
NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, 
%zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultw_v16i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuw (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1496: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1497: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1498: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1499: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1500: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1501: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1502: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1503: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw 
%ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: 
vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask(i16 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1504: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1505: 
; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1506: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1507: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1508: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1509: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1510: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1511: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, 
%k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; 
NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %1 = bitcast <4 x i64> %__b to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultw_v16i1_v64i1_mask_mem(i16 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1512: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1513: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1514: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1515: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1516: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1517: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1518: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1519: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <16 x i16> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <16 x i16> %2 = icmp ult <16 x i16> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1520: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1521: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1522: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; 
; NOTE(review): Autogenerated NoVLX expansion for
; @test_vpcmpultw_v32i1_v64i1_mask (its `define` appears at the end of the
; previous span): an unmasked <32 x i16> unsigned-less-than compare widened
; to i64. Without AVX512BW/VLX the backend extracts each 128-bit lane of the
; zmm inputs (vextracti32x4), scalarizes through GPRs (vmovq/vpextrq +
; shrl/shrq 16/32/48, vpinsrw) to rebuild ymm halves, then applies the
; 0x8000-bias vpxor + signed vpcmpgtw trick per 16-element half. Keep these
; CHECK lines byte-identical to llc output; comments only were added here.
NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm3 ; NoVLX-NEXT: vmovq %xmm3, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm8 ; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm4 ; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm6 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm7 ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm5 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm2, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm5 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm5, %xmm5 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax 
; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm5, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; 
NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm0, %ymm0 ; NoVLX-NEXT: vinserti128 $1, %xmm5, %ymm7, %ymm3 ; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm1, %ymm1 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm4 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; 
NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm4, %xmm4 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm4, %ymm4 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm2 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm2, %ymm3, %ymm3 ; NoVLX-NEXT: vpxor %ymm2, %ymm4, %ymm4 ; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm4, %ymm3 ; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 ; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, 
%eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpmovsxbd %xmm3, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 ; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, 
%xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), 
%eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp ult <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultw_v32i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuw (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultw_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1523: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1524: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1525: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm2 ; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm1 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm3 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm1 ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm4 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; 
; NOTE(review): Autogenerated NoVLX expansion continuing
; @test_vpcmpultw_v32i1_v64i1_mask_mem (the `define` is in the previous span)
; and, at the end of this span, the opening of
; @test_masked_vpcmpultw_v32i1_v64i1_mask. Same pattern as the register
; variant, except the second operand is loaded from memory: vpxor (%rdi) /
; 32(%rdi) applies the 0x8000 bias directly to the loaded halves before the
; signed vpcmpgtw. CHECK lines must stay byte-identical to llc output; this
; edit adds comments only. The shufflevector mask operands in the trailing IR
; were dropped during extraction -- TODO restore from the upstream test.
NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: shrq $48, %rax ; 
NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm1, %ymm2 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm1 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm1, %ymm2, %ymm2 ; NoVLX-NEXT: vpxor 32(%rdi), %ymm1, %ymm3 ; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; 
NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtw %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovsxwd %ymm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: 
shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <32 x i16> %2 = icmp ult <32 x i16> %0, %1 %3 = shufflevector <32 x i1> %2, <32 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultw_v32i1_v64i1_mask(i32 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1526: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1527: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1528: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm2 ; NoVLX-NEXT: vmovq %xmm2, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: vextracti32x4 $1, %zmm1, %xmm8 ; NoVLX-NEXT: vextracti32x4 $2, %zmm1, %xmm5 ; NoVLX-NEXT: vextracti32x4 $3, %zmm1, %xmm7 ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm6 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm3 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm2, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm2 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: movq 
%rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm2, %xmm2 ; NoVLX-NEXT: vmovq %xmm3, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm2, %xmm9 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpextrq $1, %xmm3, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm4, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm6, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm4 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm6, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; 
NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: vmovq %xmm7, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm7, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm7 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm8, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; 
NoVLX-NEXT: vpextrq $1, %xmm8, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vmovq %xmm1, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm3 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: vinserti128 $1, %xmm9, %ymm4, %ymm8 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm0, %ymm4 ; NoVLX-NEXT: vpmovdb %zmm1, %xmm0 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} ; NoVLX-NEXT: vinserti128 $1, %xmm7, %ymm5, %ymm5 ; NoVLX-NEXT: vpmovdb %zmm1, %xmm7 ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm3, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm3 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm3, %xmm3 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vinserti128 $1, %xmm2, %ymm3, %ymm2 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm6 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm6, %ymm4, %ymm3 ; NoVLX-NEXT: vpxor %ymm6, %ymm2, %ymm2 ; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm2, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 
; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm4 ; NoVLX-NEXT: vpxor %ymm6, %ymm8, %ymm2 ; NoVLX-NEXT: vpxor %ymm6, %ymm5, %ymm3 ; NoVLX-NEXT: vpcmpgtw %ymm2, %ymm3, %ymm2 ; NoVLX-NEXT: vpmovsxwd %ymm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm2 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw 
$15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vpand %xmm7, %xmm2, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm0, %xmm4, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %1 = bitcast <8 x i64> %__b to <32 x i16> %2 = icmp ult <32 x i16> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 
@test_masked_vpcmpultw_v32i1_v64i1_mask_mem(i32 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuw (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultw_v32i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1529: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1530: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1531: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $96, %rsp ; NoVLX-NEXT: movl %edi, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vextracti32x4 $3, %zmm0, %xmm1 ; NoVLX-NEXT: vmovq %xmm1, %rax ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: movq %rax, %rdx ; NoVLX-NEXT: vmovd %eax, %xmm2 ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm2, %xmm2 ; NoVLX-NEXT: vextracti32x4 $1, %zmm0, %xmm4 ; NoVLX-NEXT: vextracti32x4 $2, %zmm0, %xmm5 ; NoVLX-NEXT: shrq $32, %rdx ; NoVLX-NEXT: vpinsrw $2, %edx, %xmm2, %xmm2 ; NoVLX-NEXT: vpextrq $1, %xmm1, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm2, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm5, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm3 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpextrq $1, %xmm5, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, 
%xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm4, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm5 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpextrq $1, %xmm4, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm1, %xmm1 ; NoVLX-NEXT: vmovq %xmm0, %rcx ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm1, %xmm6 ; NoVLX-NEXT: movl %ecx, %eax ; NoVLX-NEXT: shrl $16, %eax ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrw $1, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: movq %rcx, %rax ; NoVLX-NEXT: shrq $32, %rax ; NoVLX-NEXT: vpinsrw $2, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpextrq $1, %xmm0, %rax ; NoVLX-NEXT: shrq $48, %rcx ; NoVLX-NEXT: vpinsrw $3, %ecx, %xmm1, %xmm0 ; NoVLX-NEXT: movl %eax, %ecx ; NoVLX-NEXT: shrl $16, %ecx ; NoVLX-NEXT: vpinsrw $4, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: movq %rax, %rcx ; NoVLX-NEXT: shrq $32, %rcx ; NoVLX-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm7 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k1 ; NoVLX-NEXT: kmovw {{[0-9]+}}(%rsp), %k2 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vpmovdb %zmm0, %xmm1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k2} {z} ; NoVLX-NEXT: vpmovdb %zmm0, %xmm2 ; NoVLX-NEXT: shrq $48, %rax ; NoVLX-NEXT: 
vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vinserti128 $1, %xmm3, %ymm5, %ymm4 ; NoVLX-NEXT: vpinsrw $7, %eax, %xmm7, %xmm3 ; NoVLX-NEXT: vinserti128 $1, %xmm6, %ymm3, %ymm3 ; NoVLX-NEXT: vmovdqa {{.*#+}} ymm5 = [32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768,32768] ; NoVLX-NEXT: vpxor %ymm5, %ymm3, %ymm3 ; NoVLX-NEXT: vpxor (%rsi), %ymm5, %ymm6 ; NoVLX-NEXT: vpcmpgtw %ymm3, %ymm6, %ymm3 ; NoVLX-NEXT: vpmovsxwd %ymm3, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm3 ; NoVLX-NEXT: vptestmd %zmm3, %zmm3, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %eax, %xmm3 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb 
$9, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm3, %xmm3 ; NoVLX-NEXT: vpxor %ymm5, %ymm4, %ymm4 ; NoVLX-NEXT: vpxor 32(%rsi), %ymm5, %ymm5 ; NoVLX-NEXT: vpcmpgtw %ymm4, %ymm5, %ymm4 ; NoVLX-NEXT: vpmovsxwd %ymm4, %zmm4 ; NoVLX-NEXT: vpslld $31, %zmm4, %zmm4 ; NoVLX-NEXT: vptestmd %zmm4, %zmm4, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm4 ; NoVLX-NEXT: vpinsrb $1, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $2, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $3, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm4, 
%xmm4 ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $6, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $9, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $10, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $13, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $14, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm4, %xmm4 ; NoVLX-NEXT: vpand %xmm2, %xmm4, %xmm2 ; NoVLX-NEXT: vpmovsxbd %xmm2, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm2 ; NoVLX-NEXT: vptestmd %zmm2, %zmm2, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpand %xmm1, %xmm3, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %ecx ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, 
{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: shlq $32, %rax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <32 x i16> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <32 x i16> %2 = icmp ult <32 x i16> %0, %1 %3 = bitcast i32 %__u to <32 x i1> %4 = and <32 x i1> %2, %3 %5 = shufflevector <32 x i1> %4, <32 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = 
[0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; 
NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, 
%k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x 
i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw 
$1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = 
[2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 
@test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; 
NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} 
; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; 
NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define 
zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; 
NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: 
kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: 
retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; 
NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; 
NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = 
bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1532: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1533: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1534: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 
@test_vpcmpultd_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1535: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1536: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1537: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, 
%k1 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1538: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1539: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1540: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq 
entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1541: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1542: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1543: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1544: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1545: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1546: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1547: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1548: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1549: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 
; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_vpcmpultd_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1550: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1551: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1552: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; 
NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1553: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1554: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1555: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: 
kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1556: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1557: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1558: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; 
NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %1 = bitcast <2 x i64> %__b to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1559: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1560: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1561: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm1 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; 
NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1562: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; 
NoVLX-NEXT: .Lcfi1563: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1564: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to4}, 
%xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1565: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1566: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1567: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: vpbroadcastd {{.*#+}} xmm2 = [2147483648,2147483648,2147483648,2147483648] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; 
NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x i32> %load = load i32, i32* %__b %vec = insertelement <4 x i32> undef, i32 %load, i32 0 %1 = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> %2 = icmp ult <4 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_vpcmpultd_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; 
VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1568: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1569: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1570: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_vpcmpultd_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1571: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1572: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1573: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: 
movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1574: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1575: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1576: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, 
%k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1577: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1578: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1579: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa 
(%rsi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> 
zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1580: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1581: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1582: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1583: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1584: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1585: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; 
NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 ; VLX-NEXT: 
kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1586: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1587: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1588: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1589: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1590: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1591: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, 
%esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1592: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1593: ; 
NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1594: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), 
%ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %1 = bitcast <4 x i64> %__b to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1595: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1596: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1597: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovdqa (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k1, %k0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: 
kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1598: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: 
.Lcfi1599: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1600: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: 
movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to8}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1601: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1602: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1603: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: kandw %k0, %k1, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x i32> %load = load i32, i32* %__b %vec = insertelement <8 x i32> undef, i32 %load, i32 0 %1 = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> %2 = icmp ult <8 x i32> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: 
.Lcfi1604: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1605: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1606: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1607: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1608: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1609: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1610: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1611: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, 
%edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem: ; VLX: # BB#0: 
# %entry ; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1612: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1613: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1614: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1615: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1616: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1617: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1618: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1619: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, 
%k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = 
shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1620: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1621: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1622: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1623: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1624: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1625: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1626: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1627: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, 
%r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl 
(%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1628: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1629: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1630: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1631: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1632: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1633: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1634: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1635: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; 
NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpultd_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1636: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1637: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1638: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1639: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1640: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1641: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1642: ; 
NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1643: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1644: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1645: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1646: ; 
NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1647: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1648: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1649: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1650: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1651: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: 
kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpultd_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1652: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1653: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1654: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1655: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1656: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1657: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1658: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1659: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; 
NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultd_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1660: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1661: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1662: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1663: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1664: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1665: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1666: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1667: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpltud (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw 
$12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, 
%xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask(i16 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1668: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1669: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1670: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1671: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1672: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1673: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1674: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1675: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud %zmm1, 
%zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %1 = bitcast <8 x i64> %__b to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem(i16 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1676: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1677: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1678: 
; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1679: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1680: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1681: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1682: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1683: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; 
NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %2, %3 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext 
i64 @test_vpcmpultd_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultd_v16i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1684: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1685: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1686: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1687: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1688: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1689: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1690: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1691: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: 
kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: 
leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b(i16 zeroext %__u, <8 x i64> %__a, i32* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultd_v16i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1692: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1693: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1694: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1695: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1696: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1697: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1698: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1699: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, 
{{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, 
%ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x i32> %load = load i32, i32* %__b %vec = insertelement <16 x i32> undef, i32 %load, i32 0 %1 = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> %2 = icmp ult <16 x i32> %0, %1 %3 = bitcast i16 %__u to <16 x i1> %4 = and <16 x i1> %3, %2 %5 = shufflevector <16 x i1> %4, <16 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw 
%k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = 
[9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw 
$15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp 
ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <4 x i32> %6 = bitcast <4 x i1> %5 to i4 ret i4 %6 } define zeroext i8 
@test_vpcmpultq_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm1, 
%xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd 
%ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; 
NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb 
$8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: 
vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} 
{z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; 
NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: 
vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; 
NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x 
i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: 
kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1700: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1701: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1702: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; 
NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1703: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1704: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1705: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> 
zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1706: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1707: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1708: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq 
entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1709: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1710: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1711: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, 
%zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1712: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1713: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1714: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, 
{{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1715: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1716: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1717: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1718: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1719: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1720: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, 
%zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1721: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1722: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1723: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rdi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: 
vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask(i8 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %xmm1, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1724: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1725: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1726: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, 
%xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %1 = bitcast <2 x i64> %__b to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem(i8 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %xmm0, %k0 {%k1} ; VLX-NEXT: 
kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1727: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1728: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1729: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm1 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor (%rsi), %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load <2 x i64>, <2 x i64>* %__b %1 = 
bitcast <2 x i64> %load to <2 x i64> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %2, %extract.i %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1730: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1731: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1732: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, 
%rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, <2 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to2}, %xmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1733: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1734: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1735: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: vmovdqa {{.*#+}} xmm2 = [9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %xmm2, %xmm0, %xmm0 ; NoVLX-NEXT: vpxor %xmm2, %xmm1, %xmm1 ; NoVLX-NEXT: vpcmpgtq %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb 
{{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x i64> %load = load i64, i64* %__b %vec = insertelement <2 x i64> undef, i64 %load, i32 0 %1 = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> %2 = icmp ult <2 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <2 x i32> %4 = and <2 x i1> %extract.i, %2 %5 = shufflevector <2 x i1> %4, <2 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} 
{z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: 
vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> 
%3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: 
vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd 
%zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: 
vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; 
NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, 
%k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: 
%AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <8 x i32> %6 = bitcast <8 x i1> %5 to i8 ret i8 %6 } define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; 
NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, 
%zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, 
%ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 
; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: 
vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext 
i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = 
[0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, 
%xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 
to i16 ret i16 %6 } define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1736: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1737: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1738: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: 
test_vpcmpultq_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1739: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1740: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1741: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; 
VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1742: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1743: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1744: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl 
(%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1745: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1746: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1747: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb 
$8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1748: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1749: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1750: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = 
[9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1751: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1752: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1753: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: 
vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> 
%4, <4 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1754: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1755: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1756: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = 
bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1757: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1758: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1759: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rdi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: 
movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask(i8 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %ymm1, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1760: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1761: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1762: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, 
%xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %1 = bitcast <4 x i64> %__b to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem(i8 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1763: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1764: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1765: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, 
%rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm1 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor (%rsi), %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> 
%load to <4 x i64> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %2, %extract.i %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1766: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1767: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1768: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw 
%k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, <4 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to4}, %ymm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1769: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1770: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1771: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: vpbroadcastq {{.*#+}} ymm2 = [9223372036854775808,9223372036854775808,9223372036854775808,9223372036854775808] ; NoVLX-NEXT: vpxor %ymm2, %ymm0, %ymm0 ; NoVLX-NEXT: vpxor %ymm2, %ymm1, %ymm1 ; NoVLX-NEXT: vpcmpgtq %ymm0, %ymm1, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: kmovw %edi, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kshiftlw $13, %k0, %k2 ; NoVLX-NEXT: kshiftrw $15, %k2, %k2 ; NoVLX-NEXT: kshiftlw $15, %k0, %k3 ; NoVLX-NEXT: kshiftrw $15, %k3, %k3 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; 
NoVLX-NEXT: kmovw %k3, %ecx ; NoVLX-NEXT: vmovd %ecx, %xmm1 ; NoVLX-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k2, %eax ; NoVLX-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: vpinsrb $12, %eax, %xmm1, %xmm1 ; NoVLX-NEXT: vpand %xmm0, %xmm1, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x i64> %load = load i64, i64* %__b %vec = insertelement <4 x i64> undef, i64 %load, i32 0 %1 = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> %2 = icmp ult <4 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %extract.i = shufflevector <8 x i1> %3, <8 x i1> undef, <4 x i32> %4 = and <4 x i1> %extract.i, %2 %5 = shufflevector <4 x i1> %4, <4 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; 
; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 
%__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i16 @test_vpcmpultq_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 
ret i16 %4 } define zeroext i16 @test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <16 x i32> %6 = bitcast <16 x i1> %5 to i16 ret i16 %6 } define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1772: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1773: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1774: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; 
NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: 
pushq %rbp ; NoVLX-NEXT: .Lcfi1775: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1776: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1777: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = 
load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1778: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1779: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1780: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, 
%xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1781: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1782: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1783: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: 
kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i32 @test_vpcmpultq_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; 
NoVLX-LABEL: test_vpcmpultq_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1784: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1785: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1786: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; 
NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1787: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1788: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1789: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 
; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <32 x i32> %6 = bitcast <32 x i1> %5 to i32 ret i32 %6 } define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1790: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1791: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1792: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, 
%k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 
@test_vpcmpultq_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1793: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1794: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1795: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, 
%eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask(i8 zeroext %__u, <8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1796: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1797: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1798: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; 
NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %1 = bitcast <8 x i64> %__b to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem(i8 zeroext %__u, <8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: 
vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1799: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1800: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1801: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi), %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, 
%zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x i64> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %2, %3 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } define zeroext i64 @test_vpcmpultq_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vpcmpultq_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1802: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1803: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1804: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw 
$11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp ult <8 x i64> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, <8 x i64> %__a, i64* %__b) local_unnamed_addr { ; VLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: kmovd %edi, %k1 ; VLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_masked_vpcmpultq_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1805: ; NoVLX-NEXT: 
.cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1806: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1807: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: kmovw %edi, %k1 ; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: 
movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x i64> %load = load i64, i64* %__b %vec = insertelement <8 x i64> undef, i64 %load, i32 0 %1 = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> %2 = icmp ult <8 x i64> %0, %1 %3 = bitcast i8 %__u to <8 x i1> %4 = and <8 x i1> %3, %2 %5 = shufflevector <8 x i1> %4, <8 x i1> zeroinitializer, <64 x i32> %6 = bitcast <64 x i1> %5 to i64 ret i64 %6 } declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32) define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; 
NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %1 = bitcast <2 x i64> %__b to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; 
NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; 
NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load float, float* %__b %vec = insertelement <4 x float> undef, float %load, i32 0 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; 
NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %1 = bitcast <2 x i64> %__b to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; 
NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: 
vbroadcastss (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load float, float* %__b %vec = insertelement <4 x float> undef, float %load, i32 0 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask(<2 x i64> %__a, <2 x 
i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1808: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1809: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1810: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %1 = bitcast <2 x i64> %__b to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1811: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1812: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; 
NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1813: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1814: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1815: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1816: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load float, float* %__b %vec = insertelement <4 x float> undef, float %load, i32 0 %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1817: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1818: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1819: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %1 = bitcast <2 x i64> %__b to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1820: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1821: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1822: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqps (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <4 x float> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <4 x float> %2 = fcmp oeq <4 x float> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to4}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1823: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1824: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1825: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1 ; NoVLX-NEXT: vcmpeqps %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = 
ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero
; NoVLX-NEXT:    vextracti128 $1, %ymm0, %xmm1
; NoVLX-NEXT:    vpmovsxbd %xmm1, %zmm1
; NoVLX-NEXT:    vpslld $31, %zmm1, %zmm1
; NoVLX-NEXT:    vptestmd %zmm1, %zmm1, %k0
; NoVLX-NEXT:    kmovw %k0, {{[0-9]+}}(%rsp)
; NoVLX-NEXT:    vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT:    vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT:    vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, (%rsp)
; NoVLX-NEXT:    movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT:    shlq $32, %rcx
; NoVLX-NEXT:    movl (%rsp), %eax
; NoVLX-NEXT:    orq %rcx, %rax
; NoVLX-NEXT:    movq %rbp, %rsp
; NoVLX-NEXT:    popq %rbp
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__a to <4 x float>
  %load = load float, float* %__b
  %vec = insertelement <4 x float> undef, float %load, i32 0
  ; NOTE(review): shuffle masks below were stripped during extraction and are
  ; reconstructed; zeroinitializer = splat of lane 0 (the {1to4} broadcast).
  %1 = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> zeroinitializer
  %2 = fcmp oeq <4 x float> %0, %1
  ; Zero-extend the 4-bit compare mask to 64 bits: lanes 0-3 from %2,
  ; lanes 4-63 from the zeroinitializer operand.
  %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vcmpeqps %ymm1, %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AX %AX %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    # kill: %YMM1 %YMM1 %ZMM1
; NoVLX-NEXT:    # kill: %YMM0 %YMM0 %ZMM0
; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AX %AX %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  %1 = bitcast <4 x i64> %__b to <8 x float>
  %2 = fcmp oeq <8 x float> %0, %1
  ; NOTE(review): reconstructed mask — zero-extend 8-bit mask to 16 bits.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define zeroext i16
@test_vcmpoeqps_v8i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vcmpeqps (%rdi), %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AX %AX %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    # kill: %YMM0 %YMM0 %ZMM0
; NoVLX-NEXT:    vmovaps (%rdi), %ymm1
; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AX %AX %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  %load = load <4 x i64>, <4 x i64>* %__b
  %1 = bitcast <4 x i64> %load to <8 x float>
  %2 = fcmp oeq <8 x float> %0, %1
  ; NOTE(review): reconstructed mask — zero-extend 8-bit mask to 16 bits
  ; (lanes 0-7 from %2, lanes 8-15 from the zeroinitializer operand).
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vcmpeqps (%rdi){1to8}, %ymm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AX %AX %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    # kill: %YMM0 %YMM0 %ZMM0
; NoVLX-NEXT:    vbroadcastss (%rdi), %ymm1
; NoVLX-NEXT:    vcmpeqps %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kshiftlw $8, %k0, %k0
; NoVLX-NEXT:    kshiftrw $8, %k0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AX %AX %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__a to <8 x float>
  %load = load float, float* %__b
  %vec = insertelement <8 x float> undef, float %load, i32 0
  ; NOTE(review): reconstructed masks — zeroinitializer splats lane 0
  ; (the {1to8} broadcast), then the compare mask is zero-extended to 16 bits.
  %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> zeroinitializer
  %2 = fcmp oeq <8 x float> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define zeroext i32
@test_vcmpoeqps_v8i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1826: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1827: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1828: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; 
NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %1 = bitcast <4 x i64> %__b to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1829: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1830: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1831: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: 
kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1832: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1833: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1834: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: # 
kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load float, float* %__b %vec = insertelement <8 x float> undef, float %load, i32 0 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> 
zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1835: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1836: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1837: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM1 %YMM1 %ZMM1 ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, 
%xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %1 = bitcast <4 x i64> %__b to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1838: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1839: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1840: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vmovaps (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; 
NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <8 x float> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to8}, %ymm0, %k0 ; VLX-NEXT: 
kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1841: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1842: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1843: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: # kill: %YMM0 %YMM0 %ZMM0 ; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <8 x float> %load = load float, float* %__b %vec = insertelement <8 x float> undef, float %load, i32 0 %1 = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> %2 = fcmp oeq <8 x float> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1844: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1845: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1846: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1847: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1848: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1849: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1850: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1851: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, 
%k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, 
%edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> %2 = fcmp oeq <16 x float> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1852: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1853: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1854: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1855: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1856: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1857: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1858: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: 
.Lcfi1859: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: 
vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x float> %2 = fcmp oeq <16 x float> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1860: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1861: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1862: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, 
%rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: .Lcfi1863: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1864: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1865: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1866: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1867: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d 
; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %load = load float, float* %__b %vec = insertelement <16 x float> undef, float %load, i32 0 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %2 = fcmp oeq <16 x float> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqps_v16i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovw %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v32i1_sae_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpleps 
{sae}, %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> %2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 -1, i32 8) %3 = zext i16 %2 to i32 ret i32 %3 } define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1868: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1869: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1870: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1871: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1872: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1873: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1874: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1875: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 
; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> %2 = fcmp oeq <16 x float> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1876: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1877: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1878: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: .Lcfi1879: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1880: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1881: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1882: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1883: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vcmpeqps (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; 
NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; 
NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <16 x float> %2 = fcmp oeq <16 x float> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_mask_mem_b(<8 x i64> %__a, float* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1884: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1885: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1886: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: pushq %r15 ; NoVLX-NEXT: pushq %r14 ; NoVLX-NEXT: pushq %r13 ; NoVLX-NEXT: pushq %r12 ; NoVLX-NEXT: pushq %rbx ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq 
$64, %rsp ; NoVLX-NEXT: .Lcfi1887: ; NoVLX-NEXT: .cfi_offset %rbx, -56 ; NoVLX-NEXT: .Lcfi1888: ; NoVLX-NEXT: .cfi_offset %r12, -48 ; NoVLX-NEXT: .Lcfi1889: ; NoVLX-NEXT: .cfi_offset %r13, -40 ; NoVLX-NEXT: .Lcfi1890: ; NoVLX-NEXT: .cfi_offset %r14, -32 ; NoVLX-NEXT: .Lcfi1891: ; NoVLX-NEXT: .cfi_offset %r15, -24 ; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r11d ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r14d ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r15d ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r12d ; NoVLX-NEXT: kshiftlw $8, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r13d ; NoVLX-NEXT: kshiftlw $7, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $6, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ebx ; NoVLX-NEXT: kshiftlw $5, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $4, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $3, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $2, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; 
NoVLX-NEXT: vmovd %r10d, %xmm0 ; NoVLX-NEXT: kmovw %k1, %r10d ; NoVLX-NEXT: kshiftlw $1, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: vpinsrb $1, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpinsrb $2, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %r11d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %r14d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %r15d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %r12d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $7, %r13d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $8, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $9, %ebx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $10, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $12, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $13, %r10d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: leaq -40(%rbp), %rsp ; NoVLX-NEXT: popq %rbx ; NoVLX-NEXT: popq %r12 ; NoVLX-NEXT: popq %r13 ; NoVLX-NEXT: popq %r14 ; NoVLX-NEXT: popq %r15 ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %load = load float, float* %__b %vec = insertelement <16 x float> undef, float %load, i32 0 %1 = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> %2 = fcmp oeq <16 x float> %0, %1 %3 = shufflevector <16 x i1> %2, <16 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqps_v16i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 ; 
VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: movzwl %ax, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqps_v16i1_v64i1_sae_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpleps {sae}, %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movzwl %ax, %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <16 x float> %1 = bitcast <8 x i64> %__b to <16 x float> %2 = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %0, <16 x float> %1, i32 2, i16 -1, i32 8) %3 = zext i16 %2 to i64 ret i64 %3 } declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32) define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %1 = bitcast <2 x i64> %__b to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi), 
%xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp) ; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,2],zero,zero ; NoVLX-NEXT: vpslld $31, %ymm0, %ymm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movb %al, -{{[0-9]+}}(%rsp) ; NoVLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load double, double* %__b %vec = insertelement <2 x double> undef, double %load, i32 0 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <4 x i32> %4 = bitcast <4 x i1> %3 to i4 ret i4 %4 } define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; 
NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %1 = bitcast <2 x i64> %__b to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 
{{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load double, double* %__b %vec = insertelement <2 x double> undef, double %load, i32 0 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector 
<2 x i1> %2, <2 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %1 = bitcast <2 x i64> %__b to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 
{%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm0, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; 
NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load double, double* %__b %vec = insertelement <2 x double> undef, double %load, i32 0 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1892: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1893: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1894: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %1 = bitcast <2 x i64> %__b to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> 
%3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1895: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1896: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1897: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b: ; NoVLX: # 
BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1898: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1899: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1900: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load double, double* %__b %vec = insertelement <2 x double> undef, double %load, i32 0 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask(<2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1901: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1902: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1903: ; NoVLX-NEXT: .cfi_def_cfa_register 
%rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %1 = bitcast <2 x i64> %__b to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem(<2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1904: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1905: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1906: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, 
%zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load <2 x i64>, <2 x i64>* %__b %1 = bitcast <2 x i64> %load to <2 x double> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to2}, %xmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1907: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1908: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1909: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0] ; NoVLX-NEXT: vcmpeqpd %xmm1, %xmm0, %xmm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, 
%k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,8],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <2 x i64> %__a to <2 x double> %load = load double, double* %__b %vec = insertelement <2 x double> undef, double %load, i32 0 %1 = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> %2 = fcmp oeq <2 x double> %0, %1 %3 = shufflevector <2 x i1> %2, <2 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, 
%k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %1 = bitcast <4 x i64> %__b to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; 
NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 to i8 ret i8 %4 } define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AL %AL %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: 
test_vcmpoeqpd_v4i1_v8i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kshiftlw $7, %k0, %k0 ; NoVLX-NEXT: kshiftrw $7, %k0, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,8,2,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,1,8,3,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpsllq $63, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmq %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,2,8,4,5,6,7] ; NoVLX-NEXT: vpermi2q %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpsllq $63, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmq %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AL %AL %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load double, double* %__b %vec = insertelement <4 x double> undef, double %load, i32 0 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <8 x i32> %4 = bitcast <8 x i1> %3 
to i8 ret i8 %4 } define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = 
bitcast <4 x i64> %__a to <4 x double> %1 = bitcast <4 x i64> %__b to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = 
[0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: # kill: %AX %AX %EAX ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpextrb $4, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $0, %xmm0, %eax ; NoVLX-NEXT: andl $1, %eax ; NoVLX-NEXT: kmovw %eax, %k0 ; NoVLX-NEXT: kxorw %k0, %k0, %k1 ; NoVLX-NEXT: kshiftrw $1, %k1, %k1 ; NoVLX-NEXT: kshiftlw $1, %k1, %k1 ; NoVLX-NEXT: korw %k0, %k1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,16,2,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm1, %zmm2, %zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $8, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm2, %zmm2, %zmm2 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm3 = [0,1,16,3,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm2, %zmm1, 
%zmm3 ; NoVLX-NEXT: vpslld $31, %zmm3, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k1} {z} ; NoVLX-NEXT: vpextrb $12, %xmm0, %eax ; NoVLX-NEXT: kmovw %eax, %k1 ; NoVLX-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} ; NoVLX-NEXT: vmovdqa32 {{.*#+}} zmm2 = [0,1,2,16,4,5,6,7,8,9,10,11,12,13,14,15] ; NoVLX-NEXT: vpermi2d %zmm0, %zmm1, %zmm2 ; NoVLX-NEXT: vpslld $31, %zmm2, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: # kill: %AX %AX %EAX ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load double, double* %__b %vec = insertelement <4 x double> undef, double %load, i32 0 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <16 x i32> %4 = bitcast <16 x i1> %3 to i16 ret i16 %4 } define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1910: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1911: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1912: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd 
%zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %1 = bitcast <4 x i64> %__b to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1913: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1914: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1915: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 
x double> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1916: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1917: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1918: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load double, double* %__b %vec = insertelement <4 x double> undef, double %load, i32 0 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, 
<32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask(<4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1919: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1920: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1921: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %1 = bitcast <4 x i64> %__b to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 
@test_vcmpoeqpd_v4i1_v64i1_mask_mem(<4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1922: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1923: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1924: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load <4 x i64>, <4 x i64>* %__b %1 = bitcast <4 x i64> %load to <4 x double> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 ret i64 %4 } define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> 
%__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to4}, %ymm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1925: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1926: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1927: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1 ; NoVLX-NEXT: vcmpeqpd %ymm1, %ymm0, %ymm0 ; NoVLX-NEXT: vpmovqd %zmm0, %ymm0 ; NoVLX-NEXT: vpxord %zmm1, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; NoVLX-NEXT: vextracti128 $1, %ymm0, %xmm1 ; NoVLX-NEXT: vpmovsxbd %xmm1, %zmm1 ; NoVLX-NEXT: vpslld $31, %zmm1, %zmm1 ; NoVLX-NEXT: vptestmd %zmm1, %zmm1, %k0 ; NoVLX-NEXT: kmovw %k0, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx ; NoVLX-NEXT: shlq $32, %rcx ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: orq %rcx, %rax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <4 x i64> %__a to <4 x double> %load = load double, double* %__b %vec = insertelement <4 x double> undef, double %load, i32 0 %1 = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> %2 = fcmp oeq <4 x double> %0, %1 %3 = shufflevector <4 x i1> %2, <4 x i1> zeroinitializer, <64 x i32> %4 = bitcast <64 x i1> %3 to i64 
ret i64 %4
}

; Ordered-equal compare of two <8 x double> vectors; the <8 x i1> result is
; zero-padded to <16 x i1> and returned as an i16 bitmask.
; NOTE(review): the shufflevector mask literals in this region were lost in
; extraction; they are reconstructed below (identity pad 0..15, splat of lane 0
; for the broadcast) — confirm against the upstream autogenerated test.
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AX %AX %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AX %AX %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Same as above but with the right-hand operand loaded from memory.
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AX %AX %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AX %AX %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Same compare against a scalar double broadcast to all eight lanes
; (exercises the embedded-broadcast {1to8} form on VLX targets).
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    # kill: %AX %AX %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_mask_mem_b:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    # kill: %AX %AX %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load double, double* %__b
  %vec = insertelement <8 x double> undef, double %load, i32 0
  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  %2 = fcmp oeq <8 x double> %0, %1
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

; Ordered-le compare via the masked-cmp intrinsic with {sae} (suppress-all-
; exceptions, rounding-mode operand i32 8); i8 mask result zero-extended to i16.
define zeroext i16 @test_vcmpoeqpd_v8i1_v16i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    movzbl %al, %eax
; VLX-NEXT:    # kill: %AX %AX %EAX
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v16i1_sae_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT:    kmovw %k0, %eax
; NoVLX-NEXT:    movzbl %al, %eax
; NoVLX-NEXT:    # kill: %AX %AX %EAX
; NoVLX-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
  %3 = zext i8 %2 to i16
  ret i16 %3
}

; <8 x i1> compare widened to a 32-bit mask; NoVLX spills through an
; aligned stack slot, hence the rbp frame and andq $-32 realignment.
define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
; VLX:       # BB#0: # %entry
; VLX-NEXT:    vcmpeqpd %zmm1, %zmm0, %k0
; VLX-NEXT:    kmovd %k0, %eax
; VLX-NEXT:    vzeroupper
; VLX-NEXT:    retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask:
; NoVLX:       # BB#0: # %entry
; NoVLX-NEXT:    pushq %rbp
; NoVLX-NEXT:  .Lcfi1928:
; NoVLX-NEXT:    .cfi_def_cfa_offset 16
; NoVLX-NEXT:  .Lcfi1929:
; NoVLX-NEXT:    .cfi_offset %rbp, -16
; NoVLX-NEXT:    movq %rsp, %rbp
; NoVLX-NEXT:  .Lcfi1930:
; NoVLX-NEXT:    .cfi_def_cfa_register %rbp
; NoVLX-NEXT:    andq $-32, %rsp
; NoVLX-NEXT:    subq $32, %rsp
; NoVLX-NEXT:
vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr { ; 
VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1931: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1932: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1933: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; 
NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load <8 x i64>, <8 x i64>* %__b %1 = bitcast <8 x i64> %load to <8 x double> %2 = fcmp oeq <8 x double> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; VLX-NEXT: kmovd %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_mask_mem_b: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1934: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1935: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1936: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $32, %rsp ; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw 
$9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0 ; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, (%rsp) ; NoVLX-NEXT: movl (%rsp), %eax ; NoVLX-NEXT: movq %rbp, %rsp ; NoVLX-NEXT: popq %rbp ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %load = load double, double* %__b %vec = insertelement <8 x double> undef, double %load, i32 0 %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> %2 = fcmp oeq <8 x double> %0, %1 %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <32 x i32> %4 = bitcast <32 x i1> %3 to i32 ret i32 %4 } define zeroext i32 @test_vcmpoeqpd_v8i1_v32i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovb %k0, %eax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v32i1_sae_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax ; NoVLX-NEXT: movzbl %al, %eax ; NoVLX-NEXT: retq entry: %0 = bitcast <8 x i64> %__a to <8 x double> %1 = bitcast <8 x i64> %__b to <8 x double> %2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8) %3 = zext i8 %2 to i32 ret i32 %3 } define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask(<8 x i64> 
%__a, <8 x i64> %__b) local_unnamed_addr { ; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask: ; VLX: # BB#0: # %entry ; VLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; VLX-NEXT: kmovq %k0, %rax ; VLX-NEXT: vzeroupper ; VLX-NEXT: retq ; ; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask: ; NoVLX: # BB#0: # %entry ; NoVLX-NEXT: pushq %rbp ; NoVLX-NEXT: .Lcfi1937: ; NoVLX-NEXT: .cfi_def_cfa_offset 16 ; NoVLX-NEXT: .Lcfi1938: ; NoVLX-NEXT: .cfi_offset %rbp, -16 ; NoVLX-NEXT: movq %rsp, %rbp ; NoVLX-NEXT: .Lcfi1939: ; NoVLX-NEXT: .cfi_def_cfa_register %rbp ; NoVLX-NEXT: andq $-32, %rsp ; NoVLX-NEXT: subq $64, %rsp ; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 ; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0 ; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1 ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp) ; NoVLX-NEXT: kshiftlw $15, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r8d ; NoVLX-NEXT: kshiftlw $14, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %r9d ; NoVLX-NEXT: kshiftlw $13, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edx ; NoVLX-NEXT: kshiftlw $12, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %esi ; NoVLX-NEXT: kshiftlw $11, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %edi ; NoVLX-NEXT: kshiftlw $10, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %eax ; NoVLX-NEXT: kshiftlw $9, %k0, %k1 ; NoVLX-NEXT: kshiftrw $15, %k1, %k1 ; NoVLX-NEXT: kmovw %k1, %ecx ; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $15, %k0, %k0 ; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0 ; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, 
%xmm0
; (Tail of the preceding autogenerated test: its `define` begins above this
; window. NoVLX expansion finishes building the i64 mask result on the stack.)
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  ; NOTE(review): the <64 x i32> shuffle-mask constant appears to have been
  ; lost during extraction -- restore it from the autogenerated original
  ; before running this file through llc/FileCheck.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Test: oeq compare of <8 x double> against a full-width memory operand,
; widened to an i64 mask. With VLX (skx) this stays a single vcmpeqpd with a
; memory source; without VLX (knl) the i1 vector is materialized via kshift /
; vpinsrb and spilled through the stack. Assertions are autogenerated by
; update_llc_test_checks.py -- do not edit the CHECK lines by hand.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem(<8 x i64> %__a, <8 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1940:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1941:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1942:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi), %zmm0, %k0
; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load <8 x i64>, <8 x i64>* %__b
  %1 = bitcast <8 x i64> %load to <8 x double>
  %2 = fcmp oeq <8 x double> %0, %1
  ; NOTE(review): <64 x i32> shuffle-mask constant missing -- extraction
  ; artifact; restore from the autogenerated original.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Test: oeq compare of <8 x double> against a broadcast scalar memory operand
; ({1to8}), widened to an i64 mask. VLX keeps the embedded broadcast form;
; NoVLX expands the mask element-by-element as in the test above.
; Assertions are autogenerated -- do not edit the CHECK lines by hand.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_mask_mem_b(<8 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_mask_mem_b:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: pushq %rbp
; NoVLX-NEXT: .Lcfi1943:
; NoVLX-NEXT: .cfi_def_cfa_offset 16
; NoVLX-NEXT: .Lcfi1944:
; NoVLX-NEXT: .cfi_offset %rbp, -16
; NoVLX-NEXT: movq %rsp, %rbp
; NoVLX-NEXT: .Lcfi1945:
; NoVLX-NEXT: .cfi_def_cfa_register %rbp
; NoVLX-NEXT: andq $-32, %rsp
; NoVLX-NEXT: subq $64, %rsp
; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: vpxord %zmm0, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k1
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kmovw %k1, {{[0-9]+}}(%rsp)
; NoVLX-NEXT: kshiftlw $15, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r8d
; NoVLX-NEXT: kshiftlw $14, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %r9d
; NoVLX-NEXT: kshiftlw $13, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edx
; NoVLX-NEXT: kshiftlw $12, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %esi
; NoVLX-NEXT: kshiftlw $11, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %edi
; NoVLX-NEXT: kshiftlw $10, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %eax
; NoVLX-NEXT: kshiftlw $9, %k0, %k1
; NoVLX-NEXT: kshiftrw $15, %k1, %k1
; NoVLX-NEXT: kmovw %k1, %ecx
; NoVLX-NEXT: vpxor %xmm0, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $0, %r8d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $1, %r9d, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $2, %edx, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $3, %esi, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $4, %edi, %xmm0, %xmm0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $15, %k0, %k0
; NoVLX-NEXT: vpinsrb $5, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: vpinsrb $7, %eax, %xmm0, %xmm0
; NoVLX-NEXT: vpmovsxbd %xmm0, %zmm0
; NoVLX-NEXT: vpslld $31, %zmm0, %zmm0
; NoVLX-NEXT: vptestmd %zmm0, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, (%rsp)
; NoVLX-NEXT: movl {{[0-9]+}}(%rsp), %ecx
; NoVLX-NEXT: shlq $32, %rcx
; NoVLX-NEXT: movl (%rsp), %eax
; NoVLX-NEXT: orq %rcx, %rax
; NoVLX-NEXT: movq %rbp, %rsp
; NoVLX-NEXT: popq %rbp
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %load = load double, double* %__b
  %vec = insertelement <8 x double> undef, double %load, i32 0
  ; NOTE(review): the <8 x i32> splat shuffle-mask constant is missing here --
  ; extraction artifact; restore from the autogenerated original.
  %1 = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32>
  %2 = fcmp oeq <8 x double> %0, %1
  ; NOTE(review): <64 x i32> shuffle-mask constant missing -- extraction
  ; artifact; restore from the autogenerated original.
  %3 = shufflevector <8 x i1> %2, <8 x i1> zeroinitializer, <64 x i32>
  %4 = bitcast <64 x i1> %3 to i64
  ret i64 %4
}

; Test: the avx512.mask.cmp.pd.512 intrinsic with {sae} (imm rounding = 8,
; predicate 2 = LE), zero-extended to i64. Both VLX and NoVLX lower to a
; single vcmplepd {sae}; only the kmov width differs (kmovd vs kmovw).
; Assertions are autogenerated -- do not edit the CHECK lines by hand.
define zeroext i64 @test_vcmpoeqpd_v8i1_v64i1_sae_mask(<8 x i64> %__a, <8 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
; VLX: # BB#0: # %entry
; VLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: movzbl %al, %eax
; VLX-NEXT: vzeroupper
; VLX-NEXT: retq
;
; NoVLX-LABEL: test_vcmpoeqpd_v8i1_v64i1_sae_mask:
; NoVLX: # BB#0: # %entry
; NoVLX-NEXT: vcmplepd {sae}, %zmm1, %zmm0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
; NoVLX-NEXT: movzbl %al, %eax
; NoVLX-NEXT: retq
entry:
  %0 = bitcast <8 x i64> %__a to <8 x double>
  %1 = bitcast <8 x i64> %__b to <8 x double>
  %2 = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %0, <8 x double> %1, i32 2, i8 -1, i32 8)
  %3 = zext i8 %2 to i64
  ret i64 %3
}