; llvm-project/llvm/test/CodeGen/X86/avx512-calling-conv.ll
; (3324 lines, 120 KiB, LLVM)

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL --check-prefix=KNL-NEW
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=knl -x86-enable-old-knl-abi | FileCheck %s --check-prefix=ALL_X64 --check-prefix=KNL --check-prefix=KNL-OLD
; RUN: llc < %s -mtriple=x86_64-apple-darwin9 -mcpu=skx | FileCheck %s --check-prefix=ALL_X64 --check-prefix=SKX
; RUN: llc < %s -mtriple=i686-apple-darwin9 -mcpu=knl | FileCheck %s --check-prefix=KNL_X32
; Returns a constant all-false <16 x i1>. On every configured target the
; expected code just zeroes %xmm0 with vxorps and returns.
define <16 x i1> @test1() {
; ALL_X64-LABEL: test1:
; ALL_X64: ## %bb.0:
; ALL_X64-NEXT: vxorps %xmm0, %xmm0, %xmm0
; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test1:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: vxorps %xmm0, %xmm0, %xmm0
; KNL_X32-NEXT: retl
ret <16 x i1> zeroinitializer
}
; Bitwise AND of two <16 x i1> arguments. The expected code is a single
; vandps of %xmm1 into %xmm0 on every configured target, i.e. both mask
; arguments and the result are checked as living in XMM registers.
define <16 x i1> @test2(<16 x i1>%a, <16 x i1>%b) {
; ALL_X64-LABEL: test2:
; ALL_X64: ## %bb.0:
; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0
; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test2:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: vandps %xmm1, %xmm0, %xmm0
; KNL_X32-NEXT: retl
%c = and <16 x i1>%a, %b
ret <16 x i1> %c
}
; Bitwise AND of two <8 x i1> arguments. As with the 16-element variant,
; the expected code is one vandps on %xmm0/%xmm1 for every configured target.
define <8 x i1> @test3(<8 x i1>%a, <8 x i1>%b) {
; ALL_X64-LABEL: test3:
; ALL_X64: ## %bb.0:
; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0
; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test3:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: vandps %xmm1, %xmm0, %xmm0
; KNL_X32-NEXT: retl
%c = and <8 x i1>%a, %b
ret <8 x i1> %c
}
; Bitwise AND of two <4 x i1> arguments. The expected code is one vandps on
; %xmm0/%xmm1 for every configured target.
define <4 x i1> @test4(<4 x i1>%a, <4 x i1>%b) {
; ALL_X64-LABEL: test4:
; ALL_X64: ## %bb.0:
; ALL_X64-NEXT: vandps %xmm1, %xmm0, %xmm0
; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test4:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: vandps %xmm1, %xmm0, %xmm0
; KNL_X32-NEXT: retl
%c = and <4 x i1>%a, %b
ret <4 x i1> %c
}
; External callee that takes and returns an <8 x i1> mask; only its calling
; convention matters to the tests that call it.
declare <8 x i1> @func8xi1(<8 x i1> %a)
; Signed greater-than compare of two <8 x i32> vectors, with the resulting
; <8 x i1> mask passed through @func8xi1 and the returned mask sign-extended
; back to <8 x i32>.
; The expected code narrows the compare result to 16-bit elements in %xmm0
; before the call (vpmovdw on the Knights Landing targets, vpmovm2w from a
; k-register on the Skylake target) and widens the returned %xmm0 with
; vpmovzxwd afterwards. The vpslld/vpsrad $31 pair then replicates bit 0 of
; each element across the lane, so only the low bit of each returned element
; is relied upon.
define <8 x i32> @test5(<8 x i32>%a, <8 x i32>%b) {
; KNL-LABEL: test5:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rax
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT: callq _func8xi1
; KNL-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL-NEXT: vpslld $31, %ymm0, %ymm0
; KNL-NEXT: vpsrad $31, %ymm0, %ymm0
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
;
; SKX-LABEL: test5:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rax
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; SKX-NEXT: vpmovm2w %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _func8xi1
; SKX-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SKX-NEXT: vpslld $31, %ymm0, %ymm0
; SKX-NEXT: vpsrad $31, %ymm0, %ymm0
; SKX-NEXT: popq %rax
; SKX-NEXT: retq
;
; KNL_X32-LABEL: test5:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: subl $12, %esp
; KNL_X32-NEXT: .cfi_def_cfa_offset 16
; KNL_X32-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0
; KNL_X32-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL_X32-NEXT: calll _func8xi1
; KNL_X32-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; KNL_X32-NEXT: vpslld $31, %ymm0, %ymm0
; KNL_X32-NEXT: vpsrad $31, %ymm0, %ymm0
; KNL_X32-NEXT: addl $12, %esp
; KNL_X32-NEXT: retl
%cmpRes = icmp sgt <8 x i32>%a, %b
%resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes)
%res = sext <8 x i1>%resi to <8 x i32>
ret <8 x i32> %res
}
; External callee that takes and returns a <16 x i1> mask; only its calling
; convention matters to the test that calls it.
declare <16 x i1> @func16xi1(<16 x i1> %a)
; Signed greater-than compare of two <16 x i32> vectors, with the resulting
; <16 x i1> mask passed through @func16xi1 and the returned mask
; sign-extended back to <16 x i32>.
; The expected code materialises the mask as 8-bit elements in %xmm0 before
; the call (vpternlogd + vpmovdb on the Knights Landing targets, vpmovm2b on
; the Skylake target) and widens the returned %xmm0 with vpmovzxbd. The
; vpslld/vpsrad $31 pair then replicates bit 0 of each element across the
; lane.
define <16 x i32> @test6(<16 x i32>%a, <16 x i32>%b) {
; KNL-LABEL: test6:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rax
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
; KNL-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL-NEXT: vpmovdb %zmm0, %xmm0
; KNL-NEXT: callq _func16xi1
; KNL-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL-NEXT: vpslld $31, %zmm0, %zmm0
; KNL-NEXT: vpsrad $31, %zmm0, %zmm0
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
;
; SKX-LABEL: test6:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rax
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0
; SKX-NEXT: vpmovm2b %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _func16xi1
; SKX-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; SKX-NEXT: vpslld $31, %zmm0, %zmm0
; SKX-NEXT: vpsrad $31, %zmm0, %zmm0
; SKX-NEXT: popq %rax
; SKX-NEXT: retq
;
; KNL_X32-LABEL: test6:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: subl $12, %esp
; KNL_X32-NEXT: .cfi_def_cfa_offset 16
; KNL_X32-NEXT: vpcmpgtd %zmm1, %zmm0, %k1
; KNL_X32-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z}
; KNL_X32-NEXT: vpmovdb %zmm0, %xmm0
; KNL_X32-NEXT: calll _func16xi1
; KNL_X32-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL_X32-NEXT: vpslld $31, %zmm0, %zmm0
; KNL_X32-NEXT: vpsrad $31, %zmm0, %zmm0
; KNL_X32-NEXT: addl $12, %esp
; KNL_X32-NEXT: retl
%cmpRes = icmp sgt <16 x i32>%a, %b
%resi = call <16 x i1> @func16xi1(<16 x i1> %cmpRes)
%res = sext <16 x i1>%resi to <16 x i32>
ret <16 x i32> %res
}
; External callee that takes and returns a <4 x i1> mask; only its calling
; convention matters to the test that calls it.
declare <4 x i1> @func4xi1(<4 x i1> %a)
; Signed greater-than compare of two <4 x i32> vectors, with the resulting
; <4 x i1> mask passed through @func4xi1 and the returned mask sign-extended
; back to <4 x i32>.
; Here the expected code passes the vpcmpgtd result in %xmm0 unchanged (no
; narrowing before the call, no widening after it); only the vpslld/vpsrad
; $31 pair is applied to the returned value.
define <4 x i32> @test7(<4 x i32>%a, <4 x i32>%b) {
; ALL_X64-LABEL: test7:
; ALL_X64: ## %bb.0:
; ALL_X64-NEXT: pushq %rax
; ALL_X64-NEXT: .cfi_def_cfa_offset 16
; ALL_X64-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; ALL_X64-NEXT: callq _func4xi1
; ALL_X64-NEXT: vpslld $31, %xmm0, %xmm0
; ALL_X64-NEXT: vpsrad $31, %xmm0, %xmm0
; ALL_X64-NEXT: popq %rax
; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test7:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: subl $12, %esp
; KNL_X32-NEXT: .cfi_def_cfa_offset 16
; KNL_X32-NEXT: vpcmpgtd %xmm1, %xmm0, %xmm0
; KNL_X32-NEXT: calll _func4xi1
; KNL_X32-NEXT: vpslld $31, %xmm0, %xmm0
; KNL_X32-NEXT: vpsrad $31, %xmm0, %xmm0
; KNL_X32-NEXT: addl $12, %esp
; KNL_X32-NEXT: retl
%cmpRes = icmp sgt <4 x i32>%a, %b
%resi = call <4 x i1> @func4xi1(<4 x i1> %cmpRes)
%res = sext <4 x i1>%resi to <4 x i32>
ret <4 x i32> %res
}
; Signed greater-than compare of two <8 x i32> vectors, with the <8 x i1>
; mask passed through @func8xi1; the returned mask is ANDed with the
; constant pattern <1,0,1,0,1,0,1,0> and returned as <8 x i1>.
; The call sequence matches the plain 8-element call test; afterwards the
; expected code is a single AND of %xmm0 with a memory operand (RIP-relative
; on the 64-bit targets, the label LCPI7_0 on the 32-bit target), so the
; <8 x i1> return value stays in %xmm0.
define <8 x i1> @test7a(<8 x i32>%a, <8 x i32>%b) {
; KNL-LABEL: test7a:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rax
; KNL-NEXT: .cfi_def_cfa_offset 16
; KNL-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL-NEXT: vpmovdw %zmm0, %ymm0
; KNL-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL-NEXT: callq _func8xi1
; KNL-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; KNL-NEXT: popq %rax
; KNL-NEXT: retq
;
; SKX-LABEL: test7a:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rax
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: vpcmpgtd %ymm1, %ymm0, %k0
; SKX-NEXT: vpmovm2w %k0, %xmm0
; SKX-NEXT: vzeroupper
; SKX-NEXT: callq _func8xi1
; SKX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
; SKX-NEXT: popq %rax
; SKX-NEXT: retq
;
; KNL_X32-LABEL: test7a:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: subl $12, %esp
; KNL_X32-NEXT: .cfi_def_cfa_offset 16
; KNL_X32-NEXT: vpcmpgtd %ymm1, %ymm0, %ymm0
; KNL_X32-NEXT: vpmovdw %zmm0, %ymm0
; KNL_X32-NEXT: ## kill: def $xmm0 killed $xmm0 killed $ymm0
; KNL_X32-NEXT: calll _func8xi1
; KNL_X32-NEXT: vandps LCPI7_0, %xmm0, %xmm0
; KNL_X32-NEXT: addl $12, %esp
; KNL_X32-NEXT: retl
%cmpRes = icmp sgt <8 x i32>%a, %b
%resi = call <8 x i1> @func8xi1(<8 x i1> %cmpRes)
%res = and <8 x i1>%resi, <i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false>
ret <8 x i1> %res
}
; Selects between two <16 x i8> vectors using a scalar i1 argument.
; The expected code tests only bit 0 of the condition (in %dil on the 64-bit
; targets, in a stack slot on the 32-bit target) and branches around a move
; of %xmm1 into %xmm0, so %a1 is returned when the bit is set and %a2
; otherwise.
define <16 x i8> @test8(<16 x i8> %a1, <16 x i8> %a2, i1 %cond) {
; ALL_X64-LABEL: test8:
; ALL_X64: ## %bb.0:
; ALL_X64-NEXT: testb $1, %dil
; ALL_X64-NEXT: jne LBB8_2
; ALL_X64-NEXT: ## %bb.1:
; ALL_X64-NEXT: vmovaps %xmm1, %xmm0
; ALL_X64-NEXT: LBB8_2:
; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test8:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT: jne LBB8_2
; KNL_X32-NEXT: ## %bb.1:
; KNL_X32-NEXT: vmovaps %xmm1, %xmm0
; KNL_X32-NEXT: LBB8_2:
; KNL_X32-NEXT: retl
%res = select i1 %cond, <16 x i8> %a1, <16 x i8> %a2
ret <16 x i8> %res
}
; Returns the i1 result of an unordered-or-greater-than compare of two
; doubles. The expected code is vucomisd followed by setb, leaving the
; result in %al; on the 32-bit target the operands are read from the stack.
define i1 @test9(double %a, double %b) {
; ALL_X64-LABEL: test9:
; ALL_X64: ## %bb.0:
; ALL_X64-NEXT: vucomisd %xmm0, %xmm1
; ALL_X64-NEXT: setb %al
; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test9:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
; KNL_X32-NEXT: vucomisd {{[0-9]+}}(%esp), %xmm0
; KNL_X32-NEXT: setb %al
; KNL_X32-NEXT: retl
%c = fcmp ugt double %a, %b
ret i1 %c
}
; Scalar select between two i32 arguments on an i1 argument.
; On the 64-bit targets the expected code tests bit 0 of %dl and uses cmovel
; on the register arguments. On the 32-bit target it tests bit 0 of the
; stack slot, picks between the addresses of the two stack arguments with
; leal/cmovnel, and then loads the chosen value.
define i32 @test10(i32 %a, i32 %b, i1 %cond) {
; ALL_X64-LABEL: test10:
; ALL_X64: ## %bb.0:
; ALL_X64-NEXT: movl %edi, %eax
; ALL_X64-NEXT: testb $1, %dl
; ALL_X64-NEXT: cmovel %esi, %eax
; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test10:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT: leal {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: leal {{[0-9]+}}(%esp), %ecx
; KNL_X32-NEXT: cmovnel %eax, %ecx
; KNL_X32-NEXT: movl (%ecx), %eax
; KNL_X32-NEXT: retl
%c = select i1 %cond, i32 %a, i32 %b
ret i32 %c
}
; Returns the i1 result of a signed greater-than compare of two i32 values.
; The expected code is cmpl followed by setg, leaving the result in %al.
define i1 @test11(i32 %a, i32 %b) {
; ALL_X64-LABEL: test11:
; ALL_X64: ## %bb.0:
; ALL_X64-NEXT: cmpl %esi, %edi
; ALL_X64-NEXT: setg %al
; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test11:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: cmpl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: setg %al
; KNL_X32-NEXT: retl
%c = icmp sgt i32 %a, %b
ret i1 %c
}
; Chains the two scalar i1 tests: calls @test11 to obtain an i1, forwards it
; as the i1 argument of @test10, then selects between that call's result and
; 0 using the same i1.
; The expected code zero-extends the returned %al with movzbl before passing
; it on, keeps the condition live across the second call in a callee-saved
; register (%ebx), and tests only bit 0 of %bl for the final cmovel.
define i32 @test12(i32 %a1, i32 %a2, i32 %b1) {
; ALL_X64-LABEL: test12:
; ALL_X64: ## %bb.0:
; ALL_X64-NEXT: pushq %rbp
; ALL_X64-NEXT: .cfi_def_cfa_offset 16
; ALL_X64-NEXT: pushq %r14
; ALL_X64-NEXT: .cfi_def_cfa_offset 24
; ALL_X64-NEXT: pushq %rbx
; ALL_X64-NEXT: .cfi_def_cfa_offset 32
; ALL_X64-NEXT: .cfi_offset %rbx, -32
; ALL_X64-NEXT: .cfi_offset %r14, -24
; ALL_X64-NEXT: .cfi_offset %rbp, -16
; ALL_X64-NEXT: movl %esi, %r14d
; ALL_X64-NEXT: movl %edi, %ebp
; ALL_X64-NEXT: movl %edx, %esi
; ALL_X64-NEXT: callq _test11
; ALL_X64-NEXT: movzbl %al, %ebx
; ALL_X64-NEXT: movl %ebp, %edi
; ALL_X64-NEXT: movl %r14d, %esi
; ALL_X64-NEXT: movl %ebx, %edx
; ALL_X64-NEXT: callq _test10
; ALL_X64-NEXT: xorl %ecx, %ecx
; ALL_X64-NEXT: testb $1, %bl
; ALL_X64-NEXT: cmovel %ecx, %eax
; ALL_X64-NEXT: popq %rbx
; ALL_X64-NEXT: popq %r14
; ALL_X64-NEXT: popq %rbp
; ALL_X64-NEXT: retq
;
; KNL_X32-LABEL: test12:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: pushl %ebx
; KNL_X32-NEXT: .cfi_def_cfa_offset 8
; KNL_X32-NEXT: pushl %edi
; KNL_X32-NEXT: .cfi_def_cfa_offset 12
; KNL_X32-NEXT: pushl %esi
; KNL_X32-NEXT: .cfi_def_cfa_offset 16
; KNL_X32-NEXT: subl $16, %esp
; KNL_X32-NEXT: .cfi_def_cfa_offset 32
; KNL_X32-NEXT: .cfi_offset %esi, -16
; KNL_X32-NEXT: .cfi_offset %edi, -12
; KNL_X32-NEXT: .cfi_offset %ebx, -8
; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %edi
; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT: movl %edi, (%esp)
; KNL_X32-NEXT: calll _test11
; KNL_X32-NEXT: movl %eax, %ebx
; KNL_X32-NEXT: movzbl %al, %eax
; KNL_X32-NEXT: movl %eax, {{[0-9]+}}(%esp)
; KNL_X32-NEXT: movl %esi, {{[0-9]+}}(%esp)
; KNL_X32-NEXT: movl %edi, (%esp)
; KNL_X32-NEXT: calll _test10
; KNL_X32-NEXT: xorl %ecx, %ecx
; KNL_X32-NEXT: testb $1, %bl
; KNL_X32-NEXT: cmovel %ecx, %eax
; KNL_X32-NEXT: addl $16, %esp
; KNL_X32-NEXT: popl %esi
; KNL_X32-NEXT: popl %edi
; KNL_X32-NEXT: popl %ebx
; KNL_X32-NEXT: retl
%cond = call i1 @test11(i32 %a1, i32 %b1)
%res = call i32 @test10(i32 %a1, i32 %a2, i1 %cond)
%res1 = select i1 %cond, i32 %res, i32 0
ret i32 %res1
}
; Loads a <1 x i1> from memory and returns it.
; The expected code returns the value in %al on every target. The Knights
; Landing targets load it with a plain movzbl, while the Skylake target
; loads it into a k-register with kmovb and then copies it out with kmovd.
define <1 x i1> @test13(<1 x i1>* %foo) {
; KNL-LABEL: test13:
; KNL: ## %bb.0:
; KNL-NEXT: movzbl (%rdi), %eax
; KNL-NEXT: ## kill: def $al killed $al killed $eax
; KNL-NEXT: retq
;
; SKX-LABEL: test13:
; SKX: ## %bb.0:
; SKX-NEXT: kmovb (%rdi), %k0
; SKX-NEXT: kmovd %k0, %eax
; SKX-NEXT: ## kill: def $al killed $al killed $eax
; SKX-NEXT: retq
;
; KNL_X32-LABEL: test13:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: movzbl (%eax), %eax
; KNL_X32-NEXT: ## kill: def $al killed $al killed $eax
; KNL_X32-NEXT: retl
%bar = load <1 x i1>, <1 x i1>* %foo
ret <1 x i1> %bar
}
; Loads a <32 x i16> from %x, passes it through @test14_callee and stores the
; returned vector back to %x.
; The expected code differs only in how the 512-bit vector crosses the call:
; with -x86-enable-old-knl-abi it is split across %ymm0 and %ymm1 (two loads
; before the call, two stores after), while every other configuration
; passes and returns it in a single %zmm0. The pointer is kept across the
; call in a callee-saved register (%rbx, or %esi on the 32-bit target).
define void @test14(<32 x i16>* %x) {
; KNL-NEW-LABEL: test14:
; KNL-NEW: ## %bb.0:
; KNL-NEW-NEXT: pushq %rbx
; KNL-NEW-NEXT: .cfi_def_cfa_offset 16
; KNL-NEW-NEXT: .cfi_offset %rbx, -16
; KNL-NEW-NEXT: movq %rdi, %rbx
; KNL-NEW-NEXT: vmovaps (%rdi), %zmm0
; KNL-NEW-NEXT: callq _test14_callee
; KNL-NEW-NEXT: vmovaps %zmm0, (%rbx)
; KNL-NEW-NEXT: popq %rbx
; KNL-NEW-NEXT: retq
;
; KNL-OLD-LABEL: test14:
; KNL-OLD: ## %bb.0:
; KNL-OLD-NEXT: pushq %rbx
; KNL-OLD-NEXT: .cfi_def_cfa_offset 16
; KNL-OLD-NEXT: .cfi_offset %rbx, -16
; KNL-OLD-NEXT: movq %rdi, %rbx
; KNL-OLD-NEXT: vmovaps (%rdi), %ymm0
; KNL-OLD-NEXT: vmovaps 32(%rdi), %ymm1
; KNL-OLD-NEXT: callq _test14_callee
; KNL-OLD-NEXT: vmovaps %ymm1, 32(%rbx)
; KNL-OLD-NEXT: vmovaps %ymm0, (%rbx)
; KNL-OLD-NEXT: popq %rbx
; KNL-OLD-NEXT: retq
;
; SKX-LABEL: test14:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rbx
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: .cfi_offset %rbx, -16
; SKX-NEXT: movq %rdi, %rbx
; SKX-NEXT: vmovaps (%rdi), %zmm0
; SKX-NEXT: callq _test14_callee
; SKX-NEXT: vmovaps %zmm0, (%rbx)
; SKX-NEXT: popq %rbx
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; KNL_X32-LABEL: test14:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: pushl %esi
; KNL_X32-NEXT: .cfi_def_cfa_offset 8
; KNL_X32-NEXT: subl $8, %esp
; KNL_X32-NEXT: .cfi_def_cfa_offset 16
; KNL_X32-NEXT: .cfi_offset %esi, -8
; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; KNL_X32-NEXT: vmovaps (%esi), %zmm0
; KNL_X32-NEXT: calll _test14_callee
; KNL_X32-NEXT: vmovaps %zmm0, (%esi)
; KNL_X32-NEXT: addl $8, %esp
; KNL_X32-NEXT: popl %esi
; KNL_X32-NEXT: retl
%a = load <32 x i16>, <32 x i16>* %x
%b = call <32 x i16> @test14_callee(<32 x i16> %a)
store <32 x i16> %b, <32 x i16>* %x
ret void
}
; External callee taking and returning <32 x i16>; only its calling
; convention matters to the test above.
declare <32 x i16> @test14_callee(<32 x i16>)
; Loads a <64 x i8> from %x, passes it through @test15_callee and stores the
; returned vector back to %x.
; The expected code has the same shape as the <32 x i16> variant: with
; -x86-enable-old-knl-abi the vector is split across %ymm0 and %ymm1, while
; every other configuration passes and returns it in a single %zmm0.
define void @test15(<64 x i8>* %x) {
; KNL-NEW-LABEL: test15:
; KNL-NEW: ## %bb.0:
; KNL-NEW-NEXT: pushq %rbx
; KNL-NEW-NEXT: .cfi_def_cfa_offset 16
; KNL-NEW-NEXT: .cfi_offset %rbx, -16
; KNL-NEW-NEXT: movq %rdi, %rbx
; KNL-NEW-NEXT: vmovaps (%rdi), %zmm0
; KNL-NEW-NEXT: callq _test15_callee
; KNL-NEW-NEXT: vmovaps %zmm0, (%rbx)
; KNL-NEW-NEXT: popq %rbx
; KNL-NEW-NEXT: retq
;
; KNL-OLD-LABEL: test15:
; KNL-OLD: ## %bb.0:
; KNL-OLD-NEXT: pushq %rbx
; KNL-OLD-NEXT: .cfi_def_cfa_offset 16
; KNL-OLD-NEXT: .cfi_offset %rbx, -16
; KNL-OLD-NEXT: movq %rdi, %rbx
; KNL-OLD-NEXT: vmovaps (%rdi), %ymm0
; KNL-OLD-NEXT: vmovaps 32(%rdi), %ymm1
; KNL-OLD-NEXT: callq _test15_callee
; KNL-OLD-NEXT: vmovaps %ymm1, 32(%rbx)
; KNL-OLD-NEXT: vmovaps %ymm0, (%rbx)
; KNL-OLD-NEXT: popq %rbx
; KNL-OLD-NEXT: retq
;
; SKX-LABEL: test15:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rbx
; SKX-NEXT: .cfi_def_cfa_offset 16
; SKX-NEXT: .cfi_offset %rbx, -16
; SKX-NEXT: movq %rdi, %rbx
; SKX-NEXT: vmovaps (%rdi), %zmm0
; SKX-NEXT: callq _test15_callee
; SKX-NEXT: vmovaps %zmm0, (%rbx)
; SKX-NEXT: popq %rbx
; SKX-NEXT: vzeroupper
; SKX-NEXT: retq
;
; KNL_X32-LABEL: test15:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: pushl %esi
; KNL_X32-NEXT: .cfi_def_cfa_offset 8
; KNL_X32-NEXT: subl $8, %esp
; KNL_X32-NEXT: .cfi_def_cfa_offset 16
; KNL_X32-NEXT: .cfi_offset %esi, -8
; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %esi
; KNL_X32-NEXT: vmovaps (%esi), %zmm0
; KNL_X32-NEXT: calll _test15_callee
; KNL_X32-NEXT: vmovaps %zmm0, (%esi)
; KNL_X32-NEXT: addl $8, %esp
; KNL_X32-NEXT: popl %esi
; KNL_X32-NEXT: retl
%a = load <64 x i8>, <64 x i8>* %x
%b = call <64 x i8> @test15_callee(<64 x i8> %a)
store <64 x i8> %b, <64 x i8>* %x
ret void
}
; External callee taking and returning <64 x i8>; only its calling
; convention matters to the test above.
declare <64 x i8> @test15_callee(<64 x i8>)
define <17 x i1> @test16(<17 x i1> %a, <17 x i1> %b) nounwind {
; KNL-LABEL: test16:
; KNL: ## %bb.0:
; KNL-NEXT: pushq %rbp
; KNL-NEXT: pushq %r15
; KNL-NEXT: pushq %r14
; KNL-NEXT: pushq %r13
; KNL-NEXT: pushq %r12
; KNL-NEXT: pushq %rbx
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kshiftlw $2, %k0, %k2
; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $1, %k1, %k1
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: kshiftlw $3, %k0, %k2
; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $2, %k1, %k1
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $13, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k0
; KNL-NEXT: kshiftlw $4, %k0, %k2
; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $3, %k1, %k1
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $12, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k0
; KNL-NEXT: kshiftlw $5, %k0, %k2
; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $4, %k1, %k1
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k0
; KNL-NEXT: kshiftlw $6, %k0, %k2
; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $5, %k1, %k1
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k0
; KNL-NEXT: kshiftlw $7, %k0, %k2
; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $6, %k1, %k1
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $9, %k0, %k0
; KNL-NEXT: kshiftrw $9, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $7, %k1, %k1
; KNL-NEXT: kshiftlw $8, %k0, %k2
; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $8, %k0, %k0
; KNL-NEXT: kshiftrw $8, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $8, %k1, %k1
; KNL-NEXT: kshiftlw $9, %k0, %k2
; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $7, %k0, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $9, %k1, %k1
; KNL-NEXT: kshiftlw $10, %k0, %k2
; KNL-NEXT: kmovw %k2, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $6, %k0, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $10, %k1, %k1
; KNL-NEXT: kshiftlw $11, %k0, %k6
; KNL-NEXT: korw %k1, %k6, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $5, %k0, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $11, %k1, %k1
; KNL-NEXT: kshiftlw $12, %k0, %k5
; KNL-NEXT: korw %k1, %k5, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $4, %k0, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $12, %k1, %k1
; KNL-NEXT: kshiftlw $13, %k0, %k4
; KNL-NEXT: korw %k1, %k4, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $3, %k0, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k1
; KNL-NEXT: kshiftlw $13, %k1, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k3
; KNL-NEXT: korw %k0, %k3, %k0
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftlw $2, %k0, %k0
; KNL-NEXT: kshiftrw $2, %k0, %k2
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $14, %k0, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k1
; KNL-NEXT: korw %k0, %k1, %k0
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: kmovw %edx, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kmovw %esi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $2, %k2, %k2
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT: korw %k2, %k7, %k2
; KNL-NEXT: kshiftlw $14, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: kmovw %r8d, %k2
; KNL-NEXT: kshiftlw $3, %k2, %k2
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT: korw %k2, %k7, %k2
; KNL-NEXT: kshiftlw $13, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: kmovw %r9d, %k2
; KNL-NEXT: kshiftlw $4, %k2, %k2
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT: korw %k2, %k7, %k2
; KNL-NEXT: kshiftlw $12, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $5, %k2, %k2
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT: korw %k2, %k7, %k2
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $6, %k2, %k2
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT: korw %k2, %k7, %k2
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $7, %k2, %k2
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT: korw %k2, %k7, %k2
; KNL-NEXT: kshiftlw $9, %k0, %k0
; KNL-NEXT: kshiftrw $9, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $8, %k2, %k2
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT: korw %k2, %k7, %k2
; KNL-NEXT: kshiftlw $8, %k0, %k0
; KNL-NEXT: kshiftrw $8, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $9, %k2, %k2
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k7 ## 2-byte Reload
; KNL-NEXT: korw %k2, %k7, %k2
; KNL-NEXT: kshiftlw $7, %k0, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $10, %k2, %k2
; KNL-NEXT: korw %k2, %k6, %k2
; KNL-NEXT: kshiftlw $6, %k0, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $11, %k2, %k2
; KNL-NEXT: korw %k2, %k5, %k2
; KNL-NEXT: xorl %ecx, %ecx
; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp)
; KNL-NEXT: movl $65535, %edx ## imm = 0xFFFF
; KNL-NEXT: movl $0, %esi
; KNL-NEXT: cmovnel %edx, %esi
; KNL-NEXT: kshiftlw $5, %k0, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $12, %k2, %k2
; KNL-NEXT: korw %k2, %k4, %k2
; KNL-NEXT: testb $1, {{[0-9]+}}(%rsp)
; KNL-NEXT: kshiftlw $4, %k0, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $13, %k2, %k2
; KNL-NEXT: korw %k2, %k3, %k2
; KNL-NEXT: cmovnel %edx, %ecx
; KNL-NEXT: kshiftlw $3, %k0, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dl
; KNL-NEXT: kmovw %edx, %k2
; KNL-NEXT: kshiftlw $14, %k2, %k2
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: kshiftlw $2, %k0, %k0
; KNL-NEXT: kshiftrw $2, %k0, %k0
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kshiftlw $1, %k0, %k0
; KNL-NEXT: kshiftrw $1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dl
; KNL-NEXT: kmovw %edx, %k1
; KNL-NEXT: kshiftlw $15, %k1, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %esi, %k1
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k2 ## 2-byte Reload
; KNL-NEXT: kandw %k2, %k0, %k0
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kandw %k1, %k2, %k1
; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftrw $2, %k0, %k1
; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kshiftrw $3, %k0, %k1
; KNL-NEXT: kmovw %k1, %r11d
; KNL-NEXT: kshiftrw $4, %k0, %k1
; KNL-NEXT: kmovw %k1, %r12d
; KNL-NEXT: kshiftrw $5, %k0, %k1
; KNL-NEXT: kmovw %k1, %r15d
; KNL-NEXT: kshiftrw $6, %k0, %k1
; KNL-NEXT: kmovw %k1, %r14d
; KNL-NEXT: kshiftrw $7, %k0, %k1
; KNL-NEXT: kmovw %k1, %r13d
; KNL-NEXT: kshiftrw $8, %k0, %k1
; KNL-NEXT: kmovw %k1, %ebx
; KNL-NEXT: kshiftrw $9, %k0, %k1
; KNL-NEXT: kmovw %k1, %esi
; KNL-NEXT: kshiftrw $10, %k0, %k1
; KNL-NEXT: kmovw %k1, %ebp
; KNL-NEXT: kshiftrw $11, %k0, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftrw $12, %k0, %k1
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kshiftrw $13, %k0, %k1
; KNL-NEXT: kmovw %k1, %edi
; KNL-NEXT: kshiftrw $14, %k0, %k1
; KNL-NEXT: andl $1, %r8d
; KNL-NEXT: movb %r8b, 2(%rax)
; KNL-NEXT: kmovw %k0, %r8d
; KNL-NEXT: andl $1, %r8d
; KNL-NEXT: andl $1, %r9d
; KNL-NEXT: leal (%r8,%r9,2), %r8d
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: andl $1, %r10d
; KNL-NEXT: leal (%r8,%r10,4), %r8d
; KNL-NEXT: kmovw %k0, %r10d
; KNL-NEXT: andl $1, %r11d
; KNL-NEXT: leal (%r8,%r11,8), %r8d
; KNL-NEXT: andl $1, %r12d
; KNL-NEXT: shll $4, %r12d
; KNL-NEXT: orl %r8d, %r12d
; KNL-NEXT: andl $1, %r15d
; KNL-NEXT: shll $5, %r15d
; KNL-NEXT: orl %r12d, %r15d
; KNL-NEXT: andl $1, %r14d
; KNL-NEXT: shll $6, %r14d
; KNL-NEXT: andl $1, %r13d
; KNL-NEXT: shll $7, %r13d
; KNL-NEXT: orl %r14d, %r13d
; KNL-NEXT: andl $1, %ebx
; KNL-NEXT: shll $8, %ebx
; KNL-NEXT: orl %r13d, %ebx
; KNL-NEXT: andl $1, %esi
; KNL-NEXT: shll $9, %esi
; KNL-NEXT: orl %ebx, %esi
; KNL-NEXT: andl $1, %ebp
; KNL-NEXT: shll $10, %ebp
; KNL-NEXT: orl %esi, %ebp
; KNL-NEXT: orl %r15d, %ebp
; KNL-NEXT: andl $1, %ecx
; KNL-NEXT: shll $11, %ecx
; KNL-NEXT: andl $1, %edx
; KNL-NEXT: shll $12, %edx
; KNL-NEXT: orl %ecx, %edx
; KNL-NEXT: andl $1, %edi
; KNL-NEXT: shll $13, %edi
; KNL-NEXT: orl %edx, %edi
; KNL-NEXT: andl $1, %r9d
; KNL-NEXT: shll $14, %r9d
; KNL-NEXT: orl %edi, %r9d
; KNL-NEXT: andl $1, %r10d
; KNL-NEXT: shll $15, %r10d
; KNL-NEXT: orl %r9d, %r10d
; KNL-NEXT: orl %ebp, %r10d
; KNL-NEXT: movw %r10w, (%rax)
; KNL-NEXT: popq %rbx
; KNL-NEXT: popq %r12
; KNL-NEXT: popq %r13
; KNL-NEXT: popq %r14
; KNL-NEXT: popq %r15
; KNL-NEXT: popq %rbp
; KNL-NEXT: retq
;
; SKX-LABEL: test16:
; SKX: ## %bb.0:
; SKX-NEXT: pushq %rbp
; SKX-NEXT: pushq %r15
; SKX-NEXT: pushq %r14
; SKX-NEXT: pushq %r13
; SKX-NEXT: pushq %r12
; SKX-NEXT: pushq %rbx
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT: movq %rdi, %rax
; SKX-NEXT: kshiftld $31, %k0, %k0
; SKX-NEXT: kshiftrd $31, %k0, %k1
; SKX-NEXT: kshiftld $2, %k0, %k0
; SKX-NEXT: kord %k0, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $30, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $3, %k1, %k2
; SKX-NEXT: kshiftld $3, %k2, %k2
; SKX-NEXT: kshiftld $30, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftrd $30, %k1, %k1
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kshiftld $31, %k3, %k2
; SKX-NEXT: kshiftrd $29, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $4, %k1, %k2
; SKX-NEXT: kshiftld $4, %k2, %k2
; SKX-NEXT: kshiftld $29, %k1, %k1
; SKX-NEXT: kshiftrd $29, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kshiftld $31, %k3, %k2
; SKX-NEXT: kshiftrd $28, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $5, %k1, %k2
; SKX-NEXT: kshiftld $5, %k2, %k2
; SKX-NEXT: kshiftld $28, %k1, %k1
; SKX-NEXT: kshiftrd $28, %k1, %k1
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $27, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $6, %k1, %k2
; SKX-NEXT: kshiftld $6, %k2, %k2
; SKX-NEXT: kshiftld $27, %k1, %k1
; SKX-NEXT: kshiftrd $27, %k1, %k1
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $26, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $7, %k1, %k2
; SKX-NEXT: kshiftld $7, %k2, %k2
; SKX-NEXT: kshiftld $26, %k1, %k1
; SKX-NEXT: kshiftrd $26, %k1, %k1
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $25, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $8, %k1, %k2
; SKX-NEXT: kshiftld $8, %k2, %k2
; SKX-NEXT: kshiftld $25, %k1, %k1
; SKX-NEXT: kshiftrd $25, %k1, %k1
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $24, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $9, %k1, %k2
; SKX-NEXT: kshiftld $9, %k2, %k2
; SKX-NEXT: kshiftld $24, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftrd $24, %k1, %k1
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kshiftld $31, %k3, %k2
; SKX-NEXT: kshiftrd $23, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $10, %k1, %k2
; SKX-NEXT: kshiftld $10, %k2, %k2
; SKX-NEXT: kshiftld $23, %k1, %k1
; SKX-NEXT: kshiftrd $23, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kshiftld $31, %k3, %k2
; SKX-NEXT: kshiftrd $22, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $11, %k1, %k2
; SKX-NEXT: kshiftld $11, %k2, %k2
; SKX-NEXT: kshiftld $22, %k1, %k1
; SKX-NEXT: kshiftrd $22, %k1, %k1
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $21, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $12, %k1, %k2
; SKX-NEXT: kshiftld $12, %k2, %k2
; SKX-NEXT: kshiftld $21, %k1, %k1
; SKX-NEXT: kshiftrd $21, %k1, %k1
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $20, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $13, %k1, %k2
; SKX-NEXT: kshiftld $13, %k2, %k2
; SKX-NEXT: kshiftld $20, %k1, %k1
; SKX-NEXT: kshiftrd $20, %k1, %k1
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $19, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $14, %k1, %k2
; SKX-NEXT: kshiftld $14, %k2, %k2
; SKX-NEXT: kshiftld $19, %k1, %k1
; SKX-NEXT: kshiftrd $19, %k1, %k1
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $18, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $15, %k1, %k2
; SKX-NEXT: kshiftld $15, %k2, %k2
; SKX-NEXT: kshiftld $18, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftrd $18, %k1, %k1
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kshiftld $31, %k3, %k2
; SKX-NEXT: kshiftrd $17, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $16, %k1, %k2
; SKX-NEXT: kshiftld $16, %k2, %k2
; SKX-NEXT: kshiftld $17, %k1, %k1
; SKX-NEXT: kshiftrd $17, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kshiftld $31, %k3, %k2
; SKX-NEXT: kshiftrd $16, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kshiftrd $17, %k1, %k2
; SKX-NEXT: kshiftld $17, %k2, %k2
; SKX-NEXT: kshiftld $16, %k1, %k1
; SKX-NEXT: kshiftrd $16, %k1, %k1
; SKX-NEXT: kord %k2, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $15, %k2, %k2
; SKX-NEXT: kord %k1, %k2, %k1
; SKX-NEXT: kmovd %esi, %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $31, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kmovd %edx, %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $30, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $3, %k0, %k2
; SKX-NEXT: kshiftld $3, %k2, %k2
; SKX-NEXT: kshiftld $30, %k0, %k0
; SKX-NEXT: kshiftrd $30, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kmovd %ecx, %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $29, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $4, %k0, %k2
; SKX-NEXT: kshiftld $4, %k2, %k2
; SKX-NEXT: kshiftld $29, %k0, %k0
; SKX-NEXT: kshiftrd $29, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kmovd %r8d, %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $28, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $5, %k0, %k2
; SKX-NEXT: kshiftld $5, %k2, %k2
; SKX-NEXT: kshiftld $28, %k0, %k0
; SKX-NEXT: kshiftrd $28, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kmovd %r9d, %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $27, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $6, %k0, %k2
; SKX-NEXT: kshiftld $6, %k2, %k2
; SKX-NEXT: kshiftld $27, %k0, %k0
; SKX-NEXT: kshiftrd $27, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $26, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $7, %k0, %k2
; SKX-NEXT: kshiftld $7, %k2, %k2
; SKX-NEXT: kshiftld $26, %k0, %k0
; SKX-NEXT: kshiftrd $26, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $25, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $8, %k0, %k2
; SKX-NEXT: kshiftld $8, %k2, %k2
; SKX-NEXT: kshiftld $25, %k0, %k0
; SKX-NEXT: kshiftrd $25, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $24, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $9, %k0, %k2
; SKX-NEXT: kshiftld $9, %k2, %k2
; SKX-NEXT: kshiftld $24, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftrd $24, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kshiftld $31, %k3, %k2
; SKX-NEXT: kshiftrd $23, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $10, %k0, %k2
; SKX-NEXT: kshiftld $10, %k2, %k2
; SKX-NEXT: kshiftld $23, %k0, %k0
; SKX-NEXT: kshiftrd $23, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kshiftld $31, %k3, %k2
; SKX-NEXT: kshiftrd $22, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $11, %k0, %k2
; SKX-NEXT: kshiftld $11, %k2, %k2
; SKX-NEXT: kshiftld $22, %k0, %k0
; SKX-NEXT: kshiftrd $22, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $21, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $12, %k0, %k2
; SKX-NEXT: kshiftld $12, %k2, %k2
; SKX-NEXT: kshiftld $21, %k0, %k0
; SKX-NEXT: kshiftrd $21, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $20, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $13, %k0, %k2
; SKX-NEXT: kshiftld $13, %k2, %k2
; SKX-NEXT: kshiftld $20, %k0, %k0
; SKX-NEXT: kshiftrd $20, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $19, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $14, %k0, %k2
; SKX-NEXT: kshiftld $14, %k2, %k2
; SKX-NEXT: kshiftld $19, %k0, %k0
; SKX-NEXT: kshiftrd $19, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $18, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $15, %k0, %k2
; SKX-NEXT: kshiftld $15, %k2, %k2
; SKX-NEXT: kshiftld $18, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftrd $18, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kshiftld $31, %k3, %k2
; SKX-NEXT: kshiftrd $17, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $16, %k0, %k2
; SKX-NEXT: kshiftld $16, %k2, %k2
; SKX-NEXT: kshiftld $17, %k0, %k0
; SKX-NEXT: kshiftrd $17, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kshiftld $31, %k3, %k2
; SKX-NEXT: kshiftrd $16, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kshiftrd $17, %k0, %k2
; SKX-NEXT: kshiftld $17, %k2, %k2
; SKX-NEXT: kshiftld $16, %k0, %k0
; SKX-NEXT: kshiftrd $16, %k0, %k0
; SKX-NEXT: kord %k2, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftld $31, %k2, %k2
; SKX-NEXT: kshiftrd $15, %k2, %k2
; SKX-NEXT: kord %k0, %k2, %k0
; SKX-NEXT: kandd %k1, %k0, %k0
; SKX-NEXT: kshiftrd $16, %k0, %k1
; SKX-NEXT: kmovd %k1, %r8d
; SKX-NEXT: kshiftrd $1, %k0, %k1
; SKX-NEXT: kmovd %k1, %r9d
; SKX-NEXT: kshiftrd $2, %k0, %k1
; SKX-NEXT: kmovd %k1, %r10d
; SKX-NEXT: kshiftrd $3, %k0, %k1
; SKX-NEXT: kmovd %k1, %r11d
; SKX-NEXT: kshiftrd $4, %k0, %k1
; SKX-NEXT: kmovd %k1, %r12d
; SKX-NEXT: kshiftrd $5, %k0, %k1
; SKX-NEXT: kmovd %k1, %r15d
; SKX-NEXT: kshiftrd $6, %k0, %k1
; SKX-NEXT: kmovd %k1, %r14d
; SKX-NEXT: kshiftrd $7, %k0, %k1
; SKX-NEXT: kmovd %k1, %r13d
; SKX-NEXT: kshiftrd $8, %k0, %k1
; SKX-NEXT: kmovd %k1, %ebx
; SKX-NEXT: kshiftrd $9, %k0, %k1
; SKX-NEXT: kmovd %k1, %esi
; SKX-NEXT: kshiftrd $10, %k0, %k1
; SKX-NEXT: kmovd %k1, %ebp
; SKX-NEXT: kshiftrd $11, %k0, %k1
; SKX-NEXT: kmovd %k1, %ecx
; SKX-NEXT: kshiftrd $12, %k0, %k1
; SKX-NEXT: kmovd %k1, %edx
; SKX-NEXT: kshiftrd $13, %k0, %k1
; SKX-NEXT: kmovd %k1, %edi
; SKX-NEXT: kshiftrd $14, %k0, %k1
; SKX-NEXT: andl $1, %r8d
; SKX-NEXT: movb %r8b, 2(%rax)
; SKX-NEXT: kmovd %k0, %r8d
; SKX-NEXT: andl $1, %r8d
; SKX-NEXT: andl $1, %r9d
; SKX-NEXT: leal (%r8,%r9,2), %r8d
; SKX-NEXT: kmovd %k1, %r9d
; SKX-NEXT: kshiftrd $15, %k0, %k0
; SKX-NEXT: andl $1, %r10d
; SKX-NEXT: leal (%r8,%r10,4), %r8d
; SKX-NEXT: kmovd %k0, %r10d
; SKX-NEXT: andl $1, %r11d
; SKX-NEXT: leal (%r8,%r11,8), %r8d
; SKX-NEXT: andl $1, %r12d
; SKX-NEXT: shll $4, %r12d
; SKX-NEXT: orl %r8d, %r12d
; SKX-NEXT: andl $1, %r15d
; SKX-NEXT: shll $5, %r15d
; SKX-NEXT: orl %r12d, %r15d
; SKX-NEXT: andl $1, %r14d
; SKX-NEXT: shll $6, %r14d
; SKX-NEXT: andl $1, %r13d
; SKX-NEXT: shll $7, %r13d
; SKX-NEXT: orl %r14d, %r13d
; SKX-NEXT: andl $1, %ebx
; SKX-NEXT: shll $8, %ebx
; SKX-NEXT: orl %r13d, %ebx
; SKX-NEXT: andl $1, %esi
; SKX-NEXT: shll $9, %esi
; SKX-NEXT: orl %ebx, %esi
; SKX-NEXT: andl $1, %ebp
; SKX-NEXT: shll $10, %ebp
; SKX-NEXT: orl %esi, %ebp
; SKX-NEXT: orl %r15d, %ebp
; SKX-NEXT: andl $1, %ecx
; SKX-NEXT: shll $11, %ecx
; SKX-NEXT: andl $1, %edx
; SKX-NEXT: shll $12, %edx
; SKX-NEXT: orl %ecx, %edx
; SKX-NEXT: andl $1, %edi
; SKX-NEXT: shll $13, %edi
; SKX-NEXT: orl %edx, %edi
; SKX-NEXT: andl $1, %r9d
; SKX-NEXT: shll $14, %r9d
; SKX-NEXT: orl %edi, %r9d
; SKX-NEXT: andl $1, %r10d
; SKX-NEXT: shll $15, %r10d
; SKX-NEXT: orl %r9d, %r10d
; SKX-NEXT: orl %ebp, %r10d
; SKX-NEXT: movw %r10w, (%rax)
; SKX-NEXT: popq %rbx
; SKX-NEXT: popq %r12
; SKX-NEXT: popq %r13
; SKX-NEXT: popq %r14
; SKX-NEXT: popq %r15
; SKX-NEXT: popq %rbp
; SKX-NEXT: retq
;
; KNL_X32-LABEL: test16:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: pushl %ebp
; KNL_X32-NEXT: pushl %ebx
; KNL_X32-NEXT: pushl %edi
; KNL_X32-NEXT: pushl %esi
; KNL_X32-NEXT: subl $20, %esp
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: kshiftlw $2, %k0, %k2
; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $1, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $14, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: kshiftlw $3, %k0, %k2
; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $2, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $13, %k0, %k0
; KNL_X32-NEXT: kshiftrw $13, %k0, %k0
; KNL_X32-NEXT: kshiftlw $4, %k0, %k2
; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $3, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $12, %k0, %k0
; KNL_X32-NEXT: kshiftrw $12, %k0, %k0
; KNL_X32-NEXT: kshiftlw $5, %k0, %k2
; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $4, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $11, %k0, %k0
; KNL_X32-NEXT: kshiftrw $11, %k0, %k0
; KNL_X32-NEXT: kshiftlw $6, %k0, %k2
; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $5, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $10, %k0, %k0
; KNL_X32-NEXT: kshiftrw $10, %k0, %k0
; KNL_X32-NEXT: kshiftlw $7, %k0, %k2
; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $6, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $9, %k0, %k0
; KNL_X32-NEXT: kshiftrw $9, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $7, %k1, %k1
; KNL_X32-NEXT: kshiftlw $8, %k0, %k2
; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $8, %k0, %k0
; KNL_X32-NEXT: kshiftrw $8, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $8, %k1, %k1
; KNL_X32-NEXT: kshiftlw $9, %k0, %k2
; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $7, %k0, %k0
; KNL_X32-NEXT: kshiftrw $7, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $9, %k1, %k1
; KNL_X32-NEXT: kshiftlw $10, %k0, %k2
; KNL_X32-NEXT: kmovw %k2, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $6, %k0, %k0
; KNL_X32-NEXT: kshiftrw $6, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $10, %k1, %k1
; KNL_X32-NEXT: kshiftlw $11, %k0, %k6
; KNL_X32-NEXT: korw %k1, %k6, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $5, %k0, %k0
; KNL_X32-NEXT: kshiftrw $5, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $11, %k1, %k1
; KNL_X32-NEXT: kshiftlw $12, %k0, %k5
; KNL_X32-NEXT: korw %k1, %k5, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $4, %k0, %k0
; KNL_X32-NEXT: kshiftrw $4, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $12, %k1, %k1
; KNL_X32-NEXT: kshiftlw $13, %k0, %k4
; KNL_X32-NEXT: korw %k1, %k4, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $3, %k0, %k0
; KNL_X32-NEXT: kshiftrw $3, %k0, %k2
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k1
; KNL_X32-NEXT: kshiftlw $13, %k1, %k0
; KNL_X32-NEXT: kshiftlw $14, %k0, %k3
; KNL_X32-NEXT: korw %k0, %k3, %k0
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftlw $2, %k0, %k0
; KNL_X32-NEXT: kshiftrw $2, %k0, %k2
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k0
; KNL_X32-NEXT: kshiftlw $14, %k0, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k1
; KNL_X32-NEXT: korw %k0, %k1, %k0
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftlw $1, %k0, %k0
; KNL_X32-NEXT: kshiftrw $1, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: kmovw %k0, (%esp) ## 2-byte Spill
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k0
; KNL_X32-NEXT: kshiftlw $1, %k0, %k0
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k2 ## 2-byte Reload
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $15, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $2, %k2, %k2
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload
; KNL_X32-NEXT: korw %k2, %k7, %k2
; KNL_X32-NEXT: kshiftlw $14, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $3, %k2, %k2
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload
; KNL_X32-NEXT: korw %k2, %k7, %k2
; KNL_X32-NEXT: kshiftlw $13, %k0, %k0
; KNL_X32-NEXT: kshiftrw $13, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $4, %k2, %k2
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload
; KNL_X32-NEXT: korw %k2, %k7, %k2
; KNL_X32-NEXT: kshiftlw $12, %k0, %k0
; KNL_X32-NEXT: kshiftrw $12, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $5, %k2, %k2
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload
; KNL_X32-NEXT: korw %k2, %k7, %k2
; KNL_X32-NEXT: kshiftlw $11, %k0, %k0
; KNL_X32-NEXT: kshiftrw $11, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $6, %k2, %k2
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload
; KNL_X32-NEXT: korw %k2, %k7, %k2
; KNL_X32-NEXT: kshiftlw $10, %k0, %k0
; KNL_X32-NEXT: kshiftrw $10, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $7, %k2, %k2
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload
; KNL_X32-NEXT: korw %k2, %k7, %k2
; KNL_X32-NEXT: kshiftlw $9, %k0, %k0
; KNL_X32-NEXT: kshiftrw $9, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $8, %k2, %k2
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload
; KNL_X32-NEXT: korw %k2, %k7, %k2
; KNL_X32-NEXT: kshiftlw $8, %k0, %k0
; KNL_X32-NEXT: kshiftrw $8, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $9, %k2, %k2
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k7 ## 2-byte Reload
; KNL_X32-NEXT: korw %k2, %k7, %k2
; KNL_X32-NEXT: kshiftlw $7, %k0, %k0
; KNL_X32-NEXT: kshiftrw $7, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $10, %k2, %k2
; KNL_X32-NEXT: korw %k2, %k6, %k2
; KNL_X32-NEXT: kshiftlw $6, %k0, %k0
; KNL_X32-NEXT: kshiftrw $6, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $11, %k2, %k2
; KNL_X32-NEXT: korw %k2, %k5, %k2
; KNL_X32-NEXT: kshiftlw $5, %k0, %k0
; KNL_X32-NEXT: kshiftrw $5, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $12, %k2, %k2
; KNL_X32-NEXT: korw %k2, %k4, %k2
; KNL_X32-NEXT: kshiftlw $4, %k0, %k0
; KNL_X32-NEXT: kshiftrw $4, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $13, %k2, %k2
; KNL_X32-NEXT: korw %k2, %k3, %k2
; KNL_X32-NEXT: kshiftlw $3, %k0, %k0
; KNL_X32-NEXT: kshiftrw $3, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $14, %k2, %k2
; KNL_X32-NEXT: korw %k2, %k1, %k1
; KNL_X32-NEXT: xorl %eax, %eax
; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT: movl $65535, %ecx ## imm = 0xFFFF
; KNL_X32-NEXT: movl $0, %edx
; KNL_X32-NEXT: cmovnel %ecx, %edx
; KNL_X32-NEXT: kshiftlw $2, %k0, %k0
; KNL_X32-NEXT: kshiftrw $2, %k0, %k0
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftlw $1, %k0, %k0
; KNL_X32-NEXT: kshiftrw $1, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %bl
; KNL_X32-NEXT: kmovw %ebx, %k1
; KNL_X32-NEXT: kshiftlw $15, %k1, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: kmovw %edx, %k1
; KNL_X32-NEXT: testb $1, {{[0-9]+}}(%esp)
; KNL_X32-NEXT: cmovnel %ecx, %eax
; KNL_X32-NEXT: kmovw (%esp), %k2 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k2, %k0, %k0
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kandw %k1, %k2, %k1
; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: kmovw %k1, %ebx
; KNL_X32-NEXT: kshiftrw $1, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kshiftrw $2, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %edi
; KNL_X32-NEXT: kshiftrw $3, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %ebp
; KNL_X32-NEXT: kshiftrw $4, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %edx
; KNL_X32-NEXT: kshiftrw $5, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %ecx
; KNL_X32-NEXT: kshiftrw $6, %k0, %k1
; KNL_X32-NEXT: andl $1, %ebx
; KNL_X32-NEXT: movb %bl, 2(%eax)
; KNL_X32-NEXT: kmovw %k0, %ebx
; KNL_X32-NEXT: andl $1, %ebx
; KNL_X32-NEXT: andl $1, %esi
; KNL_X32-NEXT: leal (%ebx,%esi,2), %esi
; KNL_X32-NEXT: kmovw %k1, %ebx
; KNL_X32-NEXT: kshiftrw $7, %k0, %k1
; KNL_X32-NEXT: andl $1, %edi
; KNL_X32-NEXT: leal (%esi,%edi,4), %esi
; KNL_X32-NEXT: kmovw %k1, %edi
; KNL_X32-NEXT: kshiftrw $8, %k0, %k1
; KNL_X32-NEXT: andl $1, %ebp
; KNL_X32-NEXT: leal (%esi,%ebp,8), %esi
; KNL_X32-NEXT: kmovw %k1, %ebp
; KNL_X32-NEXT: kshiftrw $9, %k0, %k1
; KNL_X32-NEXT: andl $1, %edx
; KNL_X32-NEXT: shll $4, %edx
; KNL_X32-NEXT: orl %esi, %edx
; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kshiftrw $10, %k0, %k1
; KNL_X32-NEXT: andl $1, %ecx
; KNL_X32-NEXT: shll $5, %ecx
; KNL_X32-NEXT: orl %edx, %ecx
; KNL_X32-NEXT: kmovw %k1, %edx
; KNL_X32-NEXT: kshiftrw $11, %k0, %k1
; KNL_X32-NEXT: andl $1, %ebx
; KNL_X32-NEXT: shll $6, %ebx
; KNL_X32-NEXT: andl $1, %edi
; KNL_X32-NEXT: shll $7, %edi
; KNL_X32-NEXT: orl %ebx, %edi
; KNL_X32-NEXT: kmovw %k1, %ebx
; KNL_X32-NEXT: kshiftrw $12, %k0, %k1
; KNL_X32-NEXT: andl $1, %ebp
; KNL_X32-NEXT: shll $8, %ebp
; KNL_X32-NEXT: orl %edi, %ebp
; KNL_X32-NEXT: kmovw %k1, %edi
; KNL_X32-NEXT: kshiftrw $13, %k0, %k1
; KNL_X32-NEXT: andl $1, %esi
; KNL_X32-NEXT: shll $9, %esi
; KNL_X32-NEXT: orl %ebp, %esi
; KNL_X32-NEXT: kmovw %k1, %ebp
; KNL_X32-NEXT: kshiftrw $14, %k0, %k1
; KNL_X32-NEXT: andl $1, %edx
; KNL_X32-NEXT: shll $10, %edx
; KNL_X32-NEXT: orl %esi, %edx
; KNL_X32-NEXT: kmovw %k1, %esi
; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: orl %ecx, %edx
; KNL_X32-NEXT: kmovw %k0, %ecx
; KNL_X32-NEXT: andl $1, %ebx
; KNL_X32-NEXT: shll $11, %ebx
; KNL_X32-NEXT: andl $1, %edi
; KNL_X32-NEXT: shll $12, %edi
; KNL_X32-NEXT: orl %ebx, %edi
; KNL_X32-NEXT: andl $1, %ebp
; KNL_X32-NEXT: shll $13, %ebp
; KNL_X32-NEXT: orl %edi, %ebp
; KNL_X32-NEXT: andl $1, %esi
; KNL_X32-NEXT: shll $14, %esi
; KNL_X32-NEXT: orl %ebp, %esi
; KNL_X32-NEXT: andl $1, %ecx
; KNL_X32-NEXT: shll $15, %ecx
; KNL_X32-NEXT: orl %esi, %ecx
; KNL_X32-NEXT: orl %edx, %ecx
; KNL_X32-NEXT: movw %cx, (%eax)
; KNL_X32-NEXT: addl $20, %esp
; KNL_X32-NEXT: popl %esi
; KNL_X32-NEXT: popl %edi
; KNL_X32-NEXT: popl %ebx
; KNL_X32-NEXT: popl %ebp
; KNL_X32-NEXT: retl $4
%c = and <17 x i1> %a, %b
ret <17 x i1> %c
}
define <7 x i1> @test17(<7 x i1> %a, <7 x i1> %b, <7 x i1> %c, <7 x i1> %d, <7 x i1>%e, <7 x i1>%f, <7 x i1> %g, <7 x i1> %h, <7 x i1> %i) nounwind {
; KNL-LABEL: test17:
; KNL: ## %bb.0:
; KNL-NEXT: movq %rdi, %rax
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: kshiftlw $2, %k0, %k1
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $14, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k2
; KNL-NEXT: kshiftlw $3, %k2, %k2
; KNL-NEXT: kshiftlw $14, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $13, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k2
; KNL-NEXT: kshiftlw $4, %k2, %k2
; KNL-NEXT: kshiftlw $13, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $12, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k2
; KNL-NEXT: kshiftlw $5, %k2, %k2
; KNL-NEXT: kshiftlw $12, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $11, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k2
; KNL-NEXT: kshiftlw $6, %k2, %k2
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $10, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k2
; KNL-NEXT: kshiftlw $7, %k2, %k2
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $9, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $14, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k2
; KNL-NEXT: kshiftlw $3, %k2, %k2
; KNL-NEXT: kshiftlw $14, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $13, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k2
; KNL-NEXT: kshiftlw $4, %k2, %k2
; KNL-NEXT: kshiftlw $13, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $12, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k2
; KNL-NEXT: kshiftlw $5, %k2, %k2
; KNL-NEXT: kshiftlw $12, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $11, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k2
; KNL-NEXT: kshiftlw $6, %k2, %k2
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $10, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k2
; KNL-NEXT: kshiftlw $7, %k2, %k2
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $9, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kmovw %k0, {{[-0-9]+}}(%r{{[sb]}}p) ## 2-byte Spill
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $14, %k3, %k3
; KNL-NEXT: korw %k0, %k3, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k3
; KNL-NEXT: kshiftlw $3, %k3, %k3
; KNL-NEXT: kshiftlw $14, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: korw %k3, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $13, %k3, %k3
; KNL-NEXT: korw %k0, %k3, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k3
; KNL-NEXT: kshiftlw $4, %k3, %k3
; KNL-NEXT: kshiftlw $13, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k0
; KNL-NEXT: korw %k3, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $12, %k3, %k3
; KNL-NEXT: korw %k0, %k3, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k3
; KNL-NEXT: kshiftlw $5, %k3, %k3
; KNL-NEXT: kshiftlw $12, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k0
; KNL-NEXT: korw %k3, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $11, %k3, %k3
; KNL-NEXT: korw %k0, %k3, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k3
; KNL-NEXT: kshiftlw $6, %k3, %k3
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k0
; KNL-NEXT: korw %k3, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $10, %k3, %k3
; KNL-NEXT: korw %k0, %k3, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k3
; KNL-NEXT: kshiftlw $7, %k3, %k3
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k0
; KNL-NEXT: korw %k3, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k3
; KNL-NEXT: kshiftlw $15, %k3, %k3
; KNL-NEXT: kshiftrw $9, %k3, %k3
; KNL-NEXT: korw %k0, %k3, %k3
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $14, %k4, %k4
; KNL-NEXT: korw %k0, %k4, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k4
; KNL-NEXT: kshiftlw $3, %k4, %k4
; KNL-NEXT: kshiftlw $14, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: korw %k4, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $13, %k4, %k4
; KNL-NEXT: korw %k0, %k4, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k4
; KNL-NEXT: kshiftlw $4, %k4, %k4
; KNL-NEXT: kshiftlw $13, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k0
; KNL-NEXT: korw %k4, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $12, %k4, %k4
; KNL-NEXT: korw %k0, %k4, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k4
; KNL-NEXT: kshiftlw $5, %k4, %k4
; KNL-NEXT: kshiftlw $12, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k0
; KNL-NEXT: korw %k4, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $11, %k4, %k4
; KNL-NEXT: korw %k0, %k4, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k4
; KNL-NEXT: kshiftlw $6, %k4, %k4
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k0
; KNL-NEXT: korw %k4, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $10, %k4, %k4
; KNL-NEXT: korw %k0, %k4, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k4
; KNL-NEXT: kshiftlw $7, %k4, %k4
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k0
; KNL-NEXT: korw %k4, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k4
; KNL-NEXT: kshiftlw $15, %k4, %k4
; KNL-NEXT: kshiftrw $9, %k4, %k4
; KNL-NEXT: korw %k0, %k4, %k4
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k5
; KNL-NEXT: kshiftlw $15, %k5, %k5
; KNL-NEXT: kshiftrw $14, %k5, %k5
; KNL-NEXT: korw %k0, %k5, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k5
; KNL-NEXT: kshiftlw $3, %k5, %k5
; KNL-NEXT: kshiftlw $14, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: korw %k5, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k5
; KNL-NEXT: kshiftlw $15, %k5, %k5
; KNL-NEXT: kshiftrw $13, %k5, %k5
; KNL-NEXT: korw %k0, %k5, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k5
; KNL-NEXT: kshiftlw $4, %k5, %k5
; KNL-NEXT: kshiftlw $13, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k0
; KNL-NEXT: korw %k5, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k5
; KNL-NEXT: kshiftlw $15, %k5, %k5
; KNL-NEXT: kshiftrw $12, %k5, %k5
; KNL-NEXT: korw %k0, %k5, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k5
; KNL-NEXT: kshiftlw $5, %k5, %k5
; KNL-NEXT: kshiftlw $12, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k0
; KNL-NEXT: korw %k5, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k5
; KNL-NEXT: kshiftlw $15, %k5, %k5
; KNL-NEXT: kshiftrw $11, %k5, %k5
; KNL-NEXT: korw %k0, %k5, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k5
; KNL-NEXT: kshiftlw $6, %k5, %k5
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k0
; KNL-NEXT: korw %k5, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k5
; KNL-NEXT: kshiftlw $15, %k5, %k5
; KNL-NEXT: kshiftrw $10, %k5, %k5
; KNL-NEXT: korw %k0, %k5, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k5
; KNL-NEXT: kshiftlw $7, %k5, %k5
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k0
; KNL-NEXT: korw %k5, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k5
; KNL-NEXT: kshiftlw $15, %k5, %k5
; KNL-NEXT: kshiftrw $9, %k5, %k5
; KNL-NEXT: korw %k0, %k5, %k5
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $14, %k6, %k6
; KNL-NEXT: korw %k0, %k6, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k6
; KNL-NEXT: kshiftlw $3, %k6, %k6
; KNL-NEXT: kshiftlw $14, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $13, %k6, %k6
; KNL-NEXT: korw %k0, %k6, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k6
; KNL-NEXT: kshiftlw $4, %k6, %k6
; KNL-NEXT: kshiftlw $13, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k0
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $12, %k6, %k6
; KNL-NEXT: korw %k0, %k6, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k6
; KNL-NEXT: kshiftlw $5, %k6, %k6
; KNL-NEXT: kshiftlw $12, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k0
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $11, %k6, %k6
; KNL-NEXT: korw %k0, %k6, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k6
; KNL-NEXT: kshiftlw $6, %k6, %k6
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k0
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $10, %k6, %k6
; KNL-NEXT: korw %k0, %k6, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k6
; KNL-NEXT: kshiftlw $7, %k6, %k6
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k0
; KNL-NEXT: korw %k6, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k6
; KNL-NEXT: kshiftlw $15, %k6, %k6
; KNL-NEXT: kshiftrw $9, %k6, %k6
; KNL-NEXT: korw %k0, %k6, %k6
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $14, %k7, %k7
; KNL-NEXT: korw %k0, %k7, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k7
; KNL-NEXT: kshiftlw $3, %k7, %k7
; KNL-NEXT: kshiftlw $14, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: korw %k7, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $13, %k7, %k7
; KNL-NEXT: korw %k0, %k7, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k7
; KNL-NEXT: kshiftlw $4, %k7, %k7
; KNL-NEXT: kshiftlw $13, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k0
; KNL-NEXT: korw %k7, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $12, %k7, %k7
; KNL-NEXT: korw %k0, %k7, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k7
; KNL-NEXT: kshiftlw $5, %k7, %k7
; KNL-NEXT: kshiftlw $12, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k0
; KNL-NEXT: korw %k7, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $11, %k7, %k7
; KNL-NEXT: korw %k0, %k7, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k7
; KNL-NEXT: kshiftlw $6, %k7, %k7
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k0
; KNL-NEXT: korw %k7, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $10, %k7, %k7
; KNL-NEXT: korw %k0, %k7, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k7
; KNL-NEXT: kshiftlw $7, %k7, %k7
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k0
; KNL-NEXT: korw %k7, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %dil
; KNL-NEXT: kmovw %edi, %k7
; KNL-NEXT: kshiftlw $15, %k7, %k7
; KNL-NEXT: kshiftrw $9, %k7, %k7
; KNL-NEXT: korw %k0, %k7, %k7
; KNL-NEXT: kmovw %esi, %k0
; KNL-NEXT: kshiftlw $15, %k0, %k0
; KNL-NEXT: kshiftrw $15, %k0, %k0
; KNL-NEXT: korw %k1, %k0, %k0
; KNL-NEXT: kmovw %edx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $14, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $3, %k0, %k2
; KNL-NEXT: kshiftlw $3, %k2, %k2
; KNL-NEXT: kshiftlw $14, %k0, %k0
; KNL-NEXT: kshiftrw $14, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $13, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $4, %k0, %k2
; KNL-NEXT: kshiftlw $4, %k2, %k2
; KNL-NEXT: kshiftlw $13, %k0, %k0
; KNL-NEXT: kshiftrw $13, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: kmovw %r8d, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $12, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $5, %k0, %k2
; KNL-NEXT: kshiftlw $5, %k2, %k2
; KNL-NEXT: kshiftlw $12, %k0, %k0
; KNL-NEXT: kshiftrw $12, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: kmovw %r9d, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $11, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k2
; KNL-NEXT: kshiftlw $6, %k2, %k2
; KNL-NEXT: kshiftlw $11, %k0, %k0
; KNL-NEXT: kshiftrw $11, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $10, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: kshiftrw $7, %k0, %k2
; KNL-NEXT: kshiftlw $7, %k2, %k2
; KNL-NEXT: kshiftlw $10, %k0, %k0
; KNL-NEXT: kshiftrw $10, %k0, %k0
; KNL-NEXT: korw %k2, %k0, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $9, %k2, %k2
; KNL-NEXT: korw %k0, %k2, %k0
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $15, %k2, %k2
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $14, %k2, %k2
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: kshiftrw $3, %k1, %k2
; KNL-NEXT: kshiftlw $3, %k2, %k2
; KNL-NEXT: kshiftlw $14, %k1, %k1
; KNL-NEXT: kshiftrw $14, %k1, %k1
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $13, %k2, %k2
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: kshiftrw $4, %k1, %k2
; KNL-NEXT: kshiftlw $4, %k2, %k2
; KNL-NEXT: kshiftlw $13, %k1, %k1
; KNL-NEXT: kshiftrw $13, %k1, %k1
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $12, %k2, %k2
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: kshiftrw $5, %k1, %k2
; KNL-NEXT: kshiftlw $5, %k2, %k2
; KNL-NEXT: kshiftlw $12, %k1, %k1
; KNL-NEXT: kshiftrw $12, %k1, %k1
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $11, %k2, %k2
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: kshiftrw $6, %k1, %k2
; KNL-NEXT: kshiftlw $6, %k2, %k2
; KNL-NEXT: kshiftlw $11, %k1, %k1
; KNL-NEXT: kshiftrw $11, %k1, %k1
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $10, %k2, %k2
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: kshiftrw $7, %k1, %k2
; KNL-NEXT: kshiftlw $7, %k2, %k2
; KNL-NEXT: kshiftlw $10, %k1, %k1
; KNL-NEXT: kshiftrw $10, %k1, %k1
; KNL-NEXT: korw %k2, %k1, %k1
; KNL-NEXT: movb {{[0-9]+}}(%rsp), %cl
; KNL-NEXT: kmovw %ecx, %k2
; KNL-NEXT: kshiftlw $15, %k2, %k2
; KNL-NEXT: kshiftrw $9, %k2, %k2
; KNL-NEXT: korw %k1, %k2, %k1
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kandw %k7, %k0, %k0
; KNL-NEXT: kandw %k6, %k0, %k0
; KNL-NEXT: kandw %k5, %k0, %k0
; KNL-NEXT: kandw %k4, %k0, %k0
; KNL-NEXT: kandw %k3, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kmovw {{[-0-9]+}}(%r{{[sb]}}p), %k1 ## 2-byte Reload
; KNL-NEXT: kandw %k1, %k0, %k0
; KNL-NEXT: kshiftrw $6, %k0, %k1
; KNL-NEXT: kmovw %k1, %r8d
; KNL-NEXT: kshiftrw $5, %k0, %k1
; KNL-NEXT: kmovw %k1, %r9d
; KNL-NEXT: kshiftrw $4, %k0, %k1
; KNL-NEXT: kmovw %k1, %r10d
; KNL-NEXT: kshiftrw $3, %k0, %k1
; KNL-NEXT: kmovw %k1, %edi
; KNL-NEXT: kshiftrw $2, %k0, %k1
; KNL-NEXT: kmovw %k1, %ecx
; KNL-NEXT: kshiftrw $1, %k0, %k1
; KNL-NEXT: kmovw %k1, %edx
; KNL-NEXT: kmovw %k0, %esi
; KNL-NEXT: andb $1, %sil
; KNL-NEXT: andb $1, %dl
; KNL-NEXT: addb %dl, %dl
; KNL-NEXT: orb %sil, %dl
; KNL-NEXT: andb $1, %cl
; KNL-NEXT: shlb $2, %cl
; KNL-NEXT: orb %dl, %cl
; KNL-NEXT: andb $1, %dil
; KNL-NEXT: shlb $3, %dil
; KNL-NEXT: orb %cl, %dil
; KNL-NEXT: andb $1, %r10b
; KNL-NEXT: shlb $4, %r10b
; KNL-NEXT: orb %dil, %r10b
; KNL-NEXT: andb $1, %r9b
; KNL-NEXT: shlb $5, %r9b
; KNL-NEXT: orb %r10b, %r9b
; KNL-NEXT: shlb $6, %r8b
; KNL-NEXT: orb %r9b, %r8b
; KNL-NEXT: andb $127, %r8b
; KNL-NEXT: movb %r8b, (%rax)
; KNL-NEXT: retq
;
; SKX-LABEL: test17:
; SKX: ## %bb.0:
; SKX-NEXT: movq %rdi, %rax
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k0
; SKX-NEXT: kshiftlb $7, %k0, %k0
; SKX-NEXT: kshiftrb $7, %k0, %k1
; SKX-NEXT: kshiftlb $2, %k0, %k0
; SKX-NEXT: korb %k0, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftlb $7, %k2, %k2
; SKX-NEXT: kshiftrb $6, %k2, %k2
; SKX-NEXT: korb %k1, %k2, %k1
; SKX-NEXT: kshiftrb $3, %k1, %k2
; SKX-NEXT: kshiftlb $3, %k2, %k2
; SKX-NEXT: kshiftlb $6, %k1, %k1
; SKX-NEXT: kshiftrb $6, %k1, %k1
; SKX-NEXT: korb %k2, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftlb $7, %k2, %k2
; SKX-NEXT: kshiftrb $5, %k2, %k2
; SKX-NEXT: korb %k1, %k2, %k1
; SKX-NEXT: kshiftrb $4, %k1, %k2
; SKX-NEXT: kshiftlb $4, %k2, %k2
; SKX-NEXT: kshiftlb $5, %k1, %k1
; SKX-NEXT: kshiftrb $5, %k1, %k1
; SKX-NEXT: korb %k2, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftlb $7, %k2, %k2
; SKX-NEXT: kshiftrb $4, %k2, %k2
; SKX-NEXT: korb %k1, %k2, %k1
; SKX-NEXT: kshiftrb $5, %k1, %k2
; SKX-NEXT: kshiftlb $5, %k2, %k2
; SKX-NEXT: kshiftlb $4, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftrb $4, %k1, %k1
; SKX-NEXT: korb %k2, %k1, %k1
; SKX-NEXT: kshiftlb $7, %k3, %k2
; SKX-NEXT: kshiftrb $3, %k2, %k2
; SKX-NEXT: korb %k1, %k2, %k1
; SKX-NEXT: kshiftrb $6, %k1, %k2
; SKX-NEXT: kshiftlb $6, %k2, %k2
; SKX-NEXT: kshiftlb $3, %k1, %k1
; SKX-NEXT: kshiftrb $3, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: korb %k2, %k1, %k1
; SKX-NEXT: kshiftlb $7, %k3, %k2
; SKX-NEXT: kshiftrb $2, %k2, %k2
; SKX-NEXT: korb %k1, %k2, %k1
; SKX-NEXT: kshiftrb $7, %k1, %k2
; SKX-NEXT: kshiftlb $7, %k2, %k2
; SKX-NEXT: kshiftlb $2, %k1, %k1
; SKX-NEXT: kshiftrb $2, %k1, %k1
; SKX-NEXT: korb %k2, %k1, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftlb $7, %k2, %k2
; SKX-NEXT: kshiftrb $1, %k2, %k2
; SKX-NEXT: korb %k1, %k2, %k1
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k2
; SKX-NEXT: kshiftlb $7, %k2, %k2
; SKX-NEXT: kshiftrb $7, %k2, %k2
; SKX-NEXT: korb %k0, %k2, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftlb $7, %k3, %k3
; SKX-NEXT: kshiftrb $6, %k3, %k3
; SKX-NEXT: korb %k2, %k3, %k2
; SKX-NEXT: kshiftrb $3, %k2, %k3
; SKX-NEXT: kshiftlb $3, %k3, %k3
; SKX-NEXT: kshiftlb $6, %k2, %k2
; SKX-NEXT: kshiftrb $6, %k2, %k2
; SKX-NEXT: korb %k3, %k2, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftlb $7, %k3, %k3
; SKX-NEXT: kshiftrb $5, %k3, %k3
; SKX-NEXT: korb %k2, %k3, %k2
; SKX-NEXT: kshiftrb $4, %k2, %k3
; SKX-NEXT: kshiftlb $4, %k3, %k3
; SKX-NEXT: kshiftlb $5, %k2, %k2
; SKX-NEXT: kshiftrb $5, %k2, %k2
; SKX-NEXT: korb %k3, %k2, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftlb $7, %k3, %k3
; SKX-NEXT: kshiftrb $4, %k3, %k3
; SKX-NEXT: korb %k2, %k3, %k2
; SKX-NEXT: kshiftrb $5, %k2, %k3
; SKX-NEXT: kshiftlb $5, %k3, %k3
; SKX-NEXT: kshiftlb $4, %k2, %k2
; SKX-NEXT: kshiftrb $4, %k2, %k2
; SKX-NEXT: korb %k3, %k2, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftlb $7, %k3, %k3
; SKX-NEXT: kshiftrb $3, %k3, %k3
; SKX-NEXT: korb %k2, %k3, %k2
; SKX-NEXT: kshiftrb $6, %k2, %k3
; SKX-NEXT: kshiftlb $6, %k3, %k3
; SKX-NEXT: kshiftlb $3, %k2, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: kshiftrb $3, %k2, %k2
; SKX-NEXT: korb %k3, %k2, %k2
; SKX-NEXT: kshiftlb $7, %k4, %k3
; SKX-NEXT: kshiftrb $2, %k3, %k3
; SKX-NEXT: korb %k2, %k3, %k2
; SKX-NEXT: kshiftrb $7, %k2, %k3
; SKX-NEXT: kshiftlb $7, %k3, %k3
; SKX-NEXT: kshiftlb $2, %k2, %k2
; SKX-NEXT: kshiftrb $2, %k2, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: korb %k3, %k2, %k2
; SKX-NEXT: kshiftlb $7, %k4, %k3
; SKX-NEXT: kshiftrb $1, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: korb %k2, %k3, %k2
; SKX-NEXT: kandb %k1, %k2, %k1
; SKX-NEXT: kshiftlb $7, %k4, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftrb $7, %k2, %k2
; SKX-NEXT: korb %k0, %k2, %k2
; SKX-NEXT: kshiftlb $7, %k3, %k3
; SKX-NEXT: kshiftrb $6, %k3, %k3
; SKX-NEXT: korb %k2, %k3, %k2
; SKX-NEXT: kshiftrb $3, %k2, %k3
; SKX-NEXT: kshiftlb $3, %k3, %k3
; SKX-NEXT: kshiftlb $6, %k2, %k2
; SKX-NEXT: kshiftrb $6, %k2, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: korb %k3, %k2, %k2
; SKX-NEXT: kshiftlb $7, %k4, %k3
; SKX-NEXT: kshiftrb $5, %k3, %k3
; SKX-NEXT: korb %k2, %k3, %k2
; SKX-NEXT: kshiftrb $4, %k2, %k3
; SKX-NEXT: kshiftlb $4, %k3, %k3
; SKX-NEXT: kshiftlb $5, %k2, %k2
; SKX-NEXT: kshiftrb $5, %k2, %k2
; SKX-NEXT: korb %k3, %k2, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftlb $7, %k3, %k3
; SKX-NEXT: kshiftrb $4, %k3, %k3
; SKX-NEXT: korb %k2, %k3, %k2
; SKX-NEXT: kshiftrb $5, %k2, %k3
; SKX-NEXT: kshiftlb $5, %k3, %k3
; SKX-NEXT: kshiftlb $4, %k2, %k2
; SKX-NEXT: kshiftrb $4, %k2, %k2
; SKX-NEXT: korb %k3, %k2, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftlb $7, %k3, %k3
; SKX-NEXT: kshiftrb $3, %k3, %k3
; SKX-NEXT: korb %k2, %k3, %k2
; SKX-NEXT: kshiftrb $6, %k2, %k3
; SKX-NEXT: kshiftlb $6, %k3, %k3
; SKX-NEXT: kshiftlb $3, %k2, %k2
; SKX-NEXT: kshiftrb $3, %k2, %k2
; SKX-NEXT: korb %k3, %k2, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftlb $7, %k3, %k3
; SKX-NEXT: kshiftrb $2, %k3, %k3
; SKX-NEXT: korb %k2, %k3, %k2
; SKX-NEXT: kshiftrb $7, %k2, %k3
; SKX-NEXT: kshiftlb $7, %k3, %k3
; SKX-NEXT: kshiftlb $2, %k2, %k2
; SKX-NEXT: kshiftrb $2, %k2, %k2
; SKX-NEXT: korb %k3, %k2, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftlb $7, %k3, %k3
; SKX-NEXT: kshiftrb $1, %k3, %k3
; SKX-NEXT: korb %k2, %k3, %k2
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k3
; SKX-NEXT: kshiftlb $7, %k3, %k3
; SKX-NEXT: kshiftrb $7, %k3, %k3
; SKX-NEXT: korb %k0, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftrb $6, %k4, %k4
; SKX-NEXT: korb %k3, %k4, %k3
; SKX-NEXT: kshiftrb $3, %k3, %k4
; SKX-NEXT: kshiftlb $3, %k4, %k4
; SKX-NEXT: kshiftlb $6, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftrb $6, %k3, %k3
; SKX-NEXT: korb %k4, %k3, %k3
; SKX-NEXT: kshiftlb $7, %k5, %k4
; SKX-NEXT: kshiftrb $5, %k4, %k4
; SKX-NEXT: korb %k3, %k4, %k3
; SKX-NEXT: kshiftrb $4, %k3, %k4
; SKX-NEXT: kshiftlb $4, %k4, %k4
; SKX-NEXT: kshiftlb $5, %k3, %k3
; SKX-NEXT: kshiftrb $5, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: korb %k4, %k3, %k3
; SKX-NEXT: kshiftlb $7, %k5, %k4
; SKX-NEXT: kshiftrb $4, %k4, %k4
; SKX-NEXT: korb %k3, %k4, %k3
; SKX-NEXT: kshiftrb $5, %k3, %k4
; SKX-NEXT: kshiftlb $5, %k4, %k4
; SKX-NEXT: kshiftlb $4, %k3, %k3
; SKX-NEXT: kshiftrb $4, %k3, %k3
; SKX-NEXT: korb %k4, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftrb $3, %k4, %k4
; SKX-NEXT: korb %k3, %k4, %k3
; SKX-NEXT: kshiftrb $6, %k3, %k4
; SKX-NEXT: kshiftlb $6, %k4, %k4
; SKX-NEXT: kshiftlb $3, %k3, %k3
; SKX-NEXT: kshiftrb $3, %k3, %k3
; SKX-NEXT: korb %k4, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftrb $2, %k4, %k4
; SKX-NEXT: korb %k3, %k4, %k3
; SKX-NEXT: kshiftrb $7, %k3, %k4
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftlb $2, %k3, %k3
; SKX-NEXT: kshiftrb $2, %k3, %k3
; SKX-NEXT: korb %k4, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftrb $1, %k4, %k4
; SKX-NEXT: korb %k3, %k4, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftrb $7, %k4, %k4
; SKX-NEXT: korb %k0, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $6, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $3, %k4, %k5
; SKX-NEXT: kshiftlb $3, %k5, %k5
; SKX-NEXT: kshiftlb $6, %k4, %k4
; SKX-NEXT: kshiftrb $6, %k4, %k4
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $5, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $4, %k4, %k5
; SKX-NEXT: kshiftlb $4, %k5, %k5
; SKX-NEXT: kshiftlb $5, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6
; SKX-NEXT: kshiftrb $5, %k4, %k4
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kshiftlb $7, %k6, %k5
; SKX-NEXT: kshiftrb $4, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $5, %k4, %k5
; SKX-NEXT: kshiftlb $5, %k5, %k5
; SKX-NEXT: kshiftlb $4, %k4, %k4
; SKX-NEXT: kshiftrb $4, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kshiftlb $7, %k6, %k5
; SKX-NEXT: kshiftrb $3, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $6, %k4, %k5
; SKX-NEXT: kshiftlb $6, %k5, %k5
; SKX-NEXT: kshiftlb $3, %k4, %k4
; SKX-NEXT: kshiftrb $3, %k4, %k4
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $2, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $7, %k4, %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftlb $2, %k4, %k4
; SKX-NEXT: kshiftrb $2, %k4, %k4
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $1, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kandb %k3, %k4, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: kandb %k2, %k3, %k2
; SKX-NEXT: kshiftlb $7, %k4, %k3
; SKX-NEXT: kshiftrb $7, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: korb %k0, %k3, %k3
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftrb $6, %k4, %k4
; SKX-NEXT: korb %k3, %k4, %k3
; SKX-NEXT: kshiftrb $3, %k3, %k4
; SKX-NEXT: kshiftlb $3, %k4, %k4
; SKX-NEXT: kshiftlb $6, %k3, %k3
; SKX-NEXT: kshiftrb $6, %k3, %k3
; SKX-NEXT: korb %k4, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftrb $5, %k4, %k4
; SKX-NEXT: korb %k3, %k4, %k3
; SKX-NEXT: kshiftrb $4, %k3, %k4
; SKX-NEXT: kshiftlb $4, %k4, %k4
; SKX-NEXT: kshiftlb $5, %k3, %k3
; SKX-NEXT: kshiftrb $5, %k3, %k3
; SKX-NEXT: korb %k4, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftrb $4, %k4, %k4
; SKX-NEXT: korb %k3, %k4, %k3
; SKX-NEXT: kshiftrb $5, %k3, %k4
; SKX-NEXT: kshiftlb $5, %k4, %k4
; SKX-NEXT: kshiftlb $4, %k3, %k3
; SKX-NEXT: kshiftrb $4, %k3, %k3
; SKX-NEXT: korb %k4, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftrb $3, %k4, %k4
; SKX-NEXT: korb %k3, %k4, %k3
; SKX-NEXT: kshiftrb $6, %k3, %k4
; SKX-NEXT: kshiftlb $6, %k4, %k4
; SKX-NEXT: kshiftlb $3, %k3, %k3
; SKX-NEXT: kshiftrb $3, %k3, %k3
; SKX-NEXT: korb %k4, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftrb $2, %k4, %k4
; SKX-NEXT: korb %k3, %k4, %k3
; SKX-NEXT: kshiftrb $7, %k3, %k4
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftlb $2, %k3, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftrb $2, %k3, %k3
; SKX-NEXT: korb %k4, %k3, %k3
; SKX-NEXT: kshiftlb $7, %k5, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftrb $1, %k4, %k4
; SKX-NEXT: korb %k3, %k4, %k3
; SKX-NEXT: kshiftlb $7, %k5, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftrb $7, %k4, %k4
; SKX-NEXT: korb %k0, %k4, %k4
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $6, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $3, %k4, %k5
; SKX-NEXT: kshiftlb $3, %k5, %k5
; SKX-NEXT: kshiftlb $6, %k4, %k4
; SKX-NEXT: kshiftrb $6, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kshiftlb $7, %k6, %k5
; SKX-NEXT: kshiftrb $5, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $4, %k4, %k5
; SKX-NEXT: kshiftlb $4, %k5, %k5
; SKX-NEXT: kshiftlb $5, %k4, %k4
; SKX-NEXT: kshiftrb $5, %k4, %k4
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $4, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $5, %k4, %k5
; SKX-NEXT: kshiftlb $5, %k5, %k5
; SKX-NEXT: kshiftlb $4, %k4, %k4
; SKX-NEXT: kshiftrb $4, %k4, %k4
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $3, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $6, %k4, %k5
; SKX-NEXT: kshiftlb $6, %k5, %k5
; SKX-NEXT: kshiftlb $3, %k4, %k4
; SKX-NEXT: kshiftrb $3, %k4, %k4
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $2, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $7, %k4, %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftlb $2, %k4, %k4
; SKX-NEXT: kshiftrb $2, %k4, %k4
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $1, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kandb %k3, %k4, %k3
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k4
; SKX-NEXT: kshiftlb $7, %k4, %k4
; SKX-NEXT: kshiftrb $7, %k4, %k4
; SKX-NEXT: korb %k0, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $6, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $3, %k4, %k5
; SKX-NEXT: kshiftlb $3, %k5, %k5
; SKX-NEXT: kshiftlb $6, %k4, %k4
; SKX-NEXT: kshiftrb $6, %k4, %k4
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $5, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $4, %k4, %k5
; SKX-NEXT: kshiftlb $4, %k5, %k5
; SKX-NEXT: kshiftlb $5, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6
; SKX-NEXT: kshiftrb $5, %k4, %k4
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kshiftlb $7, %k6, %k5
; SKX-NEXT: kshiftrb $4, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $5, %k4, %k5
; SKX-NEXT: kshiftlb $5, %k5, %k5
; SKX-NEXT: kshiftlb $4, %k4, %k4
; SKX-NEXT: kshiftrb $4, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k6
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kshiftlb $7, %k6, %k5
; SKX-NEXT: kshiftrb $3, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $6, %k4, %k5
; SKX-NEXT: kshiftlb $6, %k5, %k5
; SKX-NEXT: kshiftlb $3, %k4, %k4
; SKX-NEXT: kshiftrb $3, %k4, %k4
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $2, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kshiftrb $7, %k4, %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftlb $2, %k4, %k4
; SKX-NEXT: kshiftrb $2, %k4, %k4
; SKX-NEXT: korb %k5, %k4, %k4
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $1, %k5, %k5
; SKX-NEXT: korb %k4, %k5, %k4
; SKX-NEXT: kmovd %esi, %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $7, %k5, %k5
; SKX-NEXT: korb %k0, %k5, %k0
; SKX-NEXT: kmovd %edx, %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $6, %k5, %k5
; SKX-NEXT: korb %k0, %k5, %k0
; SKX-NEXT: kshiftrb $3, %k0, %k5
; SKX-NEXT: kshiftlb $3, %k5, %k5
; SKX-NEXT: kshiftlb $6, %k0, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k0
; SKX-NEXT: korb %k5, %k0, %k0
; SKX-NEXT: kmovd %ecx, %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $5, %k5, %k5
; SKX-NEXT: korb %k0, %k5, %k0
; SKX-NEXT: kshiftrb $4, %k0, %k5
; SKX-NEXT: kshiftlb $4, %k5, %k5
; SKX-NEXT: kshiftlb $5, %k0, %k0
; SKX-NEXT: kshiftrb $5, %k0, %k0
; SKX-NEXT: korb %k5, %k0, %k0
; SKX-NEXT: kmovd %r8d, %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $4, %k5, %k5
; SKX-NEXT: korb %k0, %k5, %k0
; SKX-NEXT: kshiftrb $5, %k0, %k5
; SKX-NEXT: kshiftlb $5, %k5, %k5
; SKX-NEXT: kshiftlb $4, %k0, %k0
; SKX-NEXT: kshiftrb $4, %k0, %k0
; SKX-NEXT: korb %k5, %k0, %k0
; SKX-NEXT: kmovd %r9d, %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $3, %k5, %k5
; SKX-NEXT: korb %k0, %k5, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k5
; SKX-NEXT: kshiftlb $6, %k5, %k5
; SKX-NEXT: kshiftlb $3, %k0, %k0
; SKX-NEXT: kshiftrb $3, %k0, %k0
; SKX-NEXT: korb %k5, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $2, %k5, %k5
; SKX-NEXT: korb %k0, %k5, %k0
; SKX-NEXT: kshiftrb $7, %k0, %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftlb $2, %k0, %k0
; SKX-NEXT: kshiftrb $2, %k0, %k0
; SKX-NEXT: korb %k5, %k0, %k0
; SKX-NEXT: kmovb {{[0-9]+}}(%rsp), %k5
; SKX-NEXT: kshiftlb $7, %k5, %k5
; SKX-NEXT: kshiftrb $1, %k5, %k5
; SKX-NEXT: korb %k0, %k5, %k0
; SKX-NEXT: kandb %k4, %k0, %k0
; SKX-NEXT: kandb %k3, %k0, %k0
; SKX-NEXT: kandb %k2, %k0, %k0
; SKX-NEXT: kandb %k1, %k0, %k0
; SKX-NEXT: kshiftrb $6, %k0, %k1
; SKX-NEXT: kmovd %k1, %r8d
; SKX-NEXT: kshiftrb $5, %k0, %k1
; SKX-NEXT: kmovd %k1, %r9d
; SKX-NEXT: kshiftrb $4, %k0, %k1
; SKX-NEXT: kmovd %k1, %r10d
; SKX-NEXT: kshiftrb $3, %k0, %k1
; SKX-NEXT: kmovd %k1, %edi
; SKX-NEXT: kshiftrb $2, %k0, %k1
; SKX-NEXT: kmovd %k1, %ecx
; SKX-NEXT: kshiftrb $1, %k0, %k1
; SKX-NEXT: kmovd %k1, %edx
; SKX-NEXT: kmovd %k0, %esi
; SKX-NEXT: andb $1, %sil
; SKX-NEXT: andb $1, %dl
; SKX-NEXT: addb %dl, %dl
; SKX-NEXT: orb %sil, %dl
; SKX-NEXT: andb $1, %cl
; SKX-NEXT: shlb $2, %cl
; SKX-NEXT: orb %dl, %cl
; SKX-NEXT: andb $1, %dil
; SKX-NEXT: shlb $3, %dil
; SKX-NEXT: orb %cl, %dil
; SKX-NEXT: andb $1, %r10b
; SKX-NEXT: shlb $4, %r10b
; SKX-NEXT: orb %dil, %r10b
; SKX-NEXT: andb $1, %r9b
; SKX-NEXT: shlb $5, %r9b
; SKX-NEXT: orb %r10b, %r9b
; SKX-NEXT: shlb $6, %r8b
; SKX-NEXT: orb %r9b, %r8b
; SKX-NEXT: andb $127, %r8b
; SKX-NEXT: movb %r8b, (%rax)
; SKX-NEXT: retq
;
; KNL_X32-LABEL: test17:
; KNL_X32: ## %bb.0:
; KNL_X32-NEXT: pushl %ebx
; KNL_X32-NEXT: pushl %eax
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: kshiftlw $2, %k0, %k1
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $14, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $3, %k0, %k2
; KNL_X32-NEXT: kshiftlw $3, %k2, %k2
; KNL_X32-NEXT: kshiftlw $14, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $13, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $4, %k0, %k2
; KNL_X32-NEXT: kshiftlw $4, %k2, %k2
; KNL_X32-NEXT: kshiftlw $13, %k0, %k0
; KNL_X32-NEXT: kshiftrw $13, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $12, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $5, %k0, %k2
; KNL_X32-NEXT: kshiftlw $5, %k2, %k2
; KNL_X32-NEXT: kshiftlw $12, %k0, %k0
; KNL_X32-NEXT: kshiftrw $12, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $11, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $6, %k0, %k2
; KNL_X32-NEXT: kshiftlw $6, %k2, %k2
; KNL_X32-NEXT: kshiftlw $11, %k0, %k0
; KNL_X32-NEXT: kshiftrw $11, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $10, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $7, %k0, %k2
; KNL_X32-NEXT: kshiftlw $7, %k2, %k2
; KNL_X32-NEXT: kshiftlw $10, %k0, %k0
; KNL_X32-NEXT: kshiftrw $10, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $9, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kmovw %k0, {{[-0-9]+}}(%e{{[sb]}}p) ## 2-byte Spill
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $14, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $3, %k0, %k2
; KNL_X32-NEXT: kshiftlw $3, %k2, %k2
; KNL_X32-NEXT: kshiftlw $14, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $13, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $4, %k0, %k2
; KNL_X32-NEXT: kshiftlw $4, %k2, %k2
; KNL_X32-NEXT: kshiftlw $13, %k0, %k0
; KNL_X32-NEXT: kshiftrw $13, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $12, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $5, %k0, %k2
; KNL_X32-NEXT: kshiftlw $5, %k2, %k2
; KNL_X32-NEXT: kshiftlw $12, %k0, %k0
; KNL_X32-NEXT: kshiftrw $12, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $11, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $6, %k0, %k2
; KNL_X32-NEXT: kshiftlw $6, %k2, %k2
; KNL_X32-NEXT: kshiftlw $11, %k0, %k0
; KNL_X32-NEXT: kshiftrw $11, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $10, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $7, %k0, %k2
; KNL_X32-NEXT: kshiftlw $7, %k2, %k2
; KNL_X32-NEXT: kshiftlw $10, %k0, %k0
; KNL_X32-NEXT: kshiftrw $10, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $9, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kmovw %k0, (%esp) ## 2-byte Spill
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k3
; KNL_X32-NEXT: kshiftlw $15, %k3, %k3
; KNL_X32-NEXT: kshiftrw $14, %k3, %k3
; KNL_X32-NEXT: korw %k0, %k3, %k0
; KNL_X32-NEXT: kshiftrw $3, %k0, %k3
; KNL_X32-NEXT: kshiftlw $3, %k3, %k3
; KNL_X32-NEXT: kshiftlw $14, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: korw %k3, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k3
; KNL_X32-NEXT: kshiftlw $15, %k3, %k3
; KNL_X32-NEXT: kshiftrw $13, %k3, %k3
; KNL_X32-NEXT: korw %k0, %k3, %k0
; KNL_X32-NEXT: kshiftrw $4, %k0, %k3
; KNL_X32-NEXT: kshiftlw $4, %k3, %k3
; KNL_X32-NEXT: kshiftlw $13, %k0, %k0
; KNL_X32-NEXT: kshiftrw $13, %k0, %k0
; KNL_X32-NEXT: korw %k3, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k3
; KNL_X32-NEXT: kshiftlw $15, %k3, %k3
; KNL_X32-NEXT: kshiftrw $12, %k3, %k3
; KNL_X32-NEXT: korw %k0, %k3, %k0
; KNL_X32-NEXT: kshiftrw $5, %k0, %k3
; KNL_X32-NEXT: kshiftlw $5, %k3, %k3
; KNL_X32-NEXT: kshiftlw $12, %k0, %k0
; KNL_X32-NEXT: kshiftrw $12, %k0, %k0
; KNL_X32-NEXT: korw %k3, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k3
; KNL_X32-NEXT: kshiftlw $15, %k3, %k3
; KNL_X32-NEXT: kshiftrw $11, %k3, %k3
; KNL_X32-NEXT: korw %k0, %k3, %k0
; KNL_X32-NEXT: kshiftrw $6, %k0, %k3
; KNL_X32-NEXT: kshiftlw $6, %k3, %k3
; KNL_X32-NEXT: kshiftlw $11, %k0, %k0
; KNL_X32-NEXT: kshiftrw $11, %k0, %k0
; KNL_X32-NEXT: korw %k3, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k3
; KNL_X32-NEXT: kshiftlw $15, %k3, %k3
; KNL_X32-NEXT: kshiftrw $10, %k3, %k3
; KNL_X32-NEXT: korw %k0, %k3, %k0
; KNL_X32-NEXT: kshiftrw $7, %k0, %k3
; KNL_X32-NEXT: kshiftlw $7, %k3, %k3
; KNL_X32-NEXT: kshiftlw $10, %k0, %k0
; KNL_X32-NEXT: kshiftrw $10, %k0, %k0
; KNL_X32-NEXT: korw %k3, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k3
; KNL_X32-NEXT: kshiftlw $15, %k3, %k3
; KNL_X32-NEXT: kshiftrw $9, %k3, %k3
; KNL_X32-NEXT: korw %k0, %k3, %k3
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k4
; KNL_X32-NEXT: kshiftlw $15, %k4, %k4
; KNL_X32-NEXT: kshiftrw $14, %k4, %k4
; KNL_X32-NEXT: korw %k0, %k4, %k0
; KNL_X32-NEXT: kshiftrw $3, %k0, %k4
; KNL_X32-NEXT: kshiftlw $3, %k4, %k4
; KNL_X32-NEXT: kshiftlw $14, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: korw %k4, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k4
; KNL_X32-NEXT: kshiftlw $15, %k4, %k4
; KNL_X32-NEXT: kshiftrw $13, %k4, %k4
; KNL_X32-NEXT: korw %k0, %k4, %k0
; KNL_X32-NEXT: kshiftrw $4, %k0, %k4
; KNL_X32-NEXT: kshiftlw $4, %k4, %k4
; KNL_X32-NEXT: kshiftlw $13, %k0, %k0
; KNL_X32-NEXT: kshiftrw $13, %k0, %k0
; KNL_X32-NEXT: korw %k4, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k4
; KNL_X32-NEXT: kshiftlw $15, %k4, %k4
; KNL_X32-NEXT: kshiftrw $12, %k4, %k4
; KNL_X32-NEXT: korw %k0, %k4, %k0
; KNL_X32-NEXT: kshiftrw $5, %k0, %k4
; KNL_X32-NEXT: kshiftlw $5, %k4, %k4
; KNL_X32-NEXT: kshiftlw $12, %k0, %k0
; KNL_X32-NEXT: kshiftrw $12, %k0, %k0
; KNL_X32-NEXT: korw %k4, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k4
; KNL_X32-NEXT: kshiftlw $15, %k4, %k4
; KNL_X32-NEXT: kshiftrw $11, %k4, %k4
; KNL_X32-NEXT: korw %k0, %k4, %k0
; KNL_X32-NEXT: kshiftrw $6, %k0, %k4
; KNL_X32-NEXT: kshiftlw $6, %k4, %k4
; KNL_X32-NEXT: kshiftlw $11, %k0, %k0
; KNL_X32-NEXT: kshiftrw $11, %k0, %k0
; KNL_X32-NEXT: korw %k4, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k4
; KNL_X32-NEXT: kshiftlw $15, %k4, %k4
; KNL_X32-NEXT: kshiftrw $10, %k4, %k4
; KNL_X32-NEXT: korw %k0, %k4, %k0
; KNL_X32-NEXT: kshiftrw $7, %k0, %k4
; KNL_X32-NEXT: kshiftlw $7, %k4, %k4
; KNL_X32-NEXT: kshiftlw $10, %k0, %k0
; KNL_X32-NEXT: kshiftrw $10, %k0, %k0
; KNL_X32-NEXT: korw %k4, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k4
; KNL_X32-NEXT: kshiftlw $15, %k4, %k4
; KNL_X32-NEXT: kshiftrw $9, %k4, %k4
; KNL_X32-NEXT: korw %k0, %k4, %k4
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $14, %k5, %k5
; KNL_X32-NEXT: korw %k0, %k5, %k0
; KNL_X32-NEXT: kshiftrw $3, %k0, %k5
; KNL_X32-NEXT: kshiftlw $3, %k5, %k5
; KNL_X32-NEXT: kshiftlw $14, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: korw %k5, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $13, %k5, %k5
; KNL_X32-NEXT: korw %k0, %k5, %k0
; KNL_X32-NEXT: kshiftrw $4, %k0, %k5
; KNL_X32-NEXT: kshiftlw $4, %k5, %k5
; KNL_X32-NEXT: kshiftlw $13, %k0, %k0
; KNL_X32-NEXT: kshiftrw $13, %k0, %k0
; KNL_X32-NEXT: korw %k5, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $12, %k5, %k5
; KNL_X32-NEXT: korw %k0, %k5, %k0
; KNL_X32-NEXT: kshiftrw $5, %k0, %k5
; KNL_X32-NEXT: kshiftlw $5, %k5, %k5
; KNL_X32-NEXT: kshiftlw $12, %k0, %k0
; KNL_X32-NEXT: kshiftrw $12, %k0, %k0
; KNL_X32-NEXT: korw %k5, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $11, %k5, %k5
; KNL_X32-NEXT: korw %k0, %k5, %k0
; KNL_X32-NEXT: kshiftrw $6, %k0, %k5
; KNL_X32-NEXT: kshiftlw $6, %k5, %k5
; KNL_X32-NEXT: kshiftlw $11, %k0, %k0
; KNL_X32-NEXT: kshiftrw $11, %k0, %k0
; KNL_X32-NEXT: korw %k5, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $10, %k5, %k5
; KNL_X32-NEXT: korw %k0, %k5, %k0
; KNL_X32-NEXT: kshiftrw $7, %k0, %k5
; KNL_X32-NEXT: kshiftlw $7, %k5, %k5
; KNL_X32-NEXT: kshiftlw $10, %k0, %k0
; KNL_X32-NEXT: kshiftrw $10, %k0, %k0
; KNL_X32-NEXT: korw %k5, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k5
; KNL_X32-NEXT: kshiftlw $15, %k5, %k5
; KNL_X32-NEXT: kshiftrw $9, %k5, %k5
; KNL_X32-NEXT: korw %k0, %k5, %k5
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $14, %k6, %k6
; KNL_X32-NEXT: korw %k0, %k6, %k0
; KNL_X32-NEXT: kshiftrw $3, %k0, %k6
; KNL_X32-NEXT: kshiftlw $3, %k6, %k6
; KNL_X32-NEXT: kshiftlw $14, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $13, %k6, %k6
; KNL_X32-NEXT: korw %k0, %k6, %k0
; KNL_X32-NEXT: kshiftrw $4, %k0, %k6
; KNL_X32-NEXT: kshiftlw $4, %k6, %k6
; KNL_X32-NEXT: kshiftlw $13, %k0, %k0
; KNL_X32-NEXT: kshiftrw $13, %k0, %k0
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $12, %k6, %k6
; KNL_X32-NEXT: korw %k0, %k6, %k0
; KNL_X32-NEXT: kshiftrw $5, %k0, %k6
; KNL_X32-NEXT: kshiftlw $5, %k6, %k6
; KNL_X32-NEXT: kshiftlw $12, %k0, %k0
; KNL_X32-NEXT: kshiftrw $12, %k0, %k0
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $11, %k6, %k6
; KNL_X32-NEXT: korw %k0, %k6, %k0
; KNL_X32-NEXT: kshiftrw $6, %k0, %k6
; KNL_X32-NEXT: kshiftlw $6, %k6, %k6
; KNL_X32-NEXT: kshiftlw $11, %k0, %k0
; KNL_X32-NEXT: kshiftrw $11, %k0, %k0
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $10, %k6, %k6
; KNL_X32-NEXT: korw %k0, %k6, %k0
; KNL_X32-NEXT: kshiftrw $7, %k0, %k6
; KNL_X32-NEXT: kshiftlw $7, %k6, %k6
; KNL_X32-NEXT: kshiftlw $10, %k0, %k0
; KNL_X32-NEXT: kshiftrw $10, %k0, %k0
; KNL_X32-NEXT: korw %k6, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k6
; KNL_X32-NEXT: kshiftlw $15, %k6, %k6
; KNL_X32-NEXT: kshiftrw $9, %k6, %k6
; KNL_X32-NEXT: korw %k0, %k6, %k6
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: kshiftlw $15, %k7, %k7
; KNL_X32-NEXT: kshiftrw $14, %k7, %k7
; KNL_X32-NEXT: korw %k0, %k7, %k0
; KNL_X32-NEXT: kshiftrw $3, %k0, %k7
; KNL_X32-NEXT: kshiftlw $3, %k7, %k7
; KNL_X32-NEXT: kshiftlw $14, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: korw %k7, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: kshiftlw $15, %k7, %k7
; KNL_X32-NEXT: kshiftrw $13, %k7, %k7
; KNL_X32-NEXT: korw %k0, %k7, %k0
; KNL_X32-NEXT: kshiftrw $4, %k0, %k7
; KNL_X32-NEXT: kshiftlw $4, %k7, %k7
; KNL_X32-NEXT: kshiftlw $13, %k0, %k0
; KNL_X32-NEXT: kshiftrw $13, %k0, %k0
; KNL_X32-NEXT: korw %k7, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: kshiftlw $15, %k7, %k7
; KNL_X32-NEXT: kshiftrw $12, %k7, %k7
; KNL_X32-NEXT: korw %k0, %k7, %k0
; KNL_X32-NEXT: kshiftrw $5, %k0, %k7
; KNL_X32-NEXT: kshiftlw $5, %k7, %k7
; KNL_X32-NEXT: kshiftlw $12, %k0, %k0
; KNL_X32-NEXT: kshiftrw $12, %k0, %k0
; KNL_X32-NEXT: korw %k7, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: kshiftlw $15, %k7, %k7
; KNL_X32-NEXT: kshiftrw $11, %k7, %k7
; KNL_X32-NEXT: korw %k0, %k7, %k0
; KNL_X32-NEXT: kshiftrw $6, %k0, %k7
; KNL_X32-NEXT: kshiftlw $6, %k7, %k7
; KNL_X32-NEXT: kshiftlw $11, %k0, %k0
; KNL_X32-NEXT: kshiftrw $11, %k0, %k0
; KNL_X32-NEXT: korw %k7, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: kshiftlw $15, %k7, %k7
; KNL_X32-NEXT: kshiftrw $10, %k7, %k7
; KNL_X32-NEXT: korw %k0, %k7, %k0
; KNL_X32-NEXT: kshiftrw $7, %k0, %k7
; KNL_X32-NEXT: kshiftlw $7, %k7, %k7
; KNL_X32-NEXT: kshiftlw $10, %k0, %k0
; KNL_X32-NEXT: kshiftrw $10, %k0, %k0
; KNL_X32-NEXT: korw %k7, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k7
; KNL_X32-NEXT: kshiftlw $15, %k7, %k7
; KNL_X32-NEXT: kshiftrw $9, %k7, %k7
; KNL_X32-NEXT: korw %k0, %k7, %k7
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k0
; KNL_X32-NEXT: kshiftlw $15, %k0, %k0
; KNL_X32-NEXT: kshiftrw $15, %k0, %k0
; KNL_X32-NEXT: korw %k1, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $14, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $3, %k0, %k2
; KNL_X32-NEXT: kshiftlw $3, %k2, %k2
; KNL_X32-NEXT: kshiftlw $14, %k0, %k0
; KNL_X32-NEXT: kshiftrw $14, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $13, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $4, %k0, %k2
; KNL_X32-NEXT: kshiftlw $4, %k2, %k2
; KNL_X32-NEXT: kshiftlw $13, %k0, %k0
; KNL_X32-NEXT: kshiftrw $13, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $12, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $5, %k0, %k2
; KNL_X32-NEXT: kshiftlw $5, %k2, %k2
; KNL_X32-NEXT: kshiftlw $12, %k0, %k0
; KNL_X32-NEXT: kshiftrw $12, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $11, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $6, %k0, %k2
; KNL_X32-NEXT: kshiftlw $6, %k2, %k2
; KNL_X32-NEXT: kshiftlw $11, %k0, %k0
; KNL_X32-NEXT: kshiftrw $11, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $10, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: kshiftrw $7, %k0, %k2
; KNL_X32-NEXT: kshiftlw $7, %k2, %k2
; KNL_X32-NEXT: kshiftlw $10, %k0, %k0
; KNL_X32-NEXT: kshiftrw $10, %k0, %k0
; KNL_X32-NEXT: korw %k2, %k0, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $9, %k2, %k2
; KNL_X32-NEXT: korw %k0, %k2, %k0
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $15, %k2, %k2
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $14, %k2, %k2
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: kshiftrw $3, %k1, %k2
; KNL_X32-NEXT: kshiftlw $3, %k2, %k2
; KNL_X32-NEXT: kshiftlw $14, %k1, %k1
; KNL_X32-NEXT: kshiftrw $14, %k1, %k1
; KNL_X32-NEXT: korw %k2, %k1, %k1
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $13, %k2, %k2
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: kshiftrw $4, %k1, %k2
; KNL_X32-NEXT: kshiftlw $4, %k2, %k2
; KNL_X32-NEXT: kshiftlw $13, %k1, %k1
; KNL_X32-NEXT: kshiftrw $13, %k1, %k1
; KNL_X32-NEXT: korw %k2, %k1, %k1
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $12, %k2, %k2
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: kshiftrw $5, %k1, %k2
; KNL_X32-NEXT: kshiftlw $5, %k2, %k2
; KNL_X32-NEXT: kshiftlw $12, %k1, %k1
; KNL_X32-NEXT: kshiftrw $12, %k1, %k1
; KNL_X32-NEXT: korw %k2, %k1, %k1
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $11, %k2, %k2
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: kshiftrw $6, %k1, %k2
; KNL_X32-NEXT: kshiftlw $6, %k2, %k2
; KNL_X32-NEXT: kshiftlw $11, %k1, %k1
; KNL_X32-NEXT: kshiftrw $11, %k1, %k1
; KNL_X32-NEXT: korw %k2, %k1, %k1
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $10, %k2, %k2
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: kshiftrw $7, %k1, %k2
; KNL_X32-NEXT: kshiftlw $7, %k2, %k2
; KNL_X32-NEXT: kshiftlw $10, %k1, %k1
; KNL_X32-NEXT: kshiftrw $10, %k1, %k1
; KNL_X32-NEXT: korw %k2, %k1, %k1
; KNL_X32-NEXT: movb {{[0-9]+}}(%esp), %al
; KNL_X32-NEXT: kmovw %eax, %k2
; KNL_X32-NEXT: kshiftlw $15, %k2, %k2
; KNL_X32-NEXT: kshiftrw $9, %k2, %k2
; KNL_X32-NEXT: korw %k1, %k2, %k1
; KNL_X32-NEXT: kandw %k1, %k0, %k0
; KNL_X32-NEXT: kandw %k7, %k0, %k0
; KNL_X32-NEXT: kandw %k6, %k0, %k0
; KNL_X32-NEXT: kandw %k5, %k0, %k0
; KNL_X32-NEXT: kandw %k4, %k0, %k0
; KNL_X32-NEXT: kandw %k3, %k0, %k0
; KNL_X32-NEXT: kmovw (%esp), %k1 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k1, %k0, %k0
; KNL_X32-NEXT: kmovw {{[-0-9]+}}(%e{{[sb]}}p), %k1 ## 2-byte Reload
; KNL_X32-NEXT: kandw %k1, %k0, %k0
; KNL_X32-NEXT: kshiftrw $6, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %ecx
; KNL_X32-NEXT: kshiftrw $5, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %eax
; KNL_X32-NEXT: kshiftrw $1, %k0, %k1
; KNL_X32-NEXT: kmovw %k1, %edx
; KNL_X32-NEXT: kshiftrw $2, %k0, %k1
; KNL_X32-NEXT: kmovw %k0, %ebx
; KNL_X32-NEXT: andb $1, %bl
; KNL_X32-NEXT: andb $1, %dl
; KNL_X32-NEXT: addb %dl, %dl
; KNL_X32-NEXT: orb %bl, %dl
; KNL_X32-NEXT: kmovw %k1, %ebx
; KNL_X32-NEXT: kshiftrw $3, %k0, %k1
; KNL_X32-NEXT: andb $1, %bl
; KNL_X32-NEXT: shlb $2, %bl
; KNL_X32-NEXT: orb %dl, %bl
; KNL_X32-NEXT: kmovw %k1, %edx
; KNL_X32-NEXT: kshiftrw $4, %k0, %k0
; KNL_X32-NEXT: andb $1, %dl
; KNL_X32-NEXT: shlb $3, %dl
; KNL_X32-NEXT: orb %bl, %dl
; KNL_X32-NEXT: kmovw %k0, %ebx
; KNL_X32-NEXT: andb $1, %bl
; KNL_X32-NEXT: shlb $4, %bl
; KNL_X32-NEXT: orb %dl, %bl
; KNL_X32-NEXT: andb $1, %al
; KNL_X32-NEXT: shlb $5, %al
; KNL_X32-NEXT: orb %bl, %al
; KNL_X32-NEXT: shlb $6, %cl
; KNL_X32-NEXT: orb %al, %cl
; KNL_X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; KNL_X32-NEXT: andb $127, %cl
; KNL_X32-NEXT: movb %cl, (%eax)
; KNL_X32-NEXT: addl $4, %esp
; KNL_X32-NEXT: popl %ebx
; KNL_X32-NEXT: retl $4
%j = and <7 x i1> %a, %b
%k = and <7 x i1> %j, %c
%l = and <7 x i1> %k, %d
%m = and <7 x i1> %l, %e
%n = and <7 x i1> %m, %f
%o = and <7 x i1> %n, %g
%p = and <7 x i1> %o, %h
%q = and <7 x i1> %p, %i
ret <7 x i1> %q
}