llvm-project/llvm/test/CodeGen/X86/avx512-intrinsics-fast-isel.ll


; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=X32
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=X64
; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512f-builtins.c
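; For reference, a sketch of the kind of C source that produces the first test's
; IR (the intrinsics named are real AVX-512F intrinsics from <immintrin.h>; the
; exact wrapper below is illustrative, not copied from the clang test):
;   __mmask16 test_mm512_kunpackb(__m512i A, __m512i B, __m512i C,
;                                 __m512i D, __m512i E, __m512i F) {
;     return _mm512_mask_cmpneq_epi32_mask(
;         _mm512_kunpackb(_mm512_cmpneq_epi32_mask(A, B),
;                         _mm512_cmpneq_epi32_mask(C, D)),
;         E, F);
;   }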
define zeroext i16 @test_mm512_kunpackb(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D, <8 x i64> %__E, <8 x i64> %__F) local_unnamed_addr #0 {
; X32-LABEL: test_mm512_kunpackb:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-64, %esp
; X32-NEXT: subl $64, %esp
; X32-NEXT: vmovdqa64 136(%ebp), %zmm3
; X32-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; X32-NEXT: vpcmpneqd 8(%ebp), %zmm2, %k1
; X32-NEXT: kunpckbw %k0, %k1, %k1
; X32-NEXT: vpcmpneqd 72(%ebp), %zmm3, %k0 {%k1}
; X32-NEXT: kmovw %k0, %eax
; X32-NEXT: movzwl %ax, %eax
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_kunpackb:
; X64: # %bb.0: # %entry
; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; X64-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; X64-NEXT: kunpckbw %k0, %k1, %k1
; X64-NEXT: vpcmpneqd %zmm5, %zmm4, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__E to <16 x i32>
%1 = bitcast <8 x i64> %__F to <16 x i32>
%2 = bitcast <8 x i64> %__A to <16 x i32>
%3 = bitcast <8 x i64> %__B to <16 x i32>
%4 = icmp ne <16 x i32> %2, %3
%5 = bitcast <8 x i64> %__C to <16 x i32>
%6 = bitcast <8 x i64> %__D to <16 x i32>
%7 = icmp ne <16 x i32> %5, %6
%8 = shufflevector <16 x i1> %4, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%9 = shufflevector <16 x i1> %7, <16 x i1> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%10 = shufflevector <8 x i1> %8, <8 x i1> %9, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
%11 = icmp ne <16 x i32> %0, %1
%12 = and <16 x i1> %11, %10
%13 = bitcast <16 x i1> %12 to i16
ret i16 %13
}
define i32 @test_mm512_kortestc(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D) {
; X32-LABEL: test_mm512_kortestc:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-64, %esp
; X32-NEXT: subl $64, %esp
; X32-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; X32-NEXT: vpcmpneqd 8(%ebp), %zmm2, %k1
; X32-NEXT: korw %k0, %k1, %k0
; X32-NEXT: kmovw %k0, %eax
; X32-NEXT: cmpw $-1, %ax
; X32-NEXT: sete %al
; X32-NEXT: andb $1, %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_kortestc:
; X64: # %bb.0: # %entry
; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; X64-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; X64-NEXT: korw %k0, %k1, %k0
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: cmpw $-1, %ax
; X64-NEXT: sete %al
; X64-NEXT: andb $1, %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <8 x i64> %__B to <16 x i32>
%2 = icmp ne <16 x i32> %0, %1
%3 = bitcast <8 x i64> %__C to <16 x i32>
%4 = bitcast <8 x i64> %__D to <16 x i32>
%5 = icmp ne <16 x i32> %3, %4
%6 = or <16 x i1> %5, %2
%7 = bitcast <16 x i1> %6 to i16
%8 = icmp eq i16 %7, -1
%9 = zext i1 %8 to i32
ret i32 %9
}
define i32 @test_mm512_kortestz(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, <8 x i64> %__D) {
; X32-LABEL: test_mm512_kortestz:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-64, %esp
; X32-NEXT: subl $64, %esp
; X32-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; X32-NEXT: vpcmpneqd 8(%ebp), %zmm2, %k1
; X32-NEXT: korw %k0, %k1, %k0
; X32-NEXT: kmovw %k0, %eax
; X32-NEXT: cmpw $0, %ax
; X32-NEXT: sete %al
; X32-NEXT: andb $1, %al
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_kortestz:
; X64: # %bb.0: # %entry
; X64-NEXT: vpcmpneqd %zmm1, %zmm0, %k0
; X64-NEXT: vpcmpneqd %zmm3, %zmm2, %k1
; X64-NEXT: korw %k0, %k1, %k0
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: cmpw $0, %ax
; X64-NEXT: sete %al
; X64-NEXT: andb $1, %al
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <8 x i64> %__B to <16 x i32>
%2 = icmp ne <16 x i32> %0, %1
%3 = bitcast <8 x i64> %__C to <16 x i32>
%4 = bitcast <8 x i64> %__D to <16 x i32>
%5 = icmp ne <16 x i32> %3, %4
%6 = or <16 x i1> %5, %2
%7 = bitcast <16 x i1> %6 to i16
%8 = icmp eq i16 %7, 0
%9 = zext i1 %8 to i32
ret i32 %9
}
define <16 x float> @test_mm512_shuffle_f32x4(<16 x float> %__A, <16 x float> %__B) {
; X32-LABEL: test_mm512_shuffle_f32x4:
; X32: # %bb.0: # %entry
; X32-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_shuffle_f32x4:
; X64: # %bb.0: # %entry
; X64-NEXT: vshuff32x4 {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
; X64-NEXT: retq
entry:
%shuffle = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
ret <16 x float> %shuffle
}
define <16 x float> @test_mm512_mask_shuffle_f32x4(<16 x float> %__W, i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X32-LABEL: test_mm512_mask_shuffle_f32x4:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_shuffle_f32x4:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
; X64-NEXT: retq
entry:
%shuffle = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
%0 = bitcast i16 %__U to <16 x i1>
%1 = select <16 x i1> %0, <16 x float> %shuffle, <16 x float> %__W
ret <16 x float> %1
}
define <16 x float> @test_mm512_maskz_shuffle_f32x4(i16 zeroext %__U, <16 x float> %__A, <16 x float> %__B) {
; X32-LABEL: test_mm512_maskz_shuffle_f32x4:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_shuffle_f32x4:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
; X64-NEXT: retq
entry:
%shuffle = shufflevector <16 x float> %__A, <16 x float> %__B, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
%0 = bitcast i16 %__U to <16 x i1>
%1 = select <16 x i1> %0, <16 x float> %shuffle, <16 x float> zeroinitializer
ret <16 x float> %1
}
define <8 x double> @test_mm512_shuffle_f64x2(<8 x double> %__A, <8 x double> %__B) {
; X32-LABEL: test_mm512_shuffle_f64x2:
; X32: # %bb.0: # %entry
; X32-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_shuffle_f64x2:
; X64: # %bb.0: # %entry
; X64-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
; X64-NEXT: retq
entry:
%shuffle = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
ret <8 x double> %shuffle
}
define <8 x double> @test_mm512_mask_shuffle_f64x2(<8 x double> %__W, i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; X32-LABEL: test_mm512_mask_shuffle_f64x2:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_shuffle_f64x2:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1]
; X64-NEXT: retq
entry:
%shuffle = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
%0 = bitcast i8 %__U to <8 x i1>
%1 = select <8 x i1> %0, <8 x double> %shuffle, <8 x double> %__W
ret <8 x double> %1
}
define <8 x double> @test_mm512_maskz_shuffle_f64x2(i8 zeroext %__U, <8 x double> %__A, <8 x double> %__B) {
; X32-LABEL: test_mm512_maskz_shuffle_f64x2:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_shuffle_f64x2:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1]
; X64-NEXT: retq
entry:
%shuffle = shufflevector <8 x double> %__A, <8 x double> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
%0 = bitcast i8 %__U to <8 x i1>
%1 = select <8 x i1> %0, <8 x double> %shuffle, <8 x double> zeroinitializer
ret <8 x double> %1
}
define <8 x i64> @test_mm512_shuffle_i32x4(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; X32-LABEL: test_mm512_shuffle_i32x4:
; X32: # %bb.0: # %entry
; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_shuffle_i32x4:
; X64: # %bb.0: # %entry
; X64-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
; X64-NEXT: retq
entry:
%shuffle = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
ret <8 x i64> %shuffle
}
define <8 x i64> @test_mm512_mask_shuffle_i32x4(<8 x i64> %__W, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; X32-LABEL: test_mm512_mask_shuffle_i32x4:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_shuffle_i32x4:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3,4,5,6,7],zmm2[0,1,2,3,0,1,2,3]
; X64-NEXT: retq
entry:
%shuffle = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
%0 = bitcast <8 x i64> %shuffle to <16 x i32>
%1 = bitcast <8 x i64> %__W to <16 x i32>
%2 = bitcast i16 %__U to <16 x i1>
%3 = select <16 x i1> %2, <16 x i32> %0, <16 x i32> %1
%4 = bitcast <16 x i32> %3 to <8 x i64>
ret <8 x i64> %4
}
define <8 x i64> @test_mm512_maskz_shuffle_i32x4(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; X32-LABEL: test_mm512_maskz_shuffle_i32x4:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_shuffle_i32x4:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,7],zmm1[0,1,2,3,0,1,2,3]
; X64-NEXT: retq
entry:
%shuffle = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
%0 = bitcast <8 x i64> %shuffle to <16 x i32>
%1 = bitcast i16 %__U to <16 x i1>
%2 = select <16 x i1> %1, <16 x i32> %0, <16 x i32> zeroinitializer
%3 = bitcast <16 x i32> %2 to <8 x i64>
ret <8 x i64> %3
}
define <8 x i64> @test_mm512_shuffle_i64x2(<8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; X32-LABEL: test_mm512_shuffle_i64x2:
; X32: # %bb.0: # %entry
; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_shuffle_i64x2:
; X64: # %bb.0: # %entry
; X64-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[0,1,0,1]
; X64-NEXT: retq
entry:
%shuffle = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
ret <8 x i64> %shuffle
}
define <8 x i64> @test_mm512_mask_shuffle_i64x2(<8 x i64> %__W, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; X32-LABEL: test_mm512_mask_shuffle_i64x2:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_shuffle_i64x2:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,3],zmm2[0,1,0,1]
; X64-NEXT: retq
entry:
%shuffle = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
%0 = bitcast i8 %__U to <8 x i1>
%1 = select <8 x i1> %0, <8 x i64> %shuffle, <8 x i64> %__W
ret <8 x i64> %1
}
define <8 x i64> @test_mm512_maskz_shuffle_i64x2(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) local_unnamed_addr #0 {
; X32-LABEL: test_mm512_maskz_shuffle_i64x2:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_shuffle_i64x2:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],zmm1[0,1,0,1]
; X64-NEXT: retq
entry:
%shuffle = shufflevector <8 x i64> %__A, <8 x i64> %__B, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 8, i32 9>
%0 = bitcast i8 %__U to <8 x i1>
%1 = select <8 x i1> %0, <8 x i64> %shuffle, <8 x i64> zeroinitializer
ret <8 x i64> %1
}
define zeroext i16 @test_mm512_testn_epi32_mask(<8 x i64> %__A, <8 x i64> %__B) {
; X32-LABEL: test_mm512_testn_epi32_mask:
; X32: # %bb.0: # %entry
; X32-NEXT: vptestnmd %zmm0, %zmm1, %k0
; X32-NEXT: kmovw %k0, %eax
; X32-NEXT: movzwl %ax, %eax
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_testn_epi32_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: vptestnmd %zmm0, %zmm1, %k0
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%and1.i.i = and <8 x i64> %__B, %__A
%0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
%1 = icmp eq <16 x i32> %0, zeroinitializer
%2 = bitcast <16 x i1> %1 to i16
ret i16 %2
}
define zeroext i16 @test_mm512_mask_testn_epi32_mask(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X32-LABEL: test_mm512_mask_testn_epi32_mask:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vptestnmd %zmm0, %zmm1, %k0 {%k1}
; X32-NEXT: kmovw %k0, %eax
; X32-NEXT: movzwl %ax, %eax
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_testn_epi32_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vptestnmd %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%and1.i.i = and <8 x i64> %__B, %__A
%0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
%1 = icmp eq <16 x i32> %0, zeroinitializer
%2 = bitcast i16 %__U to <16 x i1>
%3 = and <16 x i1> %1, %2
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i8 @test_mm512_testn_epi64_mask(<8 x i64> %__A, <8 x i64> %__B) {
; X32-LABEL: test_mm512_testn_epi64_mask:
; X32: # %bb.0: # %entry
; X32-NEXT: vptestnmq %zmm0, %zmm1, %k0
; X32-NEXT: kmovw %k0, %eax
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_testn_epi64_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: vptestnmq %zmm0, %zmm1, %k0
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%and1.i.i = and <8 x i64> %__B, %__A
%0 = icmp eq <8 x i64> %and1.i.i, zeroinitializer
%1 = bitcast <8 x i1> %0 to i8
ret i8 %1
}
define zeroext i8 @test_mm512_mask_testn_epi64_mask(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X32-LABEL: test_mm512_mask_testn_epi64_mask:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vptestnmq %zmm0, %zmm1, %k0 {%k1}
; X32-NEXT: kmovw %k0, %eax
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_testn_epi64_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vptestnmq %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%and1.i.i = and <8 x i64> %__B, %__A
%0 = icmp eq <8 x i64> %and1.i.i, zeroinitializer
%1 = bitcast i8 %__U to <8 x i1>
%2 = and <8 x i1> %0, %1
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
define zeroext i16 @test_mm512_mask_test_epi32_mask(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X32-LABEL: test_mm512_mask_test_epi32_mask:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k1}
; X32-NEXT: kmovw %k0, %eax
; X32-NEXT: movzwl %ax, %eax
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_test_epi32_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vptestmd %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: movzwl %ax, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%and1.i.i = and <8 x i64> %__B, %__A
%0 = bitcast <8 x i64> %and1.i.i to <16 x i32>
%1 = icmp ne <16 x i32> %0, zeroinitializer
%2 = bitcast i16 %__U to <16 x i1>
%3 = and <16 x i1> %1, %2
%4 = bitcast <16 x i1> %3 to i16
ret i16 %4
}
define zeroext i8 @test_mm512_mask_test_epi64_mask(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X32-LABEL: test_mm512_mask_test_epi64_mask:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1}
; X32-NEXT: kmovw %k0, %eax
; X32-NEXT: movzbl %al, %eax
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_test_epi64_mask:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vptestmq %zmm0, %zmm1, %k0 {%k1}
; X64-NEXT: kmovw %k0, %eax
; X64-NEXT: movzbl %al, %eax
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%and1.i.i = and <8 x i64> %__B, %__A
%0 = icmp ne <8 x i64> %and1.i.i, zeroinitializer
%1 = bitcast i8 %__U to <8 x i1>
%2 = and <8 x i1> %0, %1
%3 = bitcast <8 x i1> %2 to i8
ret i8 %3
}
define <8 x i64> @test_mm512_mask_set1_epi32(<8 x i64> %__O, i16 zeroext %__M, i32 %__A) {
; X32-LABEL: test_mm512_mask_set1_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpbroadcastd %eax, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_set1_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastd %esi, %zmm0 {%k1}
; X64-NEXT: retq
entry:
%vecinit.i.i = insertelement <16 x i32> undef, i32 %__A, i32 0
%vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
%0 = bitcast <8 x i64> %__O to <16 x i32>
%1 = bitcast i16 %__M to <16 x i1>
%2 = select <16 x i1> %1, <16 x i32> %vecinit15.i.i, <16 x i32> %0
%3 = bitcast <16 x i32> %2 to <8 x i64>
ret <8 x i64> %3
}
define <8 x i64> @test_mm512_maskz_set1_epi32(i16 zeroext %__M, i32 %__A) {
; X32-LABEL: test_mm512_maskz_set1_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpbroadcastd %eax, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_set1_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastd %esi, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%vecinit.i.i = insertelement <16 x i32> undef, i32 %__A, i32 0
%vecinit15.i.i = shufflevector <16 x i32> %vecinit.i.i, <16 x i32> undef, <16 x i32> zeroinitializer
%0 = bitcast i16 %__M to <16 x i1>
%1 = select <16 x i1> %0, <16 x i32> %vecinit15.i.i, <16 x i32> zeroinitializer
%2 = bitcast <16 x i32> %1 to <8 x i64>
ret <8 x i64> %2
}
define <8 x i64> @test_mm512_mask_set1_epi64(<8 x i64> %__O, i8 zeroext %__M, i64 %__A) {
; X32-LABEL: test_mm512_mask_set1_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastq %xmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_set1_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastq %rsi, %zmm0 {%k1}
; X64-NEXT: retq
entry:
%vecinit.i.i = insertelement <8 x i64> undef, i64 %__A, i32 0
%vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer
%0 = bitcast i8 %__M to <8 x i1>
%1 = select <8 x i1> %0, <8 x i64> %vecinit7.i.i, <8 x i64> %__O
ret <8 x i64> %1
}
define <8 x i64> @test_mm512_maskz_set1_epi64(i8 zeroext %__M, i64 %__A) {
; X32-LABEL: test_mm512_maskz_set1_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastq %xmm0, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_set1_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastq %rsi, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%vecinit.i.i = insertelement <8 x i64> undef, i64 %__A, i32 0
%vecinit7.i.i = shufflevector <8 x i64> %vecinit.i.i, <8 x i64> undef, <8 x i32> zeroinitializer
%0 = bitcast i8 %__M to <8 x i1>
%1 = select <8 x i1> %0, <8 x i64> %vecinit7.i.i, <8 x i64> zeroinitializer
ret <8 x i64> %1
}
define <8 x i64> @test_mm512_broadcastd_epi32(<2 x i64> %a0) {
; X32-LABEL: test_mm512_broadcastd_epi32:
; X32: # %bb.0:
; X32-NEXT: vbroadcastss %xmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_broadcastd_epi32:
; X64: # %bb.0:
; X64-NEXT: vbroadcastss %xmm0, %zmm0
; X64-NEXT: retq
%arg0 = bitcast <2 x i64> %a0 to <4 x i32>
%res0 = shufflevector <4 x i32> %arg0, <4 x i32> undef, <16 x i32> zeroinitializer
%res1 = bitcast <16 x i32> %res0 to <8 x i64>
ret <8 x i64> %res1
}
define <8 x i64> @test_mm512_mask_broadcastd_epi32(<8 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm512_mask_broadcastd_epi32:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpbroadcastd %xmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_broadcastd_epi32:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastd %xmm1, %zmm0 {%k1}
; X64-NEXT: retq
%arg0 = bitcast <8 x i64> %a0 to <16 x i32>
%arg1 = bitcast i16 %a1 to <16 x i1>
%arg2 = bitcast <2 x i64> %a2 to <4 x i32>
%res0 = shufflevector <4 x i32> %arg2, <4 x i32> undef, <16 x i32> zeroinitializer
%res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
%res2 = bitcast <16 x i32> %res1 to <8 x i64>
ret <8 x i64> %res2
}
define <8 x i64> @test_mm512_maskz_broadcastd_epi32(i16 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_broadcastd_epi32:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpbroadcastd %xmm0, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_broadcastd_epi32:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastd %xmm0, %zmm0 {%k1} {z}
; X64-NEXT: retq
%arg0 = bitcast i16 %a0 to <16 x i1>
%arg1 = bitcast <2 x i64> %a1 to <4 x i32>
%res0 = shufflevector <4 x i32> %arg1, <4 x i32> undef, <16 x i32> zeroinitializer
%res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
%res2 = bitcast <16 x i32> %res1 to <8 x i64>
ret <8 x i64> %res2
}
define <8 x i64> @test_mm512_broadcastq_epi64(<2 x i64> %a0) {
; X32-LABEL: test_mm512_broadcastq_epi64:
; X32: # %bb.0:
; X32-NEXT: vbroadcastsd %xmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_broadcastq_epi64:
; X64: # %bb.0:
; X64-NEXT: vbroadcastsd %xmm0, %zmm0
; X64-NEXT: retq
%res = shufflevector <2 x i64> %a0, <2 x i64> undef, <8 x i32> zeroinitializer
ret <8 x i64> %res
}
define <8 x i64> @test_mm512_mask_broadcastq_epi64(<8 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm512_mask_broadcastq_epi64:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastq %xmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_broadcastq_epi64:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastq %xmm1, %zmm0 {%k1}
; X64-NEXT: retq
%arg1 = bitcast i8 %a1 to <8 x i1>
%res0 = shufflevector <2 x i64> %a2, <2 x i64> undef, <8 x i32> zeroinitializer
%res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
ret <8 x i64> %res1
}
define <8 x i64> @test_mm512_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_broadcastq_epi64:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastq %xmm0, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_broadcastq_epi64:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastq %xmm0, %zmm0 {%k1} {z}
; X64-NEXT: retq
%arg0 = bitcast i8 %a0 to <8 x i1>
%res0 = shufflevector <2 x i64> %a1, <2 x i64> undef, <8 x i32> zeroinitializer
%res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
ret <8 x i64> %res1
}
define <8 x double> @test_mm512_broadcastsd_pd(<2 x double> %a0) {
; X32-LABEL: test_mm512_broadcastsd_pd:
; X32: # %bb.0:
; X32-NEXT: vbroadcastsd %xmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_broadcastsd_pd:
; X64: # %bb.0:
; X64-NEXT: vbroadcastsd %xmm0, %zmm0
; X64-NEXT: retq
%res = shufflevector <2 x double> %a0, <2 x double> undef, <8 x i32> zeroinitializer
ret <8 x double> %res
}
define <8 x double> @test_mm512_mask_broadcastsd_pd(<8 x double> %a0, i8 %a1, <2 x double> %a2) {
; X32-LABEL: test_mm512_mask_broadcastsd_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vbroadcastsd %xmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_broadcastsd_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vbroadcastsd %xmm1, %zmm0 {%k1}
; X64-NEXT: retq
%arg1 = bitcast i8 %a1 to <8 x i1>
%res0 = shufflevector <2 x double> %a2, <2 x double> undef, <8 x i32> zeroinitializer
%res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
ret <8 x double> %res1
}
define <8 x double> @test_mm512_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm512_maskz_broadcastsd_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_broadcastsd_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
; X64-NEXT: retq
%arg0 = bitcast i8 %a0 to <8 x i1>
%res0 = shufflevector <2 x double> %a1, <2 x double> undef, <8 x i32> zeroinitializer
%res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
ret <8 x double> %res1
}
define <16 x float> @test_mm512_broadcastss_ps(<4 x float> %a0) {
; X32-LABEL: test_mm512_broadcastss_ps:
; X32: # %bb.0:
; X32-NEXT: vbroadcastss %xmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_broadcastss_ps:
; X64: # %bb.0:
; X64-NEXT: vbroadcastss %xmm0, %zmm0
; X64-NEXT: retq
%res = shufflevector <4 x float> %a0, <4 x float> undef, <16 x i32> zeroinitializer
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_broadcastss_ps(<16 x float> %a0, i16 %a1, <4 x float> %a2) {
; X32-LABEL: test_mm512_mask_broadcastss_ps:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vbroadcastss %xmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_broadcastss_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vbroadcastss %xmm1, %zmm0 {%k1}
; X64-NEXT: retq
%arg1 = bitcast i16 %a1 to <16 x i1>
%res0 = shufflevector <4 x float> %a2, <4 x float> undef, <16 x i32> zeroinitializer
%res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
ret <16 x float> %res1
}
define <16 x float> @test_mm512_maskz_broadcastss_ps(i16 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm512_maskz_broadcastss_ps:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_broadcastss_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
; X64-NEXT: retq
%arg0 = bitcast i16 %a0 to <16 x i1>
%res0 = shufflevector <4 x float> %a1, <4 x float> undef, <16 x i32> zeroinitializer
%res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
ret <16 x float> %res1
}
define <8 x double> @test_mm512_movddup_pd(<8 x double> %a0) {
; X32-LABEL: test_mm512_movddup_pd:
; X32: # %bb.0:
; X32-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_movddup_pd:
; X64: # %bb.0:
; X64-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT: retq
%res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
ret <8 x double> %res
}
define <8 x double> @test_mm512_mask_movddup_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_mask_movddup_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_movddup_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6]
; X64-NEXT: retq
%arg1 = bitcast i8 %a1 to <8 x i1>
%res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
%res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
ret <8 x double> %res1
}
define <8 x double> @test_mm512_maskz_movddup_pd(i8 %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_maskz_movddup_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_movddup_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT: retq
%arg0 = bitcast i8 %a0 to <8 x i1>
%res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
%res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
ret <8 x double> %res1
}
define <16 x float> @test_mm512_movehdup_ps(<16 x float> %a0) {
; X32-LABEL: test_mm512_movehdup_ps:
; X32: # %bb.0:
; X32-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_movehdup_ps:
; X64: # %bb.0:
; X64-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT: retq
%res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_movehdup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_mask_movehdup_ps:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_movehdup_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = zmm1[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT: retq
%arg1 = bitcast i16 %a1 to <16 x i1>
%res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
%res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
ret <16 x float> %res1
}
define <16 x float> @test_mm512_maskz_movehdup_ps(i16 %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_maskz_movehdup_ps:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_movehdup_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT: retq
%arg0 = bitcast i16 %a0 to <16 x i1>
%res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
%res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
ret <16 x float> %res1
}
define <16 x float> @test_mm512_moveldup_ps(<16 x float> %a0) {
; X32-LABEL: test_mm512_moveldup_ps:
; X32: # %bb.0:
; X32-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_moveldup_ps:
; X64: # %bb.0:
; X64-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT: retq
%res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_moveldup_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_mask_moveldup_ps:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_moveldup_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = zmm1[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT: retq
%arg1 = bitcast i16 %a1 to <16 x i1>
%res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
%res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
ret <16 x float> %res1
}
define <16 x float> @test_mm512_maskz_moveldup_ps(i16 %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_maskz_moveldup_ps:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_moveldup_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT: retq
%arg0 = bitcast i16 %a0 to <16 x i1>
%res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
%res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
ret <16 x float> %res1
}
define <8 x double> @test_mm512_permute_pd(<8 x double> %a0) {
; X32-LABEL: test_mm512_permute_pd:
; X32: # %bb.0:
; X32-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,2,4,4,6,6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_permute_pd:
; X64: # %bb.0:
; X64-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,2,4,4,6,6]
; X64-NEXT: retq
%res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
ret <8 x double> %res
}
define <8 x double> @test_mm512_mask_permute_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_mask_permute_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_permute_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = zmm1[0,1,2,2,4,4,6,6]
; X64-NEXT: retq
%arg1 = bitcast i8 %a1 to <8 x i1>
%res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
%res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
ret <8 x double> %res1
}
define <8 x double> @test_mm512_maskz_permute_pd(i8 %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_maskz_permute_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_permute_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,6]
; X64-NEXT: retq
%arg0 = bitcast i8 %a0 to <8 x i1>
%res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
%res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
ret <8 x double> %res1
}
define <16 x float> @test_mm512_permute_ps(<16 x float> %a0) {
; X32-LABEL: test_mm512_permute_ps:
; X32: # %bb.0:
; X32-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_permute_ps:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X64-NEXT: retq
%res = shufflevector <16 x float> %a0, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_permute_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_mask_permute_ps:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_permute_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm1[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X64-NEXT: retq
%arg1 = bitcast i16 %a1 to <16 x i1>
%res0 = shufflevector <16 x float> %a2, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
%res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
ret <16 x float> %res1
}
define <16 x float> @test_mm512_maskz_permute_ps(i16 %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_maskz_permute_ps:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_permute_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,0,0,6,4,4,4,10,8,8,8,14,12,12,12]
; X64-NEXT: retq
%arg0 = bitcast i16 %a0 to <16 x i1>
%res0 = shufflevector <16 x float> %a1, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 0, i32 6, i32 4, i32 4, i32 4, i32 10, i32 8, i32 8, i32 8, i32 14, i32 12, i32 12, i32 12>
%res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
ret <16 x float> %res1
}
define <8 x i64> @test_mm512_permutex_epi64(<8 x i64> %a0) {
; X32-LABEL: test_mm512_permutex_epi64:
; X32: # %bb.0:
; X32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_permutex_epi64:
; X64: # %bb.0:
; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT: retq
%res = shufflevector <8 x i64> %a0, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x i64> %res
}
define <8 x i64> @test_mm512_mask_permutex_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_mask_permutex_epi64:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_permutex_epi64:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpermq {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X64-NEXT: retq
%arg1 = bitcast i8 %a1 to <8 x i1>
%res0 = shufflevector <8 x i64> %a2, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
%res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
ret <8 x i64> %res1
}
define <8 x i64> @test_mm512_maskz_permutex_epi64(i8 %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_permutex_epi64:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_permutex_epi64:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT: retq
%arg0 = bitcast i8 %a0 to <8 x i1>
%res0 = shufflevector <8 x i64> %a1, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
%res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
ret <8 x i64> %res1
}
define <8 x double> @test_mm512_permutex_pd(<8 x double> %a0) {
; X32-LABEL: test_mm512_permutex_pd:
; X32: # %bb.0:
; X32-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_permutex_pd:
; X64: # %bb.0:
; X64-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT: retq
%res = shufflevector <8 x double> %a0, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
ret <8 x double> %res
}
define <8 x double> @test_mm512_mask_permutex_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_mask_permutex_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_permutex_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = zmm1[0,0,0,0,4,4,4,4]
; X64-NEXT: retq
%arg1 = bitcast i8 %a1 to <8 x i1>
%res0 = shufflevector <8 x double> %a2, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
%res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
ret <8 x double> %res1
}
define <8 x double> @test_mm512_maskz_permutex_pd(i8 %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_maskz_permutex_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_permutex_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
; X64-NEXT: retq
%arg0 = bitcast i8 %a0 to <8 x i1>
%res0 = shufflevector <8 x double> %a1, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
%res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
ret <8 x double> %res1
}
define <8 x i64> @test_mm512_shuffle_epi32(<8 x i64> %a0) {
; X32-LABEL: test_mm512_shuffle_epi32:
; X32: # %bb.0:
; X32-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_shuffle_epi32:
; X64: # %bb.0:
; X64-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X64-NEXT: retq
%arg0 = bitcast <8 x i64> %a0 to <16 x i32>
%res0 = shufflevector <16 x i32> %arg0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
%res1 = bitcast <16 x i32> %res0 to <8 x i64>
ret <8 x i64> %res1
}
define <8 x i64> @test_mm512_mask_shuffle_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_mask_shuffle_epi32:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_shuffle_epi32:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = zmm1[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X64-NEXT: retq
%arg0 = bitcast <8 x i64> %a0 to <16 x i32>
%arg1 = bitcast i16 %a1 to <16 x i1>
%arg2 = bitcast <8 x i64> %a2 to <16 x i32>
%res0 = shufflevector <16 x i32> %arg2, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
%res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
%res2 = bitcast <16 x i32> %res1 to <8 x i64>
ret <8 x i64> %res2
}
define <8 x i64> @test_mm512_maskz_shuffle_epi32(i16 %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_maskz_shuffle_epi32:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_shuffle_epi32:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,0,0,5,4,4,4,9,8,8,8,13,12,12,12]
; X64-NEXT: retq
%arg0 = bitcast i16 %a0 to <16 x i1>
%arg1 = bitcast <8 x i64> %a1 to <16 x i32>
%res0 = shufflevector <16 x i32> %arg1, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4, i32 9, i32 8, i32 8, i32 8, i32 13, i32 12, i32 12, i32 12>
%res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
%res2 = bitcast <16 x i32> %res1 to <8 x i64>
ret <8 x i64> %res2
}
define <8 x double> @test_mm512_shuffle_pd(<8 x double> %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_shuffle_pd:
; X32: # %bb.0:
; X32-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_shuffle_pd:
; X64: # %bb.0:
; X64-NEXT: vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: retq
%res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
ret <8 x double> %res
}
define <8 x double> @test_mm512_mask_shuffle_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
; X32-LABEL: test_mm512_mask_shuffle_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_shuffle_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshufpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[3],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X64-NEXT: retq
%arg1 = bitcast i8 %a1 to <8 x i1>
%res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
%res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
ret <8 x double> %res1
}
define <8 x double> @test_mm512_maskz_shuffle_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_maskz_shuffle_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_shuffle_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[3],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: retq
%arg0 = bitcast i8 %a0 to <8 x i1>
%res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 0, i32 8, i32 3, i32 10, i32 4, i32 12, i32 6, i32 14>
%res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
ret <8 x double> %res1
}
define <8 x i64> @test_mm512_unpackhi_epi32(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpackhi_epi32:
; X32: # %bb.0:
; X32-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_unpackhi_epi32:
; X64: # %bb.0:
; X64-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT: retq
%arg0 = bitcast <8 x i64> %a0 to <16 x i32>
%arg1 = bitcast <8 x i64> %a1 to <16 x i32>
%res0 = shufflevector <16 x i32> %arg0, <16 x i32> %arg1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
%res1 = bitcast <16 x i32> %res0 to <8 x i64>
ret <8 x i64> %res1
}
define <8 x i64> @test_mm512_mask_unpackhi_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X32-LABEL: test_mm512_mask_unpackhi_epi32:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_unpackhi_epi32:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X64-NEXT: retq
%arg0 = bitcast <8 x i64> %a0 to <16 x i32>
%arg1 = bitcast i16 %a1 to <16 x i1>
%arg2 = bitcast <8 x i64> %a2 to <16 x i32>
%arg3 = bitcast <8 x i64> %a3 to <16 x i32>
%res0 = shufflevector <16 x i32> %arg2, <16 x i32> %arg3, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
%res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
%res2 = bitcast <16 x i32> %res1 to <8 x i64>
ret <8 x i64> %res2
}
define <8 x i64> @test_mm512_maskz_unpackhi_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_maskz_unpackhi_epi32:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_epi32:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpunpckhdq {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT: retq
%arg0 = bitcast i16 %a0 to <16 x i1>
%arg1 = bitcast <8 x i64> %a1 to <16 x i32>
%arg2 = bitcast <8 x i64> %a2 to <16 x i32>
%res0 = shufflevector <16 x i32> %arg1, <16 x i32> %arg2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
%res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
%res2 = bitcast <16 x i32> %res1 to <8 x i64>
ret <8 x i64> %res2
}
define <8 x i64> @test_mm512_unpackhi_epi64(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpackhi_epi64:
; X32: # %bb.0:
; X32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_unpackhi_epi64:
; X64: # %bb.0:
; X64-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT: retq
%res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
ret <8 x i64> %res
}
define <8 x i64> @test_mm512_mask_unpackhi_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X32-LABEL: test_mm512_mask_unpackhi_epi64:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_unpackhi_epi64:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X64-NEXT: retq
%arg1 = bitcast i8 %a1 to <8 x i1>
%res0 = shufflevector <8 x i64> %a2, <8 x i64> %a3, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
%res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
ret <8 x i64> %res1
}
define <8 x i64> @test_mm512_maskz_unpackhi_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_maskz_unpackhi_epi64:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpunpckhqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_epi64:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpunpckhqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT: retq
%arg0 = bitcast i8 %a0 to <8 x i1>
%res0 = shufflevector <8 x i64> %a1, <8 x i64> %a2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
%res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
ret <8 x i64> %res1
}
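
; The pd tests apply the same qword interleave to <8 x double>; masked and
; unmasked forms all stay on the FP-domain vunpckhpd.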
define <8 x double> @test_mm512_unpackhi_pd(<8 x double> %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_unpackhi_pd:
; X32: # %bb.0:
; X32-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_unpackhi_pd:
; X64: # %bb.0:
; X64-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT: retq
%res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
ret <8 x double> %res
}
define <8 x double> @test_mm512_mask_unpackhi_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
; X32-LABEL: test_mm512_mask_unpackhi_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_unpackhi_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} = zmm1[1],zmm2[1],zmm1[3],zmm2[3],zmm1[5],zmm2[5],zmm1[7],zmm2[7]
; X64-NEXT: retq
%arg1 = bitcast i8 %a1 to <8 x i1>
%res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
%res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
ret <8 x double> %res1
}
define <8 x double> @test_mm512_maskz_unpackhi_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_maskz_unpackhi_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
; X64-NEXT: retq
%arg0 = bitcast i8 %a0 to <8 x i1>
%res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
%res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
ret <8 x double> %res1
}
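
; The ps tests apply the dword interleave to <16 x float>, staying on
; vunpckhps in the unmasked, merge-masked, and zero-masked forms alike.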
define <16 x float> @test_mm512_unpackhi_ps(<16 x float> %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_unpackhi_ps:
; X32: # %bb.0:
; X32-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_unpackhi_ps:
; X64: # %bb.0:
; X64-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT: retq
%res = shufflevector <16 x float> %a0, <16 x float> %a1, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_unpackhi_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
; X32-LABEL: test_mm512_mask_unpackhi_ps:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_unpackhi_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} = zmm1[2],zmm2[2],zmm1[3],zmm2[3],zmm1[6],zmm2[6],zmm1[7],zmm2[7],zmm1[10],zmm2[10],zmm1[11],zmm2[11],zmm1[14],zmm2[14],zmm1[15],zmm2[15]
; X64-NEXT: retq
%arg1 = bitcast i16 %a1 to <16 x i1>
%res0 = shufflevector <16 x float> %a2, <16 x float> %a3, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
%res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
ret <16 x float> %res1
}
define <16 x float> @test_mm512_maskz_unpackhi_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_maskz_unpackhi_ps:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_unpackhi_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
; X64-NEXT: retq
%arg0 = bitcast i16 %a0 to <16 x i1>
%res0 = shufflevector <16 x float> %a1, <16 x float> %a2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
%res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
ret <16 x float> %res1
}
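
; The unpacklo tests below mirror the unpackhi tests above, using the low-half
; lane indices (<0,16,1,17,4,20,...> for dwords, <0,8,2,10,4,12,6,14> for
; qwords) and the corresponding vunpcklps/vpunpckldq and vunpcklpd/vpunpcklqdq
; forms.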
define <8 x i64> @test_mm512_unpacklo_epi32(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpacklo_epi32:
; X32: # %bb.0:
; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_unpacklo_epi32:
; X64: # %bb.0:
; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT: retq
%arg0 = bitcast <8 x i64> %a0 to <16 x i32>
%arg1 = bitcast <8 x i64> %a1 to <16 x i32>
%res0 = shufflevector <16 x i32> %arg0, <16 x i32> %arg1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
%res1 = bitcast <16 x i32> %res0 to <8 x i64>
ret <8 x i64> %res1
}
define <8 x i64> @test_mm512_mask_unpacklo_epi32(<8 x i64> %a0, i16 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X32-LABEL: test_mm512_mask_unpacklo_epi32:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_unpacklo_epi32:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; X64-NEXT: retq
%arg0 = bitcast <8 x i64> %a0 to <16 x i32>
%arg1 = bitcast i16 %a1 to <16 x i1>
%arg2 = bitcast <8 x i64> %a2 to <16 x i32>
%arg3 = bitcast <8 x i64> %a3 to <16 x i32>
%res0 = shufflevector <16 x i32> %arg2, <16 x i32> %arg3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
%res1 = select <16 x i1> %arg1, <16 x i32> %res0, <16 x i32> %arg0
%res2 = bitcast <16 x i32> %res1 to <8 x i64>
ret <8 x i64> %res2
}
define <8 x i64> @test_mm512_maskz_unpacklo_epi32(i16 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_maskz_unpacklo_epi32:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_epi32:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpunpckldq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT: retq
%arg0 = bitcast i16 %a0 to <16 x i1>
%arg1 = bitcast <8 x i64> %a1 to <16 x i32>
%arg2 = bitcast <8 x i64> %a2 to <16 x i32>
%res0 = shufflevector <16 x i32> %arg1, <16 x i32> %arg2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
%res1 = select <16 x i1> %arg0, <16 x i32> %res0, <16 x i32> zeroinitializer
%res2 = bitcast <16 x i32> %res1 to <8 x i64>
ret <8 x i64> %res2
}
define <8 x i64> @test_mm512_unpacklo_epi64(<8 x i64> %a0, <8 x i64> %a1) {
; X32-LABEL: test_mm512_unpacklo_epi64:
; X32: # %bb.0:
; X32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_unpacklo_epi64:
; X64: # %bb.0:
; X64-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: retq
%res = shufflevector <8 x i64> %a0, <8 x i64> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
ret <8 x i64> %res
}
define <8 x i64> @test_mm512_mask_unpacklo_epi64(<8 x i64> %a0, i8 %a1, <8 x i64> %a2, <8 x i64> %a3) {
; X32-LABEL: test_mm512_mask_unpacklo_epi64:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_unpacklo_epi64:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X64-NEXT: retq
%arg1 = bitcast i8 %a1 to <8 x i1>
%res0 = shufflevector <8 x i64> %a2, <8 x i64> %a3, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
%res1 = select <8 x i1> %arg1, <8 x i64> %res0, <8 x i64> %a0
ret <8 x i64> %res1
}
define <8 x i64> @test_mm512_maskz_unpacklo_epi64(i8 %a0, <8 x i64> %a1, <8 x i64> %a2) {
; X32-LABEL: test_mm512_maskz_unpacklo_epi64:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_epi64:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpunpcklqdq {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: retq
%arg0 = bitcast i8 %a0 to <8 x i1>
%res0 = shufflevector <8 x i64> %a1, <8 x i64> %a2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
%res1 = select <8 x i1> %arg0, <8 x i64> %res0, <8 x i64> zeroinitializer
ret <8 x i64> %res1
}
define <8 x double> @test_mm512_unpacklo_pd(<8 x double> %a0, <8 x double> %a1) {
; X32-LABEL: test_mm512_unpacklo_pd:
; X32: # %bb.0:
; X32-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_unpacklo_pd:
; X64: # %bb.0:
; X64-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: retq
%res = shufflevector <8 x double> %a0, <8 x double> %a1, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
ret <8 x double> %res
}
define <8 x double> @test_mm512_mask_unpacklo_pd(<8 x double> %a0, i8 %a1, <8 x double> %a2, <8 x double> %a3) {
; X32-LABEL: test_mm512_mask_unpacklo_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_unpacklo_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[2],zmm2[2],zmm1[4],zmm2[4],zmm1[6],zmm2[6]
; X64-NEXT: retq
%arg1 = bitcast i8 %a1 to <8 x i1>
%res0 = shufflevector <8 x double> %a2, <8 x double> %a3, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
%res1 = select <8 x i1> %arg1, <8 x double> %res0, <8 x double> %a0
ret <8 x double> %res1
}
define <8 x double> @test_mm512_maskz_unpacklo_pd(i8 %a0, <8 x double> %a1, <8 x double> %a2) {
; X32-LABEL: test_mm512_maskz_unpacklo_pd:
; X32: # %bb.0:
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_pd:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
; X64-NEXT: retq
%arg0 = bitcast i8 %a0 to <8 x i1>
%res0 = shufflevector <8 x double> %a1, <8 x double> %a2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
%res1 = select <8 x i1> %arg0, <8 x double> %res0, <8 x double> zeroinitializer
ret <8 x double> %res1
}
define <16 x float> @test_mm512_unpacklo_ps(<16 x float> %a0, <16 x float> %a1) {
; X32-LABEL: test_mm512_unpacklo_ps:
; X32: # %bb.0:
; X32-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_unpacklo_ps:
; X64: # %bb.0:
; X64-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT: retq
%res = shufflevector <16 x float> %a0, <16 x float> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
ret <16 x float> %res
}
define <16 x float> @test_mm512_mask_unpacklo_ps(<16 x float> %a0, i16 %a1, <16 x float> %a2, <16 x float> %a3) {
; X32-LABEL: test_mm512_mask_unpacklo_ps:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_unpacklo_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} = zmm1[0],zmm2[0],zmm1[1],zmm2[1],zmm1[4],zmm2[4],zmm1[5],zmm2[5],zmm1[8],zmm2[8],zmm1[9],zmm2[9],zmm1[12],zmm2[12],zmm1[13],zmm2[13]
; X64-NEXT: retq
%arg1 = bitcast i16 %a1 to <16 x i1>
%res0 = shufflevector <16 x float> %a2, <16 x float> %a3, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
%res1 = select <16 x i1> %arg1, <16 x float> %res0, <16 x float> %a0
ret <16 x float> %res1
}
define <16 x float> @test_mm512_maskz_unpacklo_ps(i16 %a0, <16 x float> %a1, <16 x float> %a2) {
; X32-LABEL: test_mm512_maskz_unpacklo_ps:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_unpacklo_ps:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
; X64-NEXT: retq
%arg0 = bitcast i16 %a0 to <16 x i1>
%res0 = shufflevector <16 x float> %a1, <16 x float> %a2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
%res1 = select <16 x i1> %arg0, <16 x float> %res0, <16 x float> zeroinitializer
ret <16 x float> %res1
}
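
; The zext tests build a 512-bit value whose low 128 or 256 bits come from the
; source and whose remaining bits are zero. A self-move such as
; "vmovaps %xmm0, %xmm0" is enough, since VEX/EVEX-encoded writes to an
; XMM/YMM register clear the upper bits of the containing ZMM register.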
define <8 x double> @test_mm512_zextpd128_pd512(<2 x double> %a0) nounwind {
; X32-LABEL: test_mm512_zextpd128_pd512:
; X32: # %bb.0:
; X32-NEXT: vmovaps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextpd128_pd512:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm0, %xmm0
; X64-NEXT: retq
%res = shufflevector <2 x double> %a0, <2 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
ret <8 x double> %res
}
define <8 x double> @test_mm512_zextpd256_pd512(<4 x double> %a0) nounwind {
; X32-LABEL: test_mm512_zextpd256_pd512:
; X32: # %bb.0:
; X32-NEXT: vmovaps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextpd256_pd512:
; X64: # %bb.0:
; X64-NEXT: vmovaps %ymm0, %ymm0
; X64-NEXT: retq
%res = shufflevector <4 x double> %a0, <4 x double> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x double> %res
}
define <16 x float> @test_mm512_zextps128_ps512(<4 x float> %a0) nounwind {
; X32-LABEL: test_mm512_zextps128_ps512:
; X32: # %bb.0:
; X32-NEXT: vmovaps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextps128_ps512:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm0, %xmm0
; X64-NEXT: retq
%res = shufflevector <4 x float> %a0, <4 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7>
ret <16 x float> %res
}
define <16 x float> @test_mm512_zextps256_ps512(<8 x float> %a0) nounwind {
; X32-LABEL: test_mm512_zextps256_ps512:
; X32: # %bb.0:
; X32-NEXT: vmovaps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextps256_ps512:
; X64: # %bb.0:
; X64-NEXT: vmovaps %ymm0, %ymm0
; X64-NEXT: retq
%res = shufflevector <8 x float> %a0, <8 x float> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
ret <16 x float> %res
}
define <8 x i64> @test_mm512_zextsi128_si512(<2 x i64> %a0) nounwind {
; X32-LABEL: test_mm512_zextsi128_si512:
; X32: # %bb.0:
; X32-NEXT: vmovaps %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextsi128_si512:
; X64: # %bb.0:
; X64-NEXT: vmovaps %xmm0, %xmm0
; X64-NEXT: retq
%res = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 2, i32 3>
ret <8 x i64> %res
}
define <8 x i64> @test_mm512_zextsi256_si512(<4 x i64> %a0) nounwind {
; X32-LABEL: test_mm512_zextsi256_si512:
; X32: # %bb.0:
; X32-NEXT: vmovaps %ymm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_zextsi256_si512:
; X64: # %bb.0:
; X64-NEXT: vmovaps %ymm0, %ymm0
; X64-NEXT: retq
%res = shufflevector <4 x i64> %a0, <4 x i64> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i64> %res
}
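
; mul_epi32 models _mm512_mul_epi32: sign-extend the low dword of each qword
; (shl/ashr by 32), then multiply. The unmasked form still carries the
; explicit vpsllq/vpsraq sign-extension ahead of vpmuldq, while the masked
; forms below match the whole pattern into a single write-masked vpmuldq.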
define <8 x i64> @test_mm512_mul_epi32(<8 x i64> %__A, <8 x i64> %__B) nounwind {
; X32-LABEL: test_mm512_mul_epi32:
; X32: # %bb.0:
; X32-NEXT: vpsllq $32, %zmm0, %zmm0
; X32-NEXT: vpsraq $32, %zmm0, %zmm0
; X32-NEXT: vpsllq $32, %zmm1, %zmm1
; X32-NEXT: vpsraq $32, %zmm1, %zmm1
; X32-NEXT: vpmuldq %zmm0, %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mul_epi32:
; X64: # %bb.0:
; X64-NEXT: vpsllq $32, %zmm0, %zmm0
; X64-NEXT: vpsraq $32, %zmm0, %zmm0
; X64-NEXT: vpsllq $32, %zmm1, %zmm1
; X64-NEXT: vpsraq $32, %zmm1, %zmm1
; X64-NEXT: vpmuldq %zmm0, %zmm1, %zmm0
; X64-NEXT: retq
%tmp = shl <8 x i64> %__A, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%tmp1 = ashr exact <8 x i64> %tmp, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%tmp2 = shl <8 x i64> %__B, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%tmp3 = ashr exact <8 x i64> %tmp2, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%tmp4 = mul nsw <8 x i64> %tmp3, %tmp1
ret <8 x i64> %tmp4
}
define <8 x i64> @test_mm512_maskz_mul_epi32(i16 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B) nounwind {
; X32-LABEL: test_mm512_maskz_mul_epi32:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpmuldq %zmm0, %zmm1, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_mul_epi32:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmuldq %zmm0, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT: retq
%conv = trunc i16 %__k to i8
%tmp = shl <8 x i64> %__A, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%tmp1 = ashr exact <8 x i64> %tmp, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%tmp2 = shl <8 x i64> %__B, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%tmp3 = ashr exact <8 x i64> %tmp2, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%tmp4 = mul nsw <8 x i64> %tmp3, %tmp1
%tmp5 = bitcast i8 %conv to <8 x i1>
%tmp6 = select <8 x i1> %tmp5, <8 x i64> %tmp4, <8 x i64> zeroinitializer
ret <8 x i64> %tmp6
}
define <8 x i64> @test_mm512_mask_mul_epi32(i16 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__src) nounwind {
; X32-LABEL: test_mm512_mask_mul_epi32:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpmuldq %zmm0, %zmm1, %zmm2 {%k1}
; X32-NEXT: vmovdqa64 %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_mul_epi32:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmuldq %zmm0, %zmm1, %zmm2 {%k1}
; X64-NEXT: vmovdqa64 %zmm2, %zmm0
; X64-NEXT: retq
%conv = trunc i16 %__k to i8
%tmp = shl <8 x i64> %__A, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%tmp1 = ashr exact <8 x i64> %tmp, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%tmp2 = shl <8 x i64> %__B, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%tmp3 = ashr exact <8 x i64> %tmp2, <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
%tmp4 = mul nsw <8 x i64> %tmp3, %tmp1
%tmp5 = bitcast i8 %conv to <8 x i1>
%tmp6 = select <8 x i1> %tmp5, <8 x i64> %tmp4, <8 x i64> %__src
ret <8 x i64> %tmp6
}
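
; mul_epu32 models _mm512_mul_epu32: clear the high dword of each qword, then
; multiply. Unmasked, the and-with-0xffffffff is done by zeroing the odd
; dwords through an inverted 0xAAAA writemask (movw/knotw/vmovdqa32 {z})
; before vpmuludq; the write-masked forms again collapse to a single vpmuludq.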
define <8 x i64> @test_mm512_mul_epu32(<8 x i64> %__A, <8 x i64> %__B) nounwind {
; X32-LABEL: test_mm512_mul_epu32:
; X32: # %bb.0:
; X32-NEXT: movw $-21846, %ax # imm = 0xAAAA
; X32-NEXT: kmovw %eax, %k0
; X32-NEXT: knotw %k0, %k1
; X32-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; X32-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; X32-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mul_epu32:
; X64: # %bb.0:
; X64-NEXT: movw $-21846, %ax # imm = 0xAAAA
; X64-NEXT: kmovw %eax, %k0
; X64-NEXT: knotw %k0, %k1
; X64-NEXT: vmovdqa32 %zmm0, %zmm0 {%k1} {z}
; X64-NEXT: vmovdqa32 %zmm1, %zmm1 {%k1} {z}
; X64-NEXT: vpmuludq %zmm0, %zmm1, %zmm0
; X64-NEXT: retq
%tmp = and <8 x i64> %__A, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%tmp1 = and <8 x i64> %__B, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%tmp2 = mul nuw <8 x i64> %tmp1, %tmp
ret <8 x i64> %tmp2
}
define <8 x i64> @test_mm512_maskz_mul_epu32(i16 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B) nounwind {
; X32-LABEL: test_mm512_maskz_mul_epu32:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpmuludq %zmm0, %zmm1, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_mul_epu32:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmuludq %zmm0, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT: retq
%conv = trunc i16 %__k to i8
%tmp = and <8 x i64> %__A, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%tmp1 = and <8 x i64> %__B, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%tmp2 = mul nuw <8 x i64> %tmp1, %tmp
%tmp3 = bitcast i8 %conv to <8 x i1>
%tmp4 = select <8 x i1> %tmp3, <8 x i64> %tmp2, <8 x i64> zeroinitializer
ret <8 x i64> %tmp4
}
define <8 x i64> @test_mm512_mask_mul_epu32(i16 zeroext %__k, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__src) nounwind {
; X32-LABEL: test_mm512_mask_mul_epu32:
; X32: # %bb.0:
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpmuludq %zmm0, %zmm1, %zmm2 {%k1}
; X32-NEXT: vmovdqa64 %zmm2, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_mul_epu32:
; X64: # %bb.0:
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmuludq %zmm0, %zmm1, %zmm2 {%k1}
; X64-NEXT: vmovdqa64 %zmm2, %zmm0
; X64-NEXT: retq
%conv = trunc i16 %__k to i8
%tmp = and <8 x i64> %__A, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%tmp1 = and <8 x i64> %__B, <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>
%tmp2 = mul nuw <8 x i64> %tmp1, %tmp
%tmp3 = bitcast i8 %conv to <8 x i1>
%tmp4 = select <8 x i1> %tmp3, <8 x i64> %tmp2, <8 x i64> %__src
ret <8 x i64> %tmp4
}
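
; Plain AVX512F has no byte broadcast to a 512-bit register (vpbroadcastb to
; a ZMM requires AVX512BW), so set1_epi8 broadcasts into a YMM and doubles it
; up with vinserti64x4.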
define <8 x double> @test_mm512_set1_epi8(i8 signext %d) nounwind {
; X32-LABEL: test_mm512_set1_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: vmovd %eax, %xmm0
; X32-NEXT: vpbroadcastb %xmm0, %ymm0
; X32-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_set1_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: vmovd %edi, %xmm0
; X64-NEXT: vpbroadcastb %xmm0, %ymm0
; X64-NEXT: vinserti64x4 $1, %ymm0, %zmm0, %zmm0
; X64-NEXT: retq
entry:
%vecinit.i = insertelement <64 x i8> undef, i8 %d, i32 0
%vecinit63.i = shufflevector <64 x i8> %vecinit.i, <64 x i8> undef, <64 x i32> zeroinitializer
%0 = bitcast <64 x i8> %vecinit63.i to <8 x double>
ret <8 x double> %0
}
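
; The scalar unsigned-integer converts model _mm_cvtu32_sd and friends; as an
; illustrative C usage (not part of this test):
;   __m128d r = _mm_cvtu32_sd(a, 42u); // (double)42u into element 0 of a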
define <2 x double> @test_mm_cvtu32_sd(<2 x double> %__A, i32 %__B) {
; X32-LABEL: test_mm_cvtu32_sd:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vcvtusi2sdl %eax, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtu32_sd:
; X64: # %bb.0: # %entry
; X64-NEXT: vcvtusi2sdl %edi, %xmm0, %xmm0
; X64-NEXT: retq
entry:
%conv.i = uitofp i32 %__B to double
%vecins.i = insertelement <2 x double> %__A, double %conv.i, i32 0
ret <2 x double> %vecins.i
}
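
; 32-bit mode has no 64-bit-GPR form of vcvtusi2sd, so the u64 convert is
; expanded inline: the punpckldq/vsubpd/vhaddpd u64->f64 sequence here, and
; fildll plus an fadds bias correction in test_mm_cvtu64_ss below. x86_64
; uses vcvtusi2sdq/vcvtusi2ssq directly.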
define <2 x double> @test_mm_cvtu64_sd(<2 x double> %__A, i64 %__B) {
; X32-LABEL: test_mm_cvtu64_sd:
; X32: # %bb.0: # %entry
; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: vpinsrd $1, {{[0-9]+}}(%esp), %xmm1, %xmm1
; X32-NEXT: vpunpckldq {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[1],mem[1]
; X32-NEXT: vsubpd {{\.LCPI.*}}, %xmm1, %xmm1
; X32-NEXT: vhaddpd %xmm1, %xmm1, %xmm1
; X32-NEXT: vmovsd {{.*#+}} xmm0 = xmm1[0],xmm0[1]
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtu64_sd:
; X64: # %bb.0: # %entry
; X64-NEXT: vcvtusi2sdq %rdi, %xmm0, %xmm0
; X64-NEXT: retq
entry:
%conv.i = uitofp i64 %__B to double
%vecins.i = insertelement <2 x double> %__A, double %conv.i, i32 0
ret <2 x double> %vecins.i
}
define <4 x float> @test_mm_cvtu32_ss(<4 x float> %__A, i32 %__B) {
; X32-LABEL: test_mm_cvtu32_ss:
; X32: # %bb.0: # %entry
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
; X32-NEXT: vcvtusi2ssl %eax, %xmm0, %xmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtu32_ss:
; X64: # %bb.0: # %entry
; X64-NEXT: vcvtusi2ssl %edi, %xmm0, %xmm0
; X64-NEXT: retq
entry:
%conv.i = uitofp i32 %__B to float
%vecins.i = insertelement <4 x float> %__A, float %conv.i, i32 0
ret <4 x float> %vecins.i
}
define <4 x float> @test_mm_cvtu64_ss(<4 x float> %__A, i64 %__B) {
; X32-LABEL: test_mm_cvtu64_ss:
; X32: # %bb.0: # %entry
; X32-NEXT: pushl %ebp
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: .cfi_offset %ebp, -8
; X32-NEXT: movl %esp, %ebp
; X32-NEXT: .cfi_def_cfa_register %ebp
; X32-NEXT: andl $-8, %esp
; X32-NEXT: subl $16, %esp
; X32-NEXT: movl 12(%ebp), %eax
; X32-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: vpinsrd $1, %eax, %xmm1, %xmm1
; X32-NEXT: vmovq %xmm1, {{[0-9]+}}(%esp)
; X32-NEXT: xorl %ecx, %ecx
; X32-NEXT: testl %eax, %eax
; X32-NEXT: setns %cl
; X32-NEXT: fildll {{[0-9]+}}(%esp)
; X32-NEXT: fadds {{\.LCPI.*}}(,%ecx,4)
; X32-NEXT: fstps {{[0-9]+}}(%esp)
; X32-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; X32-NEXT: vmovss {{.*#+}} xmm0 = xmm1[0],xmm0[1,2,3]
; X32-NEXT: movl %ebp, %esp
; X32-NEXT: popl %ebp
; X32-NEXT: .cfi_def_cfa %esp, 4
; X32-NEXT: retl
;
; X64-LABEL: test_mm_cvtu64_ss:
; X64: # %bb.0: # %entry
; X64-NEXT: vcvtusi2ssq %rdi, %xmm0, %xmm0
; X64-NEXT: retq
entry:
%conv.i = uitofp i64 %__B to float
%vecins.i = insertelement <4 x float> %__A, float %conv.i, i32 0
ret <4 x float> %vecins.i
}
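
; cvtps_pd widens <8 x float> to <8 x double>. cvtpslo_pd takes only the low
; half of a <16 x float> and lowers to the identical vcvtps2pd %ymm0, since
; the low YMM half needs no extraction.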
define <8 x double> @test_mm512_cvtps_pd(<8 x float> %__A) {
; X32-LABEL: test_mm512_cvtps_pd:
; X32: # %bb.0: # %entry
; X32-NEXT: vcvtps2pd %ymm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_cvtps_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: vcvtps2pd %ymm0, %zmm0
; X64-NEXT: retq
entry:
%conv.i = fpext <8 x float> %__A to <8 x double>
ret <8 x double> %conv.i
}
define <8 x double> @test_mm512_cvtpslo_pd(<16 x float> %__A) {
; X32-LABEL: test_mm512_cvtpslo_pd:
; X32: # %bb.0: # %entry
; X32-NEXT: vcvtps2pd %ymm0, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_cvtpslo_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: vcvtps2pd %ymm0, %zmm0
; X64-NEXT: retq
entry:
%shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%conv.i.i = fpext <8 x float> %shuffle.i.i to <8 x double>
ret <8 x double> %conv.i.i
}
define <8 x double> @test_mm512_mask_cvtps_pd(<8 x double> %__W, i8 zeroext %__U, <8 x float> %__A) {
; X32-LABEL: test_mm512_mask_cvtps_pd:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vcvtps2pd %ymm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_cvtps_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vcvtps2pd %ymm1, %zmm0 {%k1}
; X64-NEXT: retq
entry:
%conv.i.i = fpext <8 x float> %__A to <8 x double>
%0 = bitcast i8 %__U to <8 x i1>
%1 = select <8 x i1> %0, <8 x double> %conv.i.i, <8 x double> %__W
ret <8 x double> %1
}
define <8 x double> @test_mm512_mask_cvtpslo_pd(<8 x double> %__W, i8 zeroext %__U, <16 x float> %__A) {
; X32-LABEL: test_mm512_mask_cvtpslo_pd:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vcvtps2pd %ymm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_cvtpslo_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vcvtps2pd %ymm1, %zmm0 {%k1}
; X64-NEXT: retq
entry:
%shuffle.i.i = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%conv.i.i.i = fpext <8 x float> %shuffle.i.i to <8 x double>
%0 = bitcast i8 %__U to <8 x i1>
%1 = select <8 x i1> %0, <8 x double> %conv.i.i.i, <8 x double> %__W
ret <8 x double> %1
}
define <8 x double> @test_mm512_maskz_cvtps_pd(i8 zeroext %__U, <8 x float> %__A) {
; X32-LABEL: test_mm512_maskz_cvtps_pd:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vcvtps2pd %ymm0, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_cvtps_pd:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vcvtps2pd %ymm0, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%conv.i.i = fpext <8 x float> %__A to <8 x double>
%0 = bitcast i8 %__U to <8 x i1>
%1 = select <8 x i1> %0, <8 x double> %conv.i.i, <8 x double> zeroinitializer
ret <8 x double> %1
}
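
; Down-converts: the unmasked dword->byte truncate selects vpmovdb, but the
; masked forms go through the target intrinsic
; llvm.x86.avx512.mask.pmov.db.512 rather than trunc-plus-select, presumably
; because a generic select on <16 x i8> is not natively maskable without
; AVX512BW.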
define <2 x i64> @test_mm512_cvtepi32_epi8(<8 x i64> %__A) {
; X32-LABEL: test_mm512_cvtepi32_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: vpmovdb %zmm0, %xmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_cvtepi32_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: vpmovdb %zmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%conv.i = trunc <16 x i32> %0 to <16 x i8>
%1 = bitcast <16 x i8> %conv.i to <2 x i64>
ret <2 x i64> %1
}
define <2 x i64> @test_mm512_mask_cvtepi32_epi8(<2 x i64> %__O, i16 zeroext %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_mask_cvtepi32_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpmovdb %zmm1, %xmm0 {%k1}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_cvtepi32_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovdb %zmm1, %xmm0 {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <2 x i64> %__O to <16 x i8>
%2 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %0, <16 x i8> %1, i16 %__M)
%3 = bitcast <16 x i8> %2 to <2 x i64>
ret <2 x i64> %3
}
define <2 x i64> @test_mm512_maskz_cvtepi32_epi8(i16 zeroext %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_maskz_cvtepi32_epi8:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_cvtepi32_epi8:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovdb %zmm0, %xmm0 {%k1} {z}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = tail call <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32> %0, <16 x i8> zeroinitializer, i16 %__M)
%2 = bitcast <16 x i8> %1 to <2 x i64>
ret <2 x i64> %2
}
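
; Qword->dword narrowing, by contrast, stays in generic IR: the masked forms
; here are an ordinary trunc plus an <8 x i1> select, which folds into
; vpmovqd under {%k1}.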
define <4 x i64> @test_mm512_cvtepi64_epi32(<8 x i64> %__A) {
; X32-LABEL: test_mm512_cvtepi64_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: vpmovqd %zmm0, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_cvtepi64_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: vpmovqd %zmm0, %ymm0
; X64-NEXT: retq
entry:
%conv.i = trunc <8 x i64> %__A to <8 x i32>
%0 = bitcast <8 x i32> %conv.i to <4 x i64>
ret <4 x i64> %0
}
define <4 x i64> @test_mm512_mask_cvtepi64_epi32(<4 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_mask_cvtepi64_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpmovqd %zmm1, %ymm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_cvtepi64_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovqd %zmm1, %ymm0 {%k1}
; X64-NEXT: retq
entry:
%conv.i.i = trunc <8 x i64> %__A to <8 x i32>
%0 = bitcast <4 x i64> %__O to <8 x i32>
%1 = bitcast i8 %__M to <8 x i1>
%2 = select <8 x i1> %1, <8 x i32> %conv.i.i, <8 x i32> %0
%3 = bitcast <8 x i32> %2 to <4 x i64>
ret <4 x i64> %3
}
define <4 x i64> @test_mm512_maskz_cvtepi64_epi32(i8 zeroext %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_maskz_cvtepi64_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_cvtepi64_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovqd %zmm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
entry:
%conv.i.i = trunc <8 x i64> %__A to <8 x i32>
%0 = bitcast i8 %__M to <8 x i1>
%1 = select <8 x i1> %0, <8 x i32> %conv.i.i, <8 x i32> zeroinitializer
%2 = bitcast <8 x i32> %1 to <4 x i64>
ret <4 x i64> %2
}
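
; Qword->word narrowing returns to the intrinsic form
; (llvm.x86.avx512.mask.pmov.qw.512) for its masked variants, matching the
; dword->byte case above.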
define <2 x i64> @test_mm512_cvtepi64_epi16(<8 x i64> %__A) {
; X32-LABEL: test_mm512_cvtepi64_epi16:
; X32: # %bb.0: # %entry
; X32-NEXT: vpmovqw %zmm0, %xmm0
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_cvtepi64_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: vpmovqw %zmm0, %xmm0
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%conv.i = trunc <8 x i64> %__A to <8 x i16>
%0 = bitcast <8 x i16> %conv.i to <2 x i64>
ret <2 x i64> %0
}
define <2 x i64> @test_mm512_mask_cvtepi64_epi16(<2 x i64> %__O, i8 zeroext %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_mask_cvtepi64_epi16:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpmovqw %zmm1, %xmm0 {%k1}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_cvtepi64_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovqw %zmm1, %xmm0 {%k1}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = bitcast <2 x i64> %__O to <8 x i16>
%1 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %__A, <8 x i16> %0, i8 %__M)
%2 = bitcast <8 x i16> %1 to <2 x i64>
ret <2 x i64> %2
}
define <2 x i64> @test_mm512_maskz_cvtepi64_epi16(i8 zeroext %__M, <8 x i64> %__A) {
; X32-LABEL: test_mm512_maskz_cvtepi64_epi16:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpmovqw %zmm0, %xmm0 {%k1} {z}
; X32-NEXT: vzeroupper
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_cvtepi64_epi16:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpmovqw %zmm0, %xmm0 {%k1} {z}
; X64-NEXT: vzeroupper
; X64-NEXT: retq
entry:
%0 = tail call <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64> %__A, <8 x i16> zeroinitializer, i8 %__M)
%1 = bitcast <8 x i16> %0 to <2 x i64>
ret <2 x i64> %1
}
declare <16 x i8> @llvm.x86.avx512.mask.pmov.db.512(<16 x i32>, <16 x i8>, i16)
declare <8 x i16> @llvm.x86.avx512.mask.pmov.qw.512(<8 x i64>, <8 x i16>, i8)
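
; The ternarylogic tests drive vpternlogd/vpternlogq with imm8 = 4. The epi32
; forms go through llvm.x86.avx512.pternlog.d.512 on <16 x i32> with a
; bitcast i16 mask; the epi64 forms use llvm.x86.avx512.pternlog.q.512
; directly on <8 x i64> with an i8 mask.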
define <8 x i64> @test_mm512_ternarylogic_epi32(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
; X32-LABEL: test_mm512_ternarylogic_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_ternarylogic_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <8 x i64> %__B to <16 x i32>
%2 = bitcast <8 x i64> %__C to <16 x i32>
%3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4)
%4 = bitcast <16 x i32> %3 to <8 x i64>
ret <8 x i64> %4
}
declare <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32) #1
define <8 x i64> @test_mm512_mask_ternarylogic_epi32(<8 x i64> %__A, i16 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
; X32-LABEL: test_mm512_mask_ternarylogic_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_ternarylogic_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <8 x i64> %__B to <16 x i32>
%2 = bitcast <8 x i64> %__C to <16 x i32>
%3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4)
%4 = bitcast i16 %__U to <16 x i1>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> %0
%6 = bitcast <16 x i32> %5 to <8 x i64>
ret <8 x i64> %6
}
define <8 x i64> @test_mm512_maskz_ternarylogic_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
; X32-LABEL: test_mm512_maskz_ternarylogic_epi32:
; X32: # %bb.0: # %entry
; X32-NEXT: kmovw {{[0-9]+}}(%esp), %k1
; X32-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_ternarylogic_epi32:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogd $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = bitcast <8 x i64> %__A to <16 x i32>
%1 = bitcast <8 x i64> %__B to <16 x i32>
%2 = bitcast <8 x i64> %__C to <16 x i32>
%3 = tail call <16 x i32> @llvm.x86.avx512.pternlog.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i32 4)
%4 = bitcast i16 %__U to <16 x i1>
%5 = select <16 x i1> %4, <16 x i32> %3, <16 x i32> zeroinitializer
%6 = bitcast <16 x i32> %5 to <8 x i64>
ret <8 x i64> %6
}
define <8 x i64> @test_mm512_ternarylogic_epi64(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
; X32-LABEL: test_mm512_ternarylogic_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_ternarylogic_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0
; X64-NEXT: retq
entry:
%0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4)
ret <8 x i64> %0
}
declare <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32) #1
define <8 x i64> @test_mm512_mask_ternarylogic_epi64(<8 x i64> %__A, i8 zeroext %__U, <8 x i64> %__B, <8 x i64> %__C) {
; X32-LABEL: test_mm512_mask_ternarylogic_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_mask_ternarylogic_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT: retq
entry:
%0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4)
%1 = bitcast i8 %__U to <8 x i1>
%2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__A
ret <8 x i64> %2
}
define <8 x i64> @test_mm512_maskz_ternarylogic_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C) {
; X32-LABEL: test_mm512_maskz_ternarylogic_epi64:
; X32: # %bb.0: # %entry
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X32-NEXT: retl
;
; X64-LABEL: test_mm512_maskz_ternarylogic_epi64:
; X64: # %bb.0: # %entry
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpternlogq $4, %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT: retq
entry:
%0 = tail call <8 x i64> @llvm.x86.avx512.pternlog.q.512(<8 x i64> %__A, <8 x i64> %__B, <8 x i64> %__C, i32 4)
%1 = bitcast i8 %__U to <8 x i1>
%2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
ret <8 x i64> %2
}
!0 = !{i32 1}